diff --git a/source/tools/detect/ext4tool/Makefile b/source/tools/detect/ext4tool/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..6a58b3a5ece546dfb4ae837da8a53425d6927491 --- /dev/null +++ b/source/tools/detect/ext4tool/Makefile @@ -0,0 +1 @@ +include $(SRC)/mk/sub.mk diff --git a/source/tools/detect/ext4tool/ext4syscall/Makefile b/source/tools/detect/ext4tool/ext4syscall/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d4e1070f675225ff27e57b8f3a39a0d786b1008c --- /dev/null +++ b/source/tools/detect/ext4tool/ext4syscall/Makefile @@ -0,0 +1,3 @@ +target := ext4syscall + +include $(SRC)/mk/sub.mk diff --git a/source/tools/detect/ext4tool/ext4syscall/README.txt b/source/tools/detect/ext4tool/ext4syscall/README.txt new file mode 100644 index 0000000000000000000000000000000000000000..b1dd88d65ab28c14d9215c882599ac28008fb1d2 --- /dev/null +++ b/source/tools/detect/ext4tool/ext4syscall/README.txt @@ -0,0 +1,113 @@ +1. ext4syscall工具介绍 +================== + +ext4syscall是ext4 open/write/read/fsync系统调用内核流程跟踪工具。 +主要功能: +1) 记录open/write/read/fsync函数在内核态的关键调用栈和耗时; +2) 记录执行时间内调用总次数和最大耗时流程; +3)兼容4.19/4.9/3.10内核。 + +2. 编译 +================== +./configure --enable-libbpf --enable-target-cgtool --enable-target-ext4syscall +make + +3. 使用说明 +================== + +3.1 ext4syscall使用说明 +================== + +# sysak ext4syscall --help +Usage: + sysak ext4syscall [options] [sysname args]] + options: -h, help information + sysname: + open/write/read/fsync + args: + args for the syscall, --help get more +Examples: + sysak ext4syscall -l + sysak ext4syscall write --help + +sysname参数:跟踪open/write/read/fsync系统调用 + +3.2 ext4syscall [sysname args] 使用说明 +================== + +# sysak ext4syscall write --help +Usage: ext4write [OPTION...] 
+
+ -b, --btf=BTF_PATH Specify path of the custom btf
+ -d, --debug Enable libbpf debug output
+ -l, --time=time duration time
+ -p, --pid=pid pid
+ -s, --threshold=threshold Show data that exceeds the threshold(us)
+ -t, --tid=tid tid
+ -?, --help Give this help list
+ --usage Give a short usage message
+ -V, --version Print program version
+
+参数说明:
+ -l:跟踪时长
+ -p:跟踪任务的pid
+ -t:跟踪任务的tid
+ -s:系统调用的耗时阈值(us)
+
+4. 输出说明
+==================
+输出内容包括:
+1)所有执行write调用的任务信息;
+
+
+2)系统调用次数统计:
+ syscall:[write] times:755(5s) // 5s执行755次write
+
+3)最大执行耗时的信息;
+max_duration infomation:
+pid:5129 tid:5129 comm:ext4syscalltest // pid:5129 tid:5129 命令名称:ext4syscalltest
+byte:1048576 // write的大小
+duration:0.000731 // 耗时,单位为秒(s),即731us
+step func [entrytime offset] [returntime offset] [duration]
+1 vfs_write [1748384.351305 0.000000] [1748384.352036 0.000731] [0.000731]
+3 ext4_file_write_iter [1748384.351309 0.000004] [1748384.352035 0.000730] [0.000726]
+4 ext4_buffered_write_iter [1748384.351311 0.000006] [1748384.352033 0.000728] [0.000722]
+5 generic_perform_write [1748384.351327 0.000022] [1748384.352031 0.000726] [0.000704]
+// step 表示步骤
+// func 表示函数名称
+// [entrytime offset] 函数开始执行时间和与第一个监控函数(指vfs_write)开始执行时的时间差
+// [returntime offset] 函数退出时间和与第一个监控函数(指vfs_write)开始执行时的时间差
+// [duration] 本函数退出时间和开始执行的时间差
+
+1)和2)中间用”============================“隔开
+
+举例1:跟踪系统5s内write
+# sysak ext4syscall write -l 5
+...
+pid:28861 tid:28861 comm:bash
+byte:21
+duration:0.000020
+step func [entrytime offset] [returntime offset] [duration]
+1 vfs_write [669701.755679 0.000000] [669701.755699 0.000020] [0.000020]
+
+============================
+syscall:[write] times:25(5s)
+
+max_duration infomation:
+pid:30362 tid:30362 comm:ext4syscall_tes
+byte:10485760
+duration:0.003350
+step func [entrytime offset] [returntime offset] [duration]
+1 vfs_write [669701.751686 0.000000] [669701.755036 0.003350] [0.003350]
+3 ext4_file_write_iter [669701.751689 0.000003] [669701.755034 0.003348] [0.003345]
+// 第3步说明vfs执行到ext4 ext4_file_write_iter 看entrytime->offset耗时0.000003s(3us)
+4 ext4_buffered_write_iter [669701.751691 0.000005] [669701.755033 0.003347] [0.003342]
+// 第4步说明ext4 文件系统耗时0.000002s (0.000005 - 0.000003)
+5 generic_perform_write [669701.751703 0.000017] [669701.755032 0.003346] [0.003329]
+// 下面是block层,看[duration]耗时0.003329s
+
+可以看到5s内系统调用write 25次,其中耗时最长的是:pid:30362 tid:30362 comm:ext4syscall_tes。
+写byte:10485760,耗时0.003350 s
+其中vfs层耗时 0.000003s
+ext4文件系统层耗时0.000017s
+后续是IO block层耗时0.003329s
diff --git a/source/tools/detect/ext4tool/ext4syscall/ext4fsync/Makefile b/source/tools/detect/ext4tool/ext4syscall/ext4fsync/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..6595ab4b38ae08a20faf418f4def7517586a5d4a
--- /dev/null
+++ b/source/tools/detect/ext4tool/ext4syscall/ext4fsync/Makefile
@@ -0,0 +1,9 @@
+newdirs += .
+csrcs += $(filter-out $(wildcard *.bpf.c), $(wildcard *.c))
+bpfsrcs += $(wildcard *.bpf.c)
+
+CFLAGS += -static
+
+target = ext4fsync
+
+include $(SRC)/mk/bpf.mk
diff --git a/source/tools/detect/ext4tool/ext4syscall/ext4fsync/ext4fsync.bpf.c b/source/tools/detect/ext4tool/ext4syscall/ext4fsync/ext4fsync.bpf.c
new file mode 100644
index 0000000000000000000000000000000000000000..92116cb84ce26fbad5bf419d667cd2a3dc98230d
--- /dev/null
+++ b/source/tools/detect/ext4tool/ext4syscall/ext4fsync/ext4fsync.bpf.c
@@ -0,0 +1,120 @@
+#include
+#include
+#include
+#include
+#include
+#include "ext4fsync.h"
+
+#define BPF_ANY 0
+#define NULL ((void*)0)
+
+/* One record per traced call; the entry probe keys it by the caller's pid_tgid. */
+struct bpf_map_def SEC("maps") fsync_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(unsigned long),
+	.value_size = sizeof(struct fsync),
+	.max_entries = 50000,
+};
+
+/* Stamp the entry time of slot n in the current task's record, if one exists. */
+static int trace_entry(int n)
+{
+	unsigned long ptid = bpf_get_current_pid_tgid();
+	struct fsync *call;
+
+	call = bpf_map_lookup_elem(&fsync_map, &ptid);
+	if (call != NULL)
+		call->func[n].entrytime = bpf_ktime_get_ns();
+
+	return 0;
+}
+
+/* Stamp the return time of slot n in the current task's record, if one exists. */
+static int trace_return(int n)
+{
+	unsigned long ptid = bpf_get_current_pid_tgid();
+	struct fsync *call;
+
+	call = bpf_map_lookup_elem(&fsync_map, &ptid);
+	if (call != NULL)
+		call->func[n].returntime = bpf_ktime_get_ns();
+
+	return 0;
+}
+
+/*
+ * Start a fresh, zeroed record for the current task: slot 0 entry time, pid_tgid and comm.
+ */
+SEC("kprobe/vfs_fsync_range")
+static int kprobe_vfs_fsync_range(struct pt_regs *ctx)
+{
+	unsigned long ptid = bpf_get_current_pid_tgid();
+	struct fsync call = {0};
+
+	call.func[0].entrytime = bpf_ktime_get_ns();
+	call.ptid = ptid;
+	bpf_get_current_comm(&call.comm, sizeof(call.comm));
+
+	bpf_map_update_elem(&fsync_map, &ptid, &call, BPF_ANY);
+
+	return 0;
+}
+SEC("kretprobe/vfs_fsync_range")
+static int kretprobe_vfs_fsync_range(struct pt_regs *ctx)
+{
+	unsigned long ptid = bpf_get_current_pid_tgid();
+	struct fsync *call;
+	struct fsync call_new = {0};
+	unsigned long key = 0;
+
+	call = bpf_map_lookup_elem(&fsync_map, &ptid);
+	if (call != NULL) {
+		call->func[0].returntime = bpf_ktime_get_ns();
+
+		__builtin_memcpy(&call_new, call, sizeof(call_new));
+
+		bpf_map_delete_elem(&fsync_map, &ptid);
+
+		// update map for new key
+		key = call_new.func[0].entrytime - ptid;
+		bpf_map_update_elem(&fsync_map, &key, &call_new, BPF_ANY);
+	}
+
+	return 0;
+}
+
+#define KPROBE_FUNC(func, n) \
+SEC("kprobe/"#func"") \
+static int kprobe_##func(struct pt_regs *ctx) \
+{ \
+	trace_entry(n); \
+	\
+	return 0; \
+} \
+SEC("kretprobe/"#func"") \
+static int kretprobe_##func(struct pt_regs *ctx) \
+{ \
+	trace_return(n); \
+	\
+	return 0; \
+}
+
+KPROBE_FUNC(ext4_sync_file, 1)
+
+KPROBE_FUNC(file_write_and_wait_range, 2)
+
+KPROBE_FUNC(ext4_force_commit, 3)
+
+KPROBE_FUNC(ext4_fsync_nojournal, 4)
+
+KPROBE_FUNC(ext4_fsync_journal, 5)
+
+KPROBE_FUNC(__generic_file_fsync, 6)
+
+KPROBE_FUNC(filemap_write_and_wait_range, 7)
+
+KPROBE_FUNC(jbd2_complete_transaction, 8)
+
+KPROBE_FUNC(generic_file_fsync, 9)
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/source/tools/detect/ext4tool/ext4syscall/ext4fsync/ext4fsync.c b/source/tools/detect/ext4tool/ext4syscall/ext4fsync/ext4fsync.c
new file mode 100644
index 0000000000000000000000000000000000000000..d4d8a0ee20ee6e04d2210258b00087c124f80828
--- /dev/null
+++ b/source/tools/detect/ext4tool/ext4syscall/ext4fsync/ext4fsync.c
@@ -0,0 +1,254 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../ext4syscalllib.h"
+
+#include "ext4fsync.h"
+#include "ext4fsync_info.h"
+#include "ext4fsync.skel.h"
+
+const char *argp_program_version = "ext4fsync 1.0";
+
+static const struct argp_option usage_options[] = {
+	{"time", 'l', "time", 0, "duration time"},
+	{"pid", 'p', "pid", 0, "pid"},
+	{"tid", 't', "tid", 0, "tid"},
+	{"threshold", 's', "threshold", 0, "Show data that exceeds the threshold(us)"},
+	{"btf", 'b', "BTF_PATH", 0, "Specify path of the custom btf"},
+	{"debug", 'd', NULL, 0, "Enable libbpf debug output"},
+	{NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help"},
+	{},
+};
+
+static struct env
+{
+	int time;
+	int pid;
+	int tid;
+	int threshold;
+	bool debug;
+	char *btf_custom_path;
+} env = {
+	.debug = false,
+	.btf_custom_path = NULL,
+};
+
+static struct ext4fsync_bpf *obj = NULL;
+
+/* Set by alarm_stop(); main() polls it to know when to dump and exit. */
+static volatile sig_atomic_t exiting;
+
+static void bump_memlock_rlimit(void)
+{
+	struct rlimit rlim_new = {
+		.rlim_cur = RLIM_INFINITY,
+		.rlim_max = RLIM_INFINITY,
+	};
+
+	if (setrlimit(RLIMIT_MEMLOCK, &rlim_new)) {
+		fprintf(stderr, "Failed to increase RLIMIT_MEMLOCK limit!\n");
+		exit(1);
+	}
+}
+
+static int libbpf_print_fn(enum libbpf_print_level level,
+			   const char *format, va_list args)
+{
+	if (!env.debug)
+		return 0;
+	return vfprintf(stderr, format, args);
+}
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	switch (key)
+	{
+	case 'l':
+		env.time = atoi(arg);
+		break;
+	case 'p':
+		env.pid = atoi(arg);
+		break;
+	case 't':
+		env.tid = atoi(arg);
+		break;
+	case 's':
+		env.threshold = atoi(arg);
+		break;
+	case 'd':
+		env.debug = true;
+		break;
+	case 'b':
+		env.btf_custom_path = arg;
+		break;
+	case 'h':
+		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+	return 0;
+}
+
+static void print_data(struct fsync info)
+{
+	int i;
+
+	printf("pid:%d tid:%d comm:%s \n", info.pid, info.tid, info.comm);
+//	printf("fsyncfile:%s\n", info.filename);
+	printf("duration:%f\n", info.duration / SEC2USEC);
+	printf("step func [entrytime offset] [returntime offset] [duration]\n");
+	for (i = 0; i < FUNC_NUM; i++) {
+		if (info.func[i].entrytime == 0 || info.func[i].returntime == 0)
+			continue;
+
+		printf("%d %s [%f %f] [%f %f] [%f]\n",
+			info.func[i].step, syscallinfo[i].name,
+			(double)info.func[i].entrytime / SEC2USEC,
+			(double)(info.func[i].entrytime - info.func[0].entrytime) / SEC2USEC,
+			(double)info.func[i].returntime / SEC2USEC,
+			(double)(info.func[i].returntime - info.func[0].entrytime) / SEC2USEC,
+			info.func[i].duration / SEC2USEC);
+	}
+	printf("\n");
+
+}
+
+static void record_data(void)
+{
+	int fd = bpf_map__fd(obj->maps.fsync_map);
+	unsigned long key, next_key;
+	unsigned long *prev_key = NULL;	/* NULL asks for the first key */
+	struct fsync info;
+	struct fsync max = {0};
+	int i, num = 0;
+
+	while (bpf_map_get_next_key(fd, prev_key, &next_key) == 0) {
+		key = next_key;
+		prev_key = &key;
+
+		if (bpf_map_lookup_elem(fd, &key, &info) != 0)
+			continue;
+
+		/* skip calls still in flight: they have no return time yet */
+		if (info.func[0].returntime == 0)
+			continue;
+
+		/* 1. calculate pid/tid/duration */
+		info.pid = (unsigned int)(info.ptid >> 32);
+		info.tid = (unsigned int)info.ptid;
+
+		for (i = 0; i < FUNC_NUM; i++) {
+			info.func[i].step = syscallinfo[i].step;
+			info.func[i].entrytime = info.func[i].entrytime / USEC2NSEC;
+			info.func[i].returntime = info.func[i].returntime / USEC2NSEC;
+			/* entered but not returned: avoid unsigned wrap-around */
+			info.func[i].duration = 0;
+			if (info.func[i].returntime >= info.func[i].entrytime)
+				info.func[i].duration = (double)
+					(info.func[i].returntime - info.func[i].entrytime);
+		}
+
+		info.duration = info.func[0].duration;
+
+		/* check param */
+		if (env.pid != 0 && env.pid != info.pid)
+			continue;
+		if (env.tid != 0 && env.tid != info.tid)
+			continue;
+
+		if (env.threshold != 0 && env.threshold > info.duration)
+			continue;
+
+		/* skip self */
+		if (strcmp(info.comm, SELF_COMM) == 0)
+			continue;
+
+		/* print info */
+		print_data(info);
+
+		/* record the number of fsync times and the max */
+		num++;
+
+		if (info.duration > max.duration)
+			memcpy(&max, &info, sizeof(info));
+	}
+
+	printf("============================\n");
+	printf("syscall:[fsync] times:%d(%ds)\n\n", num, env.time);
+	printf("max_duration infomation:\n");
+	print_data(max);
+}
+
+static void alarm_stop(int signo)
+{
+	/* printf() is not async-signal-safe: only flag the request here */
+	exiting = 1;
+}
+
+int main(int argc, char **argv)
+{
+	int err;
+	struct bpf_program *prog;
+	static const struct argp argp = {
+		.options = usage_options,
+		.parser = parse_arg,
+		.args_doc = NULL,
+	};
+
+	bump_memlock_rlimit();
+	libbpf_set_print(libbpf_print_fn);
+	err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
+	if (err)
+		return err;
+
+	obj = ext4fsync_bpf__open();
+	if (!obj)
+	{
+		printf("failed to open BPF object\n");
+		return 1;
+	}
+
+	bpf_object__for_each_program(prog, obj->obj) {
+		if (!find_ksym_by_name(bpf_program__name(prog)))
+			bpf_program__set_autoload(prog, false);
+	}
+
+	err = ext4fsync_bpf__load(obj);
+	if (err)
+	{
+		printf("failed to load BPF object: %s\n", strerror(-err));
+		goto cleanup;
+	}
+	err = ext4fsync_bpf__attach(obj);
+	if (err)
+	{
+		printf("failed to attach BPF programs: %s\n", strerror(-err));
+		goto cleanup;
+	}
+
+	signal(SIGINT, alarm_stop);
+	signal(SIGALRM, alarm_stop);
+
+	/* without -l (time <= 0) trace until SIGINT */
+	if (env.time > 0)
+		alarm(env.time);
+	while (!exiting)
+		sleep(1);
+
+	/* detach first so the map no longer changes while it is dumped */
+	ext4fsync_bpf__detach(obj);
+	record_data();
+
+cleanup:
+	ext4fsync_bpf__destroy(obj);
+	return err ? 1 : 0;
+}
diff --git a/source/tools/detect/ext4tool/ext4syscall/ext4fsync/ext4fsync.h b/source/tools/detect/ext4tool/ext4syscall/ext4fsync/ext4fsync.h
new file mode 100644
index 0000000000000000000000000000000000000000..e69d7cf56ab53b2436f8438d665446b505595d9c
--- /dev/null
+++ b/source/tools/detect/ext4tool/ext4syscall/ext4fsync/ext4fsync.h
@@ -0,0 +1,28 @@
+#ifndef __EXT4FSYNC_H
+#define __EXT4FSYNC_H
+
+#define TASK_COMM_LEN 16
+#define FILENAME_LEN 6
+#define FUNC_NUM 10
+#define USEC2NSEC 1000
+#define SEC2USEC 1000000
+#define SELF_COMM "ext4fsync"
+
+struct fsync_func {
+	unsigned int step;
+	unsigned long entrytime;
+	unsigned long returntime;
+	double duration;
+};
+
+struct fsync {
+	unsigned long ptid;
+	unsigned int pid;
+	unsigned int tid;
+	char comm[TASK_COMM_LEN];
+	unsigned char filename[FILENAME_LEN];
+	double duration;
+	struct fsync_func func[FUNC_NUM];
+};
+
+#endif
diff --git a/source/tools/detect/ext4tool/ext4syscall/ext4fsync/ext4fsync_info.h b/source/tools/detect/ext4tool/ext4syscall/ext4fsync/ext4fsync_info.h
new file mode 100644
index 0000000000000000000000000000000000000000..1a3a4fe57cc84adde5dcf6b2aa86d7b43143df26
--- /dev/null
+++ 
b/source/tools/detect/ext4tool/ext4syscall/ext4fsync/ext4fsync_info.h
@@ -0,0 +1,32 @@
+#ifndef __EXT4FSYNC_INFO_H
+#define __EXT4FSYNC_INFO_H
+
+#define FUNC_MAX 30
+
+/* Row i describes timing slot func[i]; step is the value printed in the "step" column. */
+static const struct struct_syscallinfo {
+	unsigned int number;
+	char name[FUNC_MAX];
+	unsigned int step;
+} syscallinfo[] = {
+	{0, "vfs_fsync_range", 1},
+	{1, "ext4_sync_file", 2},
+
+	{2, "file_write_and_wait_range", 3},
+	/* branch 1 for 4.19 */
+	{3, "ext4_force_commit", 5},
+	/* branch 2 for 4.19 */
+	{4, "ext4_fsync_nojournal", 4},
+	/* branch 3 for 4.19*/
+	{5, "ext4_fsync_journal", 4},
+
+	/* for 4.9 */
+	{6, "__generic_file_fsync", 3},
+	{7, "filemap_write_and_wait_range", 4},
+	{8, "jbd2_complete_transaction", 6},
+
+	/* for 3.10 */
+	{9, "generic_file_fsync", 3},
+};
+
+#endif
diff --git a/source/tools/detect/ext4tool/ext4syscall/ext4open/Makefile b/source/tools/detect/ext4tool/ext4syscall/ext4open/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..c1f8764866719b1adfc831530821f02440d46bc9
--- /dev/null
+++ b/source/tools/detect/ext4tool/ext4syscall/ext4open/Makefile
@@ -0,0 +1,9 @@
+newdirs += .
+csrcs += $(filter-out $(wildcard *.bpf.c), $(wildcard *.c))
+bpfsrcs += $(wildcard *.bpf.c)
+
+CFLAGS += -static
+
+target = ext4open
+
+include $(SRC)/mk/bpf.mk
diff --git a/source/tools/detect/ext4tool/ext4syscall/ext4open/ext4open.bpf.c b/source/tools/detect/ext4tool/ext4syscall/ext4open/ext4open.bpf.c
new file mode 100644
index 0000000000000000000000000000000000000000..f4f6003c06e3a9be0b0ca539404dd68635f1947e
--- /dev/null
+++ b/source/tools/detect/ext4tool/ext4syscall/ext4open/ext4open.bpf.c
@@ -0,0 +1,119 @@
+#include
+#include
+#include
+#include
+#include
+#include "ext4open.h"
+
+#define BPF_ANY 0
+#define NULL ((void*)0)
+
+struct bpf_map_def SEC("maps") open_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(unsigned long),
+	.value_size = sizeof(struct open),
+	.max_entries = 50000,
+};
+
+static int trace_entry(int n)
+{
+	unsigned long ptid = bpf_get_current_pid_tgid();
+	struct open *call;
+
+	call = bpf_map_lookup_elem(&open_map, &ptid);
+	if (call != NULL)
+		call->func[n].entrytime = bpf_ktime_get_ns();
+
+	return 0;
+}
+
+static int trace_return(int n)
+{
+	unsigned long ptid = bpf_get_current_pid_tgid();
+	struct open *call;
+
+	call = bpf_map_lookup_elem(&open_map, &ptid);
+	if (call != NULL)
+		call->func[n].returntime = bpf_ktime_get_ns();
+
+	return 0;
+}
+
+SEC("kprobe/do_sys_open")
+static int kprobe_do_sys_open(struct pt_regs *ctx)
+{
+	char *filename = (char *)PT_REGS_PARM2(ctx);
+	unsigned long ptid = bpf_get_current_pid_tgid();
+	struct open call = {0};
+
+	call.ptid = ptid;
+	bpf_get_current_comm(&call.comm, sizeof(call.comm));
+	bpf_core_read(&call.filename, sizeof(call.filename) - 1, filename);
+	call.filename[FILENAME_LEN - 1] = '\0';
+	call.func[0].entrytime = bpf_ktime_get_ns();
+
+	bpf_map_update_elem(&open_map, &ptid, &call, BPF_ANY);
+
+	return 0;
+}
+SEC("kretprobe/do_sys_open")
+static int kretprobe_do_sys_open(struct pt_regs *ctx)
+{
+	unsigned long ptid = bpf_get_current_pid_tgid();
+	struct open *call;
+	struct open call_new = {0};
+	unsigned long key = 0;
+
+	call = bpf_map_lookup_elem(&open_map, &ptid);
+	if (call != NULL) {
+		call->func[0].returntime = bpf_ktime_get_ns();
+
+		__builtin_memcpy(&call_new, call, sizeof(call_new));
+
+		bpf_map_delete_elem(&open_map, &ptid);
+
+		// update map for new key
+		key = call_new.func[0].entrytime - ptid;
+		bpf_map_update_elem(&open_map, &key, &call_new, BPF_ANY);
+	}
+
+	return 0;
+}
+
+#define KPROBE_FUNC(func, n) \
+SEC("kprobe/"#func"") \
+static int kprobe_##func(struct pt_regs *ctx) \
+{ \
+	trace_entry(n); \
+	\
+	return 0; \
+} \
+SEC("kretprobe/"#func"") \
+static int kretprobe_##func(struct pt_regs *ctx) \
+{ \
+	trace_return(n); \
+	\
+	return 0; \
+}
+
+KPROBE_FUNC(getname, 1)
+
+KPROBE_FUNC(path_openat, 2)
+
+KPROBE_FUNC(vfs_tmpfile, 3)
+
+KPROBE_FUNC(path_init, 4)
+
+KPROBE_FUNC(vfs_open, 5)
+
+KPROBE_FUNC(ext4_file_open, 6)
+
+KPROBE_FUNC(generic_file_open, 7)
+
+KPROBE_FUNC(__dquot_initialize, 8)
+
+KPROBE_FUNC(terminate_walk, 9)
+
+KPROBE_FUNC(putname, 10)
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/source/tools/detect/ext4tool/ext4syscall/ext4open/ext4open.c b/source/tools/detect/ext4tool/ext4syscall/ext4open/ext4open.c
new file mode 100644
index 0000000000000000000000000000000000000000..bbf7422d1a81dc1580d4ce175f5e69c5905eb18d
--- /dev/null
+++ b/source/tools/detect/ext4tool/ext4syscall/ext4open/ext4open.c
@@ -0,0 +1,246 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "ext4open.h"
+#include "ext4open_info.h"
+#include "ext4open.skel.h"
+
+const char *argp_program_version = "ext4open 1.0";
+
+static const struct argp_option usage_options[] = {
+	{"time", 'l', "time", 0, "duration time"},
+	{"pid", 'p', "pid", 0, "pid"},
+	{"tid", 't', "tid", 0, "tid"},
+	{"threshold", 's', "threshold", 0, "Show data that exceeds the threshold(us)"},
+	{"btf", 'b', "BTF_PATH", 0, "Specify path of the custom btf"},
+	{"debug", 'd', NULL, 0, "Enable libbpf debug output"},
+	{NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help"},
+	{},
+};
+
+static struct env
+{
+	int time;
+	int pid;
+	int tid;
+	int threshold;
+	bool debug;
+	char *btf_custom_path;
+} env = {
+	.debug = false,
+	.btf_custom_path = NULL,
+};
+
+static struct ext4open_bpf *obj = NULL;
+
+/* Set by alarm_stop(); main() polls it to know when to dump and exit. */
+static volatile sig_atomic_t exiting;
+
+static void bump_memlock_rlimit(void)
+{
+	struct rlimit rlim_new = {
+		.rlim_cur = RLIM_INFINITY,
+		.rlim_max = RLIM_INFINITY,
+	};
+
+	if (setrlimit(RLIMIT_MEMLOCK, &rlim_new)) {
+		fprintf(stderr, "Failed to increase RLIMIT_MEMLOCK limit!\n");
+		exit(1);
+	}
+}
+
+static int libbpf_print_fn(enum libbpf_print_level level,
+			   const char *format, va_list args)
+{
+	if (!env.debug)
+		return 0;
+	return vfprintf(stderr, format, args);
+}
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	switch (key)
+	{
+	case 'l':
+		env.time = atoi(arg);
+		break;
+	case 'p':
+		env.pid = atoi(arg);
+		break;
+	case 't':
+		env.tid = atoi(arg);
+		break;
+	case 's':
+		env.threshold = atoi(arg);
+		break;
+	case 'd':
+		env.debug = true;
+		break;
+	case 'b':
+		env.btf_custom_path = arg;
+		break;
+	case 'h':
+		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+	return 0;
+}
+
+static void print_data(struct open info)
+{
+	int i;
+
+	printf("pid:%d tid:%d comm:%s \n", info.pid, info.tid, info.comm);
+	printf("openfile:%s\n", info.filename);
+	printf("duration:%f\n", info.duration / SEC2USEC);
+	printf("step func [entrytime offset] [returntime offset] [duration]\n");
+	for (i = 0; i < FUNC_NUM; i++) {
+		if (info.func[i].entrytime == 0 || info.func[i].returntime == 0)
+			continue;
+
+		printf("%d %s [%f %f] [%f %f] [%f]\n",
+			info.func[i].step, syscallinfo[i].name,
+			(double)info.func[i].entrytime / SEC2USEC,
+			(double)(info.func[i].entrytime - info.func[0].entrytime) / SEC2USEC,
+			(double)info.func[i].returntime / SEC2USEC,
+			(double)(info.func[i].returntime - info.func[0].entrytime) / SEC2USEC,
+			info.func[i].duration / SEC2USEC);
+	}
+	printf("\n");
+
+}
+
+static void record_data(void)
+{
+	int fd = bpf_map__fd(obj->maps.open_map);
+	unsigned long key, next_key;
+	unsigned long *prev_key = NULL;	/* NULL asks for the first key */
+	struct open info;
+	struct open max = {0};
+	int i, num = 0;
+
+	while (bpf_map_get_next_key(fd, prev_key, &next_key) == 0) {
+		key = next_key;
+		prev_key = &key;
+
+		if (bpf_map_lookup_elem(fd, &key, &info) != 0)
+			continue;
+
+		/* skip calls still in flight: they have no return time yet */
+		if (info.func[0].returntime == 0)
+			continue;
+
+		/* 1. calculate pid/tid/duration */
+		info.pid = (unsigned int)(info.ptid >> 32);
+		info.tid = (unsigned int)info.ptid;
+
+		for (i = 0; i < FUNC_NUM; i++) {
+			info.func[i].step = syscallinfo[i].step;
+			info.func[i].entrytime = info.func[i].entrytime / USEC2NSEC;
+			info.func[i].returntime = info.func[i].returntime / USEC2NSEC;
+			/* entered but not returned: avoid unsigned wrap-around */
+			info.func[i].duration = 0;
+			if (info.func[i].returntime >= info.func[i].entrytime)
+				info.func[i].duration = (double)
+					(info.func[i].returntime - info.func[i].entrytime);
+		}
+
+		info.duration = info.func[0].duration;
+
+		/* check param */
+		if (env.pid != 0 && env.pid != info.pid)
+			continue;
+		if (env.tid != 0 && env.tid != info.tid)
+			continue;
+
+		if (env.threshold != 0 && env.threshold > info.duration)
+			continue;
+
+		/* skip self */
+		if (strcmp(info.comm, SELF_COMM) == 0)
+			continue;
+
+		/* print info */
+		print_data(info);
+
+		/* record the number of open times and the max */
+		num++;
+
+		if (info.duration > max.duration)
+			memcpy(&max, &info, sizeof(info));
+	}
+
+	printf("============================\n");
+	printf("syscall:open times:%d(%ds)\n", num, env.time);
+	printf("max_duration infomation:\n");
+	print_data(max);
+}
+
+static void alarm_stop(int signo)
+{
+	/* printf() is not async-signal-safe: only flag the request here */
+	exiting = 1;
+}
+
+int main(int argc, char **argv)
+{
+	int err;
+	static const struct argp argp = {
+		.options = usage_options,
+		.parser = parse_arg,
+		.args_doc = NULL,
+	};
+
+	bump_memlock_rlimit();
+	libbpf_set_print(libbpf_print_fn);
+	err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
+	if (err)
+		return err;
+
+	obj = ext4open_bpf__open();
+	if (!obj)
+	{
+		printf("failed to open BPF object\n");
+		return 1;
+	}
+
+	err = ext4open_bpf__load(obj);
+	if (err)
+	{
+		printf("failed to load BPF object: %s\n", strerror(-err));
+		goto cleanup;
+	}
+	err = ext4open_bpf__attach(obj);
+	if (err)
+	{
+		printf("failed to attach BPF programs: %s\n", strerror(-err));
+		goto cleanup;
+	}
+
+	signal(SIGINT, alarm_stop);
+	signal(SIGALRM, alarm_stop);
+
+	/* without -l (time <= 0) trace until SIGINT */
+	if (env.time > 0)
+		alarm(env.time);
+	while (!exiting)
+		sleep(1);
+
+	/* detach first so the map no longer changes while it is dumped */
+	ext4open_bpf__detach(obj);
+	record_data();
+
+cleanup:
+	ext4open_bpf__destroy(obj);
+	return err ? 1 : 0;
+}
diff --git a/source/tools/detect/ext4tool/ext4syscall/ext4open/ext4open.h b/source/tools/detect/ext4tool/ext4syscall/ext4open/ext4open.h
new file mode 100644
index 0000000000000000000000000000000000000000..8fcea372b720b203147c50ebd705e37923ef59e3
--- /dev/null
+++ b/source/tools/detect/ext4tool/ext4syscall/ext4open/ext4open.h
@@ -0,0 +1,28 @@
+#ifndef __EXT4OPEN_H
+#define __EXT4OPEN_H
+
+#define TASK_COMM_LEN 16
+#define FILENAME_LEN 40
+#define FUNC_NUM 11
+#define USEC2NSEC 1000
+#define SEC2USEC 1000000
+#define SELF_COMM "ext4open"
+
+struct open_func {
+	unsigned int step;
+	unsigned long entrytime;
+	unsigned long returntime;
+	double duration;
+};
+
+struct open {
+	unsigned long ptid;
+	unsigned int pid;
+	unsigned int tid;
+	char comm[TASK_COMM_LEN];
+	char filename[FILENAME_LEN];
+	double duration;
+	struct open_func func[FUNC_NUM];
+};
+
+#endif
diff --git a/source/tools/detect/ext4tool/ext4syscall/ext4open/ext4open_info.h b/source/tools/detect/ext4tool/ext4syscall/ext4open/ext4open_info.h
new file mode 100644
index 0000000000000000000000000000000000000000..6425a3ba93c1e6e14d8c5584deb5428b3ebb622d
--- /dev/null
+++ b/source/tools/detect/ext4tool/ext4syscall/ext4open/ext4open_info.h
@@ -0,0 +1,30 @@
+#ifndef __EXT4OPEN_INFO_H
+#define __EXT4OPEN_INFO_H
+
+#define FUNC_MAX 30
+
+/* Row i describes timing slot func[i]; step is the value printed in the "step" column. */
+static const struct struct_syscallinfo {
+	unsigned int number;
+	char name[FUNC_MAX];
+	unsigned int step;
+} syscallinfo[] = {
+	{0, "do_sys_open", 1},
+	{1, "getname", 2},
+	{2, "path_openat", 3},
+
+	/* branch 1 */
+	{3, "vfs_tmpfile", 4},
+	/* branch 2.1 */
+	{4, "path_init", 4},
+	{5, "vfs_open", 5},
+	/* branch 2.2 */
+	{6, "ext4_file_open", 6},
+	{7, "generic_file_open", 7},
+	{8, "__dquot_initialize", 8},
+	{9, "terminate_walk", 9},
+
+	{10, "putname", 10},
+};
+
+#endif
diff --git a/source/tools/detect/ext4tool/ext4syscall/ext4read/Makefile b/source/tools/detect/ext4tool/ext4syscall/ext4read/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..29594ba95fa7647fffc19330895518e158162240
--- /dev/null
+++ b/source/tools/detect/ext4tool/ext4syscall/ext4read/Makefile
@@ -0,0 +1,9 @@
+newdirs += .
+csrcs += $(filter-out $(wildcard *.bpf.c), $(wildcard *.c))
+bpfsrcs += $(wildcard *.bpf.c)
+
+CFLAGS += -static
+
+target = ext4read
+
+include $(SRC)/mk/bpf.mk
diff --git a/source/tools/detect/ext4tool/ext4syscall/ext4read/ext4read.bpf.c b/source/tools/detect/ext4tool/ext4syscall/ext4read/ext4read.bpf.c
new file mode 100644
index 0000000000000000000000000000000000000000..0774c6c9e61909d7475b1f92b98dd77e0fdf2bf1
--- /dev/null
+++ b/source/tools/detect/ext4tool/ext4syscall/ext4read/ext4read.bpf.c
@@ -0,0 +1,119 @@
+#include
+#include
+#include
+#include
+#include
+#include "ext4read.h"
+
+#define BPF_ANY 0
+#define NULL ((void*)0)
+
+/* One record per traced call; the entry probe keys it by the caller's pid_tgid. */
+struct bpf_map_def SEC("maps") read_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(unsigned long),
+	.value_size = sizeof(struct read),
+	.max_entries = 50000,
+};
+
+/* Stamp the entry time of slot n in the current task's record, if one exists. */
+static int trace_entry(int n)
+{
+	unsigned long ptid = bpf_get_current_pid_tgid();
+	struct read *call;
+
+	call = bpf_map_lookup_elem(&read_map, &ptid);
+	if (call != NULL)
+		call->func[n].entrytime = bpf_ktime_get_ns();
+
+	return 0;
+}
+
+/* Stamp the return time of slot n in the current task's record, if one exists. */
+static int trace_return(int n)
+{
+	unsigned long ptid = bpf_get_current_pid_tgid();
+	struct read *call;
+
+	call = bpf_map_lookup_elem(&read_map, &ptid);
+	if (call != NULL)
+		call->func[n].returntime = bpf_ktime_get_ns();
+
+	return 0;
+}
+
+/*
+ * Start a fresh, zeroed record for the current task: slot 0 entry time, pid_tgid, comm, size.
+ */
+SEC("kprobe/vfs_read")
+static int kprobe_vfs_read(struct pt_regs *ctx)
+{
+	unsigned long ptid = bpf_get_current_pid_tgid();
+	struct read call = {0};
+
+	call.func[0].entrytime = bpf_ktime_get_ns();
+	call.ptid = ptid;
+	bpf_get_current_comm(&call.comm, sizeof(call.comm));
+	call.byte = (size_t)PT_REGS_PARM3(ctx);
+
+	bpf_map_update_elem(&read_map, &ptid, &call, BPF_ANY);
+
+	return 0;
+}
+SEC("kretprobe/vfs_read")
+static int kretprobe_vfs_read(struct pt_regs *ctx)
+{
+	unsigned long ptid = bpf_get_current_pid_tgid();
+	struct read *call;
+	struct read call_new = {0};
+	unsigned long key = 0;
+
+	call = bpf_map_lookup_elem(&read_map, &ptid);
+	if (call != NULL) {
+		call->func[0].returntime = bpf_ktime_get_ns();
+
+		__builtin_memcpy(&call_new, call, sizeof(call_new));
+
+		bpf_map_delete_elem(&read_map, &ptid);
+
+		// update map for new key
+		key = call_new.func[0].entrytime - ptid;
+		bpf_map_update_elem(&read_map, &key, &call_new, BPF_ANY);
+	}
+
+	return 0;
+}
+
+#define KPROBE_FUNC(func, n) \
+SEC("kprobe/"#func"") \
+static int kprobe_##func(struct pt_regs *ctx) \
+{ \
+	trace_entry(n); \
+	\
+	return 0; \
+} \
+SEC("kretprobe/"#func"") \
+static int kretprobe_##func(struct pt_regs *ctx) \
+{ \
+	trace_return(n); \
+	\
+	return 0; \
+}
+
+KPROBE_FUNC(ext4_file_read_iter, 1)
+
+KPROBE_FUNC(ext4_dax_read_iter, 2)
+
+KPROBE_FUNC(dax_iomap_rw, 3)
+
+KPROBE_FUNC(generic_file_read_iter, 4)
+
+KPROBE_FUNC(noop_direct_IO, 5)
+
+KPROBE_FUNC(generic_file_buffered_read, 6)
+
+KPROBE_FUNC(ext4_dio_read_iter, 7)
+
+KPROBE_FUNC(generic_file_aio_read, 8)
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/source/tools/detect/ext4tool/ext4syscall/ext4read/ext4read.c b/source/tools/detect/ext4tool/ext4syscall/ext4read/ext4read.c
new file mode 100644
index 0000000000000000000000000000000000000000..98af12ed70e984943cf2852e96b641e868ecbf89
--- /dev/null
+++ b/source/tools/detect/ext4tool/ext4syscall/ext4read/ext4read.c
@@ -0,0 
+1,255 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../ext4syscalllib.h"
+
+#include "ext4read.h"
+#include "ext4read_info.h"
+#include "ext4read.skel.h"
+
+const char *argp_program_version = "ext4read 1.0";
+
+static const struct argp_option usage_options[] = {
+	{"time", 'l', "time", 0, "duration time"},
+	{"pid", 'p', "pid", 0, "pid"},
+	{"tid", 't', "tid", 0, "tid"},
+	{"threshold", 's', "threshold", 0, "Show data that exceeds the threshold(us)"},
+	{"btf", 'b', "BTF_PATH", 0, "Specify path of the custom btf"},
+	{"debug", 'd', NULL, 0, "Enable libbpf debug output"},
+	{NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help"},
+	{},
+};
+
+static struct env
+{
+	int time;
+	int pid;
+	int tid;
+	int threshold;
+	bool debug;
+	char *btf_custom_path;
+} env = {
+	.debug = false,
+	.btf_custom_path = NULL,
+};
+
+static struct ext4read_bpf *obj = NULL;
+
+/* Set by alarm_stop(); main() polls it to know when to dump and exit. */
+static volatile sig_atomic_t exiting;
+
+static void bump_memlock_rlimit(void)
+{
+	struct rlimit rlim_new = {
+		.rlim_cur = RLIM_INFINITY,
+		.rlim_max = RLIM_INFINITY,
+	};
+
+	if (setrlimit(RLIMIT_MEMLOCK, &rlim_new)) {
+		fprintf(stderr, "Failed to increase RLIMIT_MEMLOCK limit!\n");
+		exit(1);
+	}
+}
+
+static int libbpf_print_fn(enum libbpf_print_level level,
+			   const char *format, va_list args)
+{
+	if (!env.debug)
+		return 0;
+	return vfprintf(stderr, format, args);
+}
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	switch (key)
+	{
+	case 'l':
+		env.time = atoi(arg);
+		break;
+	case 'p':
+		env.pid = atoi(arg);
+		break;
+	case 't':
+		env.tid = atoi(arg);
+		break;
+	case 's':
+		env.threshold = atoi(arg);
+		break;
+	case 'd':
+		env.debug = true;
+		break;
+	case 'b':
+		env.btf_custom_path = arg;
+		break;
+	case 'h':
+		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+	return 0;
+}
+
+static void print_data(struct read info)
+{
+	int i;
+
+	printf("pid:%d tid:%d comm:%s \n", info.pid, info.tid, info.comm);
+//	printf("readfile:%s\n", info.filename);
+	printf("byte:%lu\n", info.byte);
+	printf("duration:%f\n", info.duration / SEC2USEC);
+	printf("step func [entrytime offset] [returntime offset] [duration]\n");
+	for (i = 0; i < FUNC_NUM; i++) {
+		if (info.func[i].entrytime == 0 || info.func[i].returntime == 0)
+			continue;
+
+		printf("%d %s [%f %f] [%f %f] [%f]\n",
+			info.func[i].step, syscallinfo[i].name,
+			(double)info.func[i].entrytime / SEC2USEC,
+			(double)(info.func[i].entrytime - info.func[0].entrytime) / SEC2USEC,
+			(double)info.func[i].returntime / SEC2USEC,
+			(double)(info.func[i].returntime - info.func[0].entrytime) / SEC2USEC,
+			info.func[i].duration / SEC2USEC);
+	}
+	printf("\n");
+
+}
+
+static void record_data(void)
+{
+	int fd = bpf_map__fd(obj->maps.read_map);
+	unsigned long key, next_key;
+	unsigned long *prev_key = NULL;	/* NULL asks for the first key */
+	struct read info;
+	struct read max = {0};
+	int i, num = 0;
+
+	while (bpf_map_get_next_key(fd, prev_key, &next_key) == 0) {
+		key = next_key;
+		prev_key = &key;
+
+		if (bpf_map_lookup_elem(fd, &key, &info) != 0)
+			continue;
+
+		/* skip calls still in flight: they have no return time yet */
+		if (info.func[0].returntime == 0)
+			continue;
+
+		/* 1. calculate pid/tid/duration */
+		info.pid = (unsigned int)(info.ptid >> 32);
+		info.tid = (unsigned int)info.ptid;
+
+		for (i = 0; i < FUNC_NUM; i++) {
+			info.func[i].step = syscallinfo[i].step;
+			info.func[i].entrytime = info.func[i].entrytime / USEC2NSEC;
+			info.func[i].returntime = info.func[i].returntime / USEC2NSEC;
+			/* entered but not returned: avoid unsigned wrap-around */
+			info.func[i].duration = 0;
+			if (info.func[i].returntime >= info.func[i].entrytime)
+				info.func[i].duration = (double)
+					(info.func[i].returntime - info.func[i].entrytime);
+		}
+
+		info.duration = info.func[0].duration;
+
+		/* check param */
+		if (env.pid != 0 && env.pid != info.pid)
+			continue;
+		if (env.tid != 0 && env.tid != info.tid)
+			continue;
+
+		if (env.threshold != 0 && env.threshold > info.duration)
+			continue;
+
+		/* skip self */
+		if (strcmp(info.comm, SELF_COMM) == 0)
+			continue;
+
+		/* print info */
+		print_data(info);
+
+		/* record the number of read times and the max */
+		num++;
+
+		if (info.duration > max.duration)
+			memcpy(&max, &info, sizeof(info));
+	}
+
+	printf("============================\n");
+	printf("syscall:[read] times:%d(%ds)\n\n", num, env.time);
+	printf("max_duration infomation:\n");
+	print_data(max);
+}
+
+static void alarm_stop(int signo)
+{
+	/* printf() is not async-signal-safe: only flag the request here */
+	exiting = 1;
+}
+
+int main(int argc, char **argv)
+{
+	int err;
+	struct bpf_program *prog;
+	static const struct argp argp = {
+		.options = usage_options,
+		.parser = parse_arg,
+		.args_doc = NULL,
+	};
+
+	bump_memlock_rlimit();
+	libbpf_set_print(libbpf_print_fn);
+	err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
+	if (err)
+		return err;
+
+	obj = ext4read_bpf__open();
+	if (!obj)
+	{
+		printf("failed to open BPF object\n");
+		return 1;
+	}
+
+	bpf_object__for_each_program(prog, obj->obj) {
+		if (!find_ksym_by_name(bpf_program__name(prog)))
+			bpf_program__set_autoload(prog, false);
+	}
+
+	err = ext4read_bpf__load(obj);
+	if (err)
+	{
+		printf("failed to load BPF object: %s\n", strerror(-err));
+		goto cleanup;
+	}
+	err = ext4read_bpf__attach(obj);
+	if (err)
+	{
+		printf("failed to attach BPF programs: %s\n", strerror(-err));
+		goto cleanup;
+	}
+
+	signal(SIGINT, alarm_stop);
+	signal(SIGALRM, alarm_stop);
+
+	/* without -l (time <= 0) trace until SIGINT */
+	if (env.time > 0)
+		alarm(env.time);
+	while (!exiting)
+		sleep(1);
+
+	/* detach first so the map no longer changes while it is dumped */
+	ext4read_bpf__detach(obj);
+	record_data();
+
+cleanup:
+	ext4read_bpf__destroy(obj);
+	return err ? 1 : 0;
+}
diff --git a/source/tools/detect/ext4tool/ext4syscall/ext4read/ext4read.h b/source/tools/detect/ext4tool/ext4syscall/ext4read/ext4read.h
new file mode 100644
index 0000000000000000000000000000000000000000..a061ebd2bb6a57b2b1f449cdcd0e02c7c0239a5a
--- /dev/null
+++ b/source/tools/detect/ext4tool/ext4syscall/ext4read/ext4read.h
@@ -0,0 +1,29 @@
+#ifndef __EXT4OPEN_H
+#define __EXT4OPEN_H
+
+#define TASK_COMM_LEN 16
+#define FILENAME_LEN 6
+#define FUNC_NUM 9
+#define USEC2NSEC 1000
+#define SEC2USEC 1000000
+#define SELF_COMM "ext4read"
+
+struct read_func {
+	unsigned int step;
+	unsigned long entrytime;
+	unsigned long returntime;
+	double duration;
+};
+
+struct read {
+	unsigned long ptid;
+	unsigned int pid;
+	unsigned int tid;
+	char comm[TASK_COMM_LEN];
+	unsigned char filename[FILENAME_LEN];
+	size_t byte;
+	double 
duration; + struct read_func func[FUNC_NUM]; +}; + +#endif diff --git a/source/tools/detect/ext4tool/ext4syscall/ext4read/ext4read_info.h b/source/tools/detect/ext4tool/ext4syscall/ext4read/ext4read_info.h new file mode 100644 index 0000000000000000000000000000000000000000..347fead6bf109afde32589c288b0868d17d7757e --- /dev/null +++ b/source/tools/detect/ext4tool/ext4syscall/ext4read/ext4read_info.h @@ -0,0 +1,28 @@ +#ifndef __EXT4OPEN_INFO_H +#define __EXT4OPEN_INFO_H + +#define FUNC_MAX 30 + +struct struct_syscallinfo { + unsigned int number; + char name[FUNC_MAX]; + unsigned int step; +} syscallinfo[] = { + {0, "vfs_read", 1}, + {1, "ext4_file_read_iter", 2}, + + /* branch 1 for 4.19 */ + {2, "ext4_dax_read_iter", 3}, + {3, "dax_iomap_rw", 4}, + /* branch 2 for 4.19 */ + {4, "generic_file_read_iter", 3}, + {5, "noop_direct_IO", 4}, + {6, "generic_file_buffered_read", 4}, + /* branch 3 for 4.19*/ + {7, "ext4_dio_read_iter", 3}, + + /* for 3.10 */ + {8, "generic_file_aio_read", 3}, +}; + +#endif diff --git a/source/tools/detect/ext4tool/ext4syscall/ext4syscall/Makefile b/source/tools/detect/ext4tool/ext4syscall/ext4syscall/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..5a23a1627b9d7a0c1b9c276fce370bcb78aae981 --- /dev/null +++ b/source/tools/detect/ext4tool/ext4syscall/ext4syscall/Makefile @@ -0,0 +1,3 @@ +target = ext4syscall + +include $(SRC)/mk/sh.mk diff --git a/source/tools/detect/ext4tool/ext4syscall/ext4syscall/ext4syscall.sh b/source/tools/detect/ext4tool/ext4syscall/ext4syscall/ext4syscall.sh new file mode 100755 index 0000000000000000000000000000000000000000..f95c6a2bc8ee554cc911f7d43df6fb3f1ab5c94d --- /dev/null +++ b/source/tools/detect/ext4tool/ext4syscall/ext4syscall/ext4syscall.sh @@ -0,0 +1,62 @@ +#!/bin/bash +#****************************************************************# +# ScriptName: ext4syscall.sh +# Author: Bixuan Cui +# Create Date: 2022-03-01 +# Function: 
+#***************************************************************#
+if [ "$SYSAK_WORK_PATH" != "" ]; then
+WORK_PATH=$SYSAK_WORK_PATH
+else
+WORK_PATH=/usr/local/sbin/.sysak_compoents
+fi
+
+EXT4OPEN_BIN=$WORK_PATH/tools/ext4open
+EXT4WRITE_BIN=$WORK_PATH/tools/ext4write
+EXT4READ_BIN=$WORK_PATH/tools/ext4read
+EXT4FSYNC_BIN=$WORK_PATH/tools/ext4fsync
+
+# arguments
+usage()
+{
+	echo "ext4syscall: Trace and analyze the syscalls of open/write/read/fsync in ext4"
+	echo "Usage:"
+	echo " sysak ext4syscall [options] [sysname args]]"
+	echo " options: -h, help information"
+	echo " sysname:"
+	echo " open/write/read/fsync"
+	echo " args:"
+	echo " args for the syscall, --help get more"
+	echo "Examples:"
+	echo " sysak ext4syscall -l"
+	echo " sysak ext4syscall write --help"
+}
+
+while getopts 'hl' OPT; do
+	case $OPT in
+		"h")
+			usage
+			exit 0
+			;;
+		*)
+			usage
+			exit 0
+			;;
+	esac
+done
+
+sysname=${@:$OPTIND:1}
+cmd=("${@:$OPTIND+1}")	# array, so each tracer argument stays one word
+
+if [ "X${sysname}" == "Xopen" ]; then
+	"$EXT4OPEN_BIN" "${cmd[@]}"
+elif [ "X${sysname}" == "Xwrite" ]; then
+	"$EXT4WRITE_BIN" "${cmd[@]}"
+elif [ "X${sysname}" == "Xread" ]; then
+	"$EXT4READ_BIN" "${cmd[@]}"
+elif [ "X${sysname}" == "Xfsync" ]; then
+	"$EXT4FSYNC_BIN" "${cmd[@]}"
+else
+	echo "not support syscall: ${sysname}"
+	exit 1
+fi
diff --git a/source/tools/detect/ext4tool/ext4syscall/ext4syscalllib.h b/source/tools/detect/ext4tool/ext4syscall/ext4syscalllib.h
new file mode 100644
index 0000000000000000000000000000000000000000..3eb3e804b348dc159957a2d97ef3f3c7b85d22b0
--- /dev/null
+++ b/source/tools/detect/ext4tool/ext4syscall/ext4syscalllib.h
@@ -0,0 +1,65 @@
+#ifndef EXT4SYSCALLLIB_H
+#define EXT4SYSCALLLIB_H
+
+/*
+ * NOTE(review): the <...> include targets were stripped from this copy of the
+ * patch; the list below is rebuilt from the names this header uses.
+ */
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define CMD_LEN 1024
+#define SYM_LEN 20
+
+/*
+ * Tell whether the kernel function probed by a BPF program exists.
+ *
+ * name: program name containing "probe_" followed by the kernel symbol,
+ *       e.g. "kprobe_vfs_write" or "kretprobe_vfs_write".
+ *
+ * Returns true when a line of /proc/kallsyms names exactly that symbol, and
+ * false when name has no "probe_" part, the file cannot be opened, or the
+ * symbol is not listed.
+ *
+ * The file is read directly instead of through a popen() shell pipeline, so
+ * name is never interpreted by a shell and no fixed-size command buffer can
+ * overflow.
+ */
+static bool find_ksym_by_name(const char *name)
+{
+	static const char marker[] = "probe_";
+	char line[CMD_LEN];
+	char sym[256];
+	const char *wanted;
+	bool found = false;
+	FILE *fp;
+
+	if (name == NULL)
+		return false;
+	wanted = strstr(name, marker);
+	if (wanted == NULL)
+		return false;
+	wanted += sizeof(marker) - 1;
+	if (*wanted == '\0')
+		return false;
+
+	fp = fopen("/proc/kallsyms", "r");
+	if (fp == NULL)
+		return false;
+
+	/* each line reads "<address> <type> <symbol> [module]" */
+	while (fgets(line, sizeof(line), fp) != NULL) {
+		if (sscanf(line, "%*s %*s %255s", sym) == 1 &&
+		    strcmp(sym, wanted) == 0) {
+			found = true;
+			break;
+		}
+	}
+
+	fclose(fp);
+	return found;
+}
+
+#endif
diff --git a/source/tools/detect/ext4tool/ext4syscall/ext4write/Makefile b/source/tools/detect/ext4tool/ext4syscall/ext4write/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..fc563eff514d4496818ce623c2154020c117c0ec
--- /dev/null
+++ b/source/tools/detect/ext4tool/ext4syscall/ext4write/Makefile
@@ -0,0 +1,9 @@
+newdirs += .
+csrcs += $(filter-out $(wildcard *.bpf.c), $(wildcard *.c))
+bpfsrcs += $(wildcard *.bpf.c)
+
+CFLAGS += -static
+
+target = ext4write
+
+include $(SRC)/mk/bpf.mk
diff --git a/source/tools/detect/ext4tool/ext4syscall/ext4write/ext4write.bpf.c b/source/tools/detect/ext4tool/ext4syscall/ext4write/ext4write.bpf.c
new file mode 100644
index 0000000000000000000000000000000000000000..e6f60baec464e4cafc77de04f28e1afba4b2c781
--- /dev/null
+++ b/source/tools/detect/ext4tool/ext4syscall/ext4write/ext4write.bpf.c
@@ -0,0 +1,150 @@
+/*
+ * ext4write.bpf.c - kernel side of the ext4 write-path tracer.
+ *
+ * A kprobe on vfs_write() opens one record per thread, kprobes/kretprobes on
+ * the functions below stamp their entry and return times into it, and the
+ * kretprobe on vfs_write() files the finished record for user space.
+ */
+
+/*
+ * NOTE(review): the <...> include targets were stripped from this copy of the
+ * patch (five of them); the four below are what the names used here need -
+ * confirm the list against the tree.
+ */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_tracing.h>
+#include "ext4write.h"
+
+#define BPF_ANY 0
+#define NULL ((void*)0)
+
+/*
+ * Keyed by pid_tgid while a vfs_write() is in flight and by
+ * "entry time - pid_tgid" once it has returned.
+ */
+struct bpf_map_def SEC("maps") write_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(unsigned long),
+	.value_size = sizeof(struct write),
+	.max_entries = 50000,
+};
+
+/*
+ * Stamp the entry time of slot n in the current thread's open record.
+ * __always_inline: kernels without BPF-to-BPF call support cannot load a
+ * program that calls a real subprogram.
+ */
+static __always_inline int trace_entry(int n)
+{
+	unsigned long ptid = bpf_get_current_pid_tgid();
+	struct write *call;
+
+	call = bpf_map_lookup_elem(&write_map, &ptid);
+	if (call != NULL)
+		call->func[n].entrytime = bpf_ktime_get_ns();
+
+	return 0;
+}
+
+/* Stamp the return time of slot n in the current thread's open record. */
+static __always_inline int trace_return(int n)
+{
+	unsigned long ptid = bpf_get_current_pid_tgid();
+	struct write *call;
+
+	call = bpf_map_lookup_elem(&write_map, &ptid);
+	if (call != NULL)
+		call->func[n].returntime = bpf_ktime_get_ns();
+
+	return 0;
+}
+
+/* Open a fresh record for this thread: start time, comm and third argument (byte count). */
+SEC("kprobe/vfs_write")
+static int kprobe_vfs_write(struct pt_regs *ctx)
+{
+	unsigned long ptid = bpf_get_current_pid_tgid();
+	struct write call = {0};
+
+	call.func[0].entrytime = bpf_ktime_get_ns();
+	call.ptid = ptid;
+	bpf_get_current_comm(&call.comm, sizeof(call.comm));
+	call.byte = (size_t)PT_REGS_PARM3(ctx);
+
+	bpf_map_update_elem(&write_map, &ptid, &call, BPF_ANY);
+
+	return 0;
+}
+
+/*
+ * Close the record: stamp the return time and move it from the pid_tgid key
+ * to "entry time - pid_tgid", which frees the pid_tgid key for the thread's
+ * next vfs_write().
+ */
+SEC("kretprobe/vfs_write")
+static int kretprobe_vfs_write(struct pt_regs *ctx)
+{
+	unsigned long ptid = bpf_get_current_pid_tgid();
+	struct write *call;
+	struct write call_new = {0};
+	unsigned long key = 0;
+
+	call = bpf_map_lookup_elem(&write_map, &ptid);
+	if (call != NULL) {
+		call->func[0].returntime = bpf_ktime_get_ns();
+
+		__builtin_memcpy(&call_new, call, sizeof(call_new));
+
+		bpf_map_delete_elem(&write_map, &ptid);
+
+		/* update map for new key */
+		key = call_new.func[0].entrytime - ptid;
+		bpf_map_update_elem(&write_map, &key, &call_new, BPF_ANY);
+	}
+
+	return 0;
+}
+
+/* Emit the kprobe/kretprobe pair that times kernel function func in slot n. */
+#define KPROBE_FUNC(func, n) \
+SEC("kprobe/"#func"") \
+static int kprobe_##func(struct pt_regs *ctx) \
+{ \
+	trace_entry(n); \
+ \
+	return 0; \
+} \
+SEC("kretprobe/"#func"") \
+static int kretprobe_##func(struct pt_regs *ctx) \
+{ \
+	trace_return(n); \
+ \
+	return 0; \
+}
+
+/* slot numbers are the indexes of syscallinfo[] in ext4write_info.h */
+KPROBE_FUNC(file_start_write, 1)
+
+KPROBE_FUNC(ext4_file_write_iter, 2)
+
+KPROBE_FUNC(ext4_dax_write_iter, 3)
+
+KPROBE_FUNC(dax_iomap_rw, 4)
+
+KPROBE_FUNC(ext4_dio_write_iter, 5)
+
+KPROBE_FUNC(ext4_buffered_write_iter, 6)
+
+KPROBE_FUNC(generic_perform_write, 7)
+
+KPROBE_FUNC(__generic_file_write_iter, 8)
+
+KPROBE_FUNC(ext4_file_write, 9)
+
+KPROBE_FUNC(generic_write_sync, 10)
+
+KPROBE_FUNC(file_end_write, 11)
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/source/tools/detect/ext4tool/ext4syscall/ext4write/ext4write.c
b/source/tools/detect/ext4tool/ext4syscall/ext4write/ext4write.c
new file mode 100644
index 0000000000000000000000000000000000000000..edd9251e648f3c9bb125148bf7dd219f356d9fbd
--- /dev/null
+++ b/source/tools/detect/ext4tool/ext4syscall/ext4write/ext4write.c
@@ -0,0 +1,341 @@
+/*
+ * ext4write - user-space loader and reporter for the ext4 write-path tracer.
+ *
+ * Loads the BPF skeleton, keeps only the probes whose target symbol exists on
+ * the running kernel, traces for the requested time (or until Ctrl-C) and then
+ * prints every recorded call followed by the slowest one.
+ */
+
+/*
+ * NOTE(review): the <...> include targets were stripped from this copy of the
+ * patch; the list below is rebuilt from the names this file uses - confirm it
+ * against the tree.
+ */
+#include <argp.h>
+#include <errno.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/resource.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "../ext4syscalllib.h"
+
+#include "ext4write.h"
+#include "ext4write_info.h"
+#include "ext4write.skel.h"
+
+const char *argp_program_version = "ext4write 1.0";
+
+static const struct argp_option usage_options[] = {
+	{"time", 'l', "time", 0, "duration time"},
+	{"pid", 'p', "pid", 0, "pid"},
+	{"tid", 't', "tid", 0, "tid"},
+	{"threshold", 's', "threshold", 0, "Show data that exceeds the threshold(us)"},
+	{"btf", 'b', "BTF_PATH", 0, "Specify path of the custom btf"},
+	{"debug", 'd', NULL, 0, "Enable libbpf debug output"},
+	{NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help"},
+	{},
+};
+
+/* Command-line settings; a numeric field left at 0 means "not set". */
+static struct env
+{
+	int time;
+	int pid;
+	int tid;
+	int threshold;
+	bool debug;
+	/* NOTE(review): filled from -b but never passed on to libbpf here. */
+	char *btf_custom_path;
+} env = {
+	.debug = false,
+	.btf_custom_path = NULL,
+};
+
+static struct ext4write_bpf *obj = NULL;
+
+/* Set by the signal handler, polled by main(). */
+static volatile sig_atomic_t stop_requested;
+
+/* Raise RLIMIT_MEMLOCK to unlimited; exits the process on failure. */
+static void bump_memlock_rlimit(void)
+{
+	struct rlimit rlim_new = {
+		.rlim_cur = RLIM_INFINITY,
+		.rlim_max = RLIM_INFINITY,
+	};
+
+	if (setrlimit(RLIMIT_MEMLOCK, &rlim_new)) {
+		fprintf(stderr, "Failed to increase RLIMIT_MEMLOCK limit!\n");
+		exit(1);
+	}
+}
+
+/* libbpf log callback: print to stderr only when -d was given. */
+static int libbpf_print_fn(enum libbpf_print_level level,
+			   const char *format, va_list args)
+{
+	if (!env.debug)
+		return 0;
+	return vfprintf(stderr, format, args);
+}
+
+/* Parse a non-negative decimal int into *out; 0 on success, -1 on bad input. */
+static int parse_count(const char *arg, int *out)
+{
+	char *end;
+	long val;
+
+	errno = 0;
+	val = strtol(arg, &end, 10);
+	if (end == arg || *end != '\0' || errno == ERANGE || val < 0 || val > INT_MAX)
+		return -1;
+
+	*out = (int)val;
+	return 0;
+}
+
+/* argp callback: store one option in env; bad numbers end in argp_error(). */
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	int *number = NULL;
+
+	switch (key)
+	{
+	case 'l':
+		number = &env.time;
+		break;
+	case 'p':
+		number = &env.pid;
+		break;
+	case 't':
+		number = &env.tid;
+		break;
+	case 's':
+		number = &env.threshold;
+		break;
+	case 'd':
+		env.debug = true;
+		break;
+	case 'b':
+		env.btf_custom_path = arg;
+		break;
+	case 'h':
+		/* -h is declared in usage_options, so it has to be served here */
+		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+
+	if (number && parse_count(arg, number))
+		argp_error(state, "invalid value for -%c: %s", key, arg);
+	return 0;
+}
+
+/* Print one record: summary lines, then a row per function that was entered and left. */
+static void print_data(struct write info)
+{
+	unsigned long start = info.func[0].entrytime;
+	int i;
+
+	printf("pid:%u tid:%u comm:%s \n", info.pid, info.tid, info.comm);
+	printf("byte:%zu\n", info.byte);
+	printf("duration:%f\n", info.duration / SEC2USEC);
+	printf("step func [entrytime offset] [returntime offset] [duration]\n");
+	for (i = 0; i < FUNC_NUM; i++) {
+		const struct write_func *f = &info.func[i];
+
+		if (f->entrytime == 0 || f->returntime == 0)
+			continue;
+
+		printf("%u %s [%f %f] [%f %f] [%f]\n",
+			f->step, syscallinfo[i].name,
+			(double)f->entrytime / SEC2USEC,
+			(double)(f->entrytime - start) / SEC2USEC,
+			(double)f->returntime / SEC2USEC,
+			(double)(f->returntime - start) / SEC2USEC,
+			f->duration / SEC2USEC);
+	}
+	printf("\n");
+}
+
+/*
+ * Fetch the key after *prev, or the first key when prev is NULL.
+ * Older kernels (reportedly before 4.12) reject a NULL key with EFAULT; there
+ * we retry with a key that is presumably absent (0), which restarts a hash map.
+ */
+static int next_record_key(int fd, const unsigned long *prev, unsigned long *next)
+{
+	unsigned long absent = 0;
+
+	if (prev != NULL)
+		return bpf_map_get_next_key(fd, prev, next);
+	if (bpf_map_get_next_key(fd, NULL, next) == 0)
+		return 0;
+	if (errno != EFAULT)
+		return -1;
+	return bpf_map_get_next_key(fd, &absent, next);
+}
+
+/*
+ * Walk write_map, print every finished call that passes the -p/-t/-s filters
+ * and finish with the call count and the slowest call.
+ */
+static void record_data(void)
+{
+	int fd = bpf_map__fd(obj->maps.write_map);
+	unsigned long key = 0, next_key;
+	const unsigned long *prev = NULL;
+	struct write info;
+	struct write max = {0};
+	int i, num = 0;
+
+	while (next_record_key(fd, prev, &next_key) == 0) {
+		key = next_key;
+		prev = &key;
+
+		/* the entry may be gone again by the time we read it */
+		if (bpf_map_lookup_elem(fd, &key, &info) != 0)
+			continue;
+
+		/* func[0] never returned: its duration would wrap around */
+		if (info.func[0].returntime == 0)
+			continue;
+
+		/* 1. calculate pid/tid/duration */
+		info.pid = (unsigned int)(info.ptid >> 32);
+		info.tid = (unsigned int)info.ptid;
+
+		for (i = 0; i < FUNC_NUM; i++) {
+			struct write_func *f = &info.func[i];
+
+			f->step = syscallinfo[i].step;
+			f->entrytime /= USEC2NSEC;
+			f->returntime /= USEC2NSEC;
+			f->duration = f->returntime >= f->entrytime ?
+				(double)(f->returntime - f->entrytime) : 0.0;
+		}
+
+		info.duration = info.func[0].duration;
+
+		/* check param */
+		if (env.pid != 0 && (unsigned int)env.pid != info.pid)
+			continue;
+		if (env.tid != 0 && (unsigned int)env.tid != info.tid)
+			continue;
+
+		if (env.threshold != 0 && env.threshold > info.duration)
+			continue;
+
+		/* skip self */
+		if (strcmp(info.comm, SELF_COMM) == 0)
+			continue;
+
+		/* print info */
+		print_data(info);
+
+		/* record the number of write times and the max */
+		num++;
+
+		if (num == 1 || info.duration > max.duration)
+			max = info;
+	}
+
+	printf("============================\n");
+	printf("syscall:[write] times:%d(%ds)\n\n", num, env.time);
+	printf("max_duration infomation:\n");
+	if (num > 0)
+		print_data(max);
+}
+
+/* SIGINT/SIGALRM handler: only raises the flag, main() prints the report. */
+static void alarm_stop(int signo)
+{
+	(void)signo;
+	stop_requested = 1;
+}
+
+int main(int argc, char **argv)
+{
+	static const struct argp argp = {
+		.options = usage_options,
+		.parser = parse_arg,
+		.args_doc = NULL,
+	};
+	struct bpf_program *prog;
+	sigset_t stop_set, old_set, wait_set;
+	int ret = 1;
+	int err;
+
+	/* parse first so that --help and bad options need no privileges */
+	err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
+	if (err)
+		return err;
+
+	bump_memlock_rlimit();
+	libbpf_set_print(libbpf_print_fn);
+
+	obj = ext4write_bpf__open();
+	if (!obj)
+	{
+		fprintf(stderr, "failed to open BPF object\n");
+		return 1;
+	}
+
+	/* do not load probes for functions this kernel does not have */
+	bpf_object__for_each_program(prog, obj->obj) {
+		if (!find_ksym_by_name(bpf_program__name(prog)))
+			bpf_program__set_autoload(prog, false);
+	}
+
+	err = ext4write_bpf__load(obj);
+	if (err)
+	{
+		fprintf(stderr, "failed to load BPF object: %s\n", strerror(-err));
+		goto cleanup;
+	}
+	err = ext4write_bpf__attach(obj);
+	if (err)
+	{
+		fprintf(stderr, "failed to attach BPF programs: %s\n", strerror(-err));
+		goto cleanup;
+	}
+
+	/*
+	 * Keep both stop signals blocked except inside sigsuspend(), so one that
+	 * arrives between the flag test and the wait cannot be lost.
+	 */
+	sigemptyset(&stop_set);
+	sigaddset(&stop_set, SIGINT);
+	sigaddset(&stop_set, SIGALRM);
+	sigprocmask(SIG_BLOCK, &stop_set, &old_set);
+	wait_set = old_set;
+	sigdelset(&wait_set, SIGINT);
+	sigdelset(&wait_set, SIGALRM);
+
+	signal(SIGINT, alarm_stop);
+	signal(SIGALRM, alarm_stop);
+
+	/* without -l the trace runs until Ctrl-C */
+	if (env.time > 0)
+		alarm(env.time);
+	while (!stop_requested)
+		sigsuspend(&wait_set);
+	sigprocmask(SIG_SETMASK, &old_set, NULL);
+
+	/* stop the probes so the map no longer changes while it is read */
+	ext4write_bpf__detach(obj);
+	record_data();
+	ret = 0;
+
+cleanup:
+	ext4write_bpf__destroy(obj);
+	return ret;
+}
diff --git a/source/tools/detect/ext4tool/ext4syscall/ext4write/ext4write.h b/source/tools/detect/ext4tool/ext4syscall/ext4write/ext4write.h
new file mode 100644
index 0000000000000000000000000000000000000000..9768acdb9a476e13973be6689d96f7c34d15315d
--- /dev/null
+++ b/source/tools/detect/ext4tool/ext4syscall/ext4write/ext4write.h
@@ -0,0 +1,33 @@
+#ifndef EXT4WRITE_H
+#define EXT4WRITE_H
+
+/* Record layout and constants of the ext4 write tracer. */
+
+#define TASK_COMM_LEN 16
+#define FILENAME_LEN 6
+#define FUNC_NUM 12	/* slots in struct write.func[] */
+#define USEC2NSEC 1000
+#define SEC2USEC 1000000
+#define SELF_COMM "ext4write"	/* records with this comm are not reported */
+
+/* Timing of one probed function within one traced call. */
+struct write_func {
+	unsigned int step;
+	unsigned long entrytime;
+	unsigned long returntime;
+	double duration;
+};
+
+/* One traced call as read back from write_map. */
+struct write {
+	unsigned long ptid;	/* pid in the upper 32 bits, tid in the lower 32 */
+	unsigned int pid;
+	unsigned int tid;
+	char comm[TASK_COMM_LEN];
+	unsigned char filename[FILENAME_LEN];
+	size_t byte;
+	double duration;
+	struct write_func func[FUNC_NUM];
+};
+
+#endif
diff --git a/source/tools/detect/ext4tool/ext4syscall/ext4write/ext4write_info.h b/source/tools/detect/ext4tool/ext4syscall/ext4write/ext4write_info.h
new file mode 100644
index 0000000000000000000000000000000000000000..33ea0863b14219c8f03c4035a8d908ba70c7ef83
--- /dev/null
+++ b/source/tools/detect/ext4tool/ext4syscall/ext4write/ext4write_info.h
@@ -0,0 +1,40 @@
+#ifndef EXT4WRITE_INFO_H
+#define EXT4WRITE_INFO_H
+
+#define FUNC_MAX 30
+
+/*
+ * Probed write-path functions. The array index is the slot in struct
+ * write.func[]; "step" is the number printed in the report's first column.
+ * static const: a table defined in a header must not become one writable
+ * global per including file.
+ */
+static const struct struct_syscallinfo {
+	unsigned int number;
+	char name[FUNC_MAX];
+	unsigned int step;
+} syscallinfo[] = {
+	{0, "vfs_write", 1},
+	{1, "file_start_write", 2},
+	{2, "ext4_file_write_iter", 3},
+
+	/* branch 1 for 4.19 */
+	{3, "ext4_dax_write_iter", 4},
+	{4, "dax_iomap_rw", 5},
+	/* branch 2 for 4.19 */
+	{5, "ext4_dio_write_iter", 4},
+	/* branch 3 for 4.19*/
+	{6, "ext4_buffered_write_iter", 4},
+	{7, "generic_perform_write", 5},
+
+	/* for 4.9 */
+	{8, "__generic_file_write_iter", 4},
+
+	/* for 3.10 */
+	{9, "ext4_file_write", 4},
+
+	{10, "generic_write_sync", 6},
+	{11, "file_end_write", 7},
+};
+
+#endif