diff --git a/source/tools/monitor/sched/schedmoni/Makefile b/source/tools/monitor/sched/schedmoni/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..f9db0eb77b62ea52d300c99eaf2087ea0d934c59 --- /dev/null +++ b/source/tools/monitor/sched/schedmoni/Makefile @@ -0,0 +1,9 @@ + +newdirs := $(shell find ./ -type d) + +bpfsrcs := $(wildcard bpf/*.bpf.c) +csrcs := $(wildcard *.c) +target := schedmoni + +LDFLAGS += -pthread +include $(SRC)/mk/bpf.mk diff --git a/source/tools/monitor/sched/schedmoni/README.txt b/source/tools/monitor/sched/schedmoni/README.txt new file mode 100644 index 0000000000000000000000000000000000000000..46457a5def29314ce0ce08aaf9800860cd4e9510 --- /dev/null +++ b/source/tools/monitor/sched/schedmoni/README.txt @@ -0,0 +1,77 @@ +1 run the nosched + $ sudo ./out/sysak nosched #default threshold 10ms + or + $ sudo ./out/sysak nosched -t 1 #set the threshold to 1ms + +The output looks like: +sudo ./out/sysak nosched -t 1 +Threshold set to 1 ms +libbpf: loading object 'nosched_bpf' from buffer +..... (#a lot of messages) +Successfully started! Please run `sudo cat /sys/kernel/debug/tracing/trace_pipe` to see output of the BPF programs. +Running.... + tips:Ctl+c show the result! + +2 get the result +2.1 use trace_pipe(Optional, for debug) + $ sudo cat /sys/kernel/debug/tracing/trace_pipe + <...>-110777 [014] dNh. 19777.314467: 0: cc1 :lat is 1001 us, 1 ticks + <...>-110849 [016] dNh. 19777.556471: 0: cc1 :lat is 1000 us, 1 ticks + <...>-110712 [000] dNh. 19777.932467: 0: cc1 :lat is 1005 us, 1 ticks +2.2 stop the process and get the result +Press Ctrl+C to stop the process; the result looks as follows: +Running.... + tips:Ctl+c show the result! 
+^C +*********************************** +cc1<116321> [19795.442018507]: lat=4000us, lat_tick=4 + apic_timer_interrupt + __lock_text_start + release_pages + tlb_flush_mmu_free + arch_tlb_finish_mmu + tlb_finish_mmu + exit_mmap + mmput + do_exit + do_group_exit + get_signal + do_signal + exit_to_usermode_loop + prepare_exit_to_usermode + swapgs_restore_regs_and_return_to_usermode +---------------------- +cc1<111581> [19775.265934964]: lat=1005us, lat_tick=1 + apic_timer_interrupt + free_unref_page_list + release_pages + tlb_flush_mmu_free + arch_tlb_finish_mmu + tlb_finish_mmu + exit_mmap + mmput + do_exit + do_group_exit + __x64_sys_exit_group + do_syscall_64 + entry_SYSCALL_64_after_hwframe +..........(#a lot of messages) + +3 the results +3.1 headers + comm&pid timestamp latency(us) latency(tick) + | | | | +cc1<111581> [19775.265934964]: lat=1005us, lat_tick=1 + +comm&pid: The name (comm) and pid of the task that had the need_resched flag set but did not call schedule() within the threshold time. +timestamp: The timestamp when no_sched happened. +latency(us): How many microseconds the task ran with the need_resched flag set without calling schedule(). +latency(tick): Like latency(us), but counted in ticks. + +3.2 stack +The stack back-trace of the current (offending) context. + apic_timer_interrupt + free_unref_page_list + release_pages + tlb_flush_mmu_free +...... 
diff --git a/source/tools/monitor/sched/schedmoni/bpf/schedmoni.bpf.c b/source/tools/monitor/sched/schedmoni/bpf/schedmoni.bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..249a37dea4f130d918ada0fac81124b763d133e8 --- /dev/null +++ b/source/tools/monitor/sched/schedmoni/bpf/schedmoni.bpf.c @@ -0,0 +1,265 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include "../schedmoni.h" +#include "../nosched.comm.h" + +#define TASK_RUNNING 0 +/* Copy the object P with bpf_probe_read() and evaluate to that copy. */ +#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) + +/* Tool arguments (struct args); the readers in this file look up index 0. */ +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 4); + __type(key, u32); + __type(value, struct args); +} argmap SEC(".maps"); + +/* u32 pid -> u64 timestamp taken with bpf_ktime_get_ns() when the task was enqueued. */ +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 10240); + __type(key, u32); + __type(value, u64); +} start SEC(".maps"); + +/* Perf event array used to push struct event records to user space. */ +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(u32)); +} events SEC(".maps"); + +/* Stack traces filled by bpf_get_stackid(): PERF_MAX_STACK_DEPTH u64 entries each. */ +struct bpf_map_def SEC("maps") stackmap = { + .type = BPF_MAP_TYPE_STACK_TRACE, + .key_size = sizeof(u32), + .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64), + .max_entries = 10000, +}; + +/* struct ext_key -> struct ext_val: one record per reported nosched event. */ +struct bpf_map_def SEC("maps") stackmap_ext = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct ext_key), + .value_size = sizeof(struct ext_val), + .max_entries = 10000, +}; + +/* struct latinfo bookkeeping; the programs in this file key it by CPU id. */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, MAX_MONI_NR); + __type(key, u64); + __type(value, struct latinfo); +} info_map SEC(".maps"); + +/* + * Read one member of the struct stored at index 0 of an array map. + * argp is assigned the looked-up element (NULL when the lookup fails). + * The result starts out as 0, so `type` must be an integer type + * (int, long, long long or their unsigned versions); the expression + * evaluates to 0 when the lookup fails. + */ +#define GETARG_FROM_ARRYMAP(map,argp,type,member)({ \ + type retval = 0; \ + int i = 0; \ + argp = bpf_map_lookup_elem(&map, &i); \ + if (argp) { \ + retval = _(argp->member); \ + } \ + retval; \ + }) + +#define BPF_F_FAST_STACK_CMP (1ULL << 9) +#define KERN_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP) + 
+#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
+#define BITS_PER_LONG 64
+
+/* Return bit @nr of the bitmap at @addr (1 when set, 0 otherwise). */
+static inline int test_bit(int nr, const volatile unsigned long *addr)
+{
+	return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
+}
+
+/*
+ * Return bit @nr of ti->flags.
+ * Only a single flags word is copied, so @nr is taken modulo BITS_PER_LONG
+ * instead of indexing past that word.
+ */
+static inline int test_ti_thread_flag(struct thread_info *ti, int nr)
+{
+	unsigned long flags = _(ti->flags);
+
+	return 1UL & (flags >> (nr & (BITS_PER_LONG-1)));
+}
+
+/* Copy @tsk's thread_info with bpf_probe_read() and test bit @flag in it. */
+static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
+{
+	struct thread_info tf;
+
+	bpf_probe_read(&tf, sizeof(tf), &(tsk->thread_info));
+	return test_ti_thread_flag(&tf, flag);
+}
+
+/* @flag is the bit number of TIF_NEED_RESCHED, supplied by user space. */
+static inline int test_tsk_need_resched(struct task_struct *tsk, int flag)
+{
+	return test_tsk_thread_flag(tsk, flag);
+}
+
+/*
+ * record enqueue timestamp
+ *
+ * Stores "now" in the start map under @pid unless the task is rejected by
+ * the targ_tgid / targ_pid filters from argmap.  pid 0 is never recorded.
+ *
+ * NOTE(review): every caller in this file passes tgid == 0, so a non-zero
+ * targ_tgid filter (-p PID) rejects every task.
+ */
+static __always_inline
+int trace_enqueue(u32 tgid, u32 pid)
+{
+	u64 ts;
+	pid_t targ_tgid, targ_pid;
+	struct args *argp;
+
+	if (!pid)
+		return 0;
+
+	targ_tgid = GETARG_FROM_ARRYMAP(argmap, argp, pid_t, targ_tgid);
+	targ_pid = GETARG_FROM_ARRYMAP(argmap, argp, pid_t, targ_pid);
+	if (targ_tgid && targ_tgid != tgid)
+		return 0;
+	if (targ_pid && targ_pid != pid)
+		return 0;
+
+	ts = bpf_ktime_get_ns();
+	bpf_map_update_elem(&start, &pid, &ts, 0);
+	return 0;
+}
+
+/* A task was woken up: remember when it became runnable. */
+SEC("tp/sched/sched_wakeup")
+int handle__sched_wakeup(struct trace_event_raw_sched_wakeup_template *ctx)
+{
+	pid_t pid = 0;
+	bpf_probe_read(&pid, sizeof(pid), &(ctx->pid));
+
+	return trace_enqueue(0, pid);
+}
+
+/* A newly created task was woken up: remember when it became runnable. */
+SEC("tp/sched/sched_wakeup_new")
+int handle__sched_wakeup_new(struct trace_event_raw_sched_wakeup_template *ctx)
+{
+	pid_t pid = 0;
+	bpf_probe_read(&pid, sizeof(pid), &(ctx->pid));
+
+	return trace_enqueue(0, pid);
+}
+
+/*
+ * Context switch.
+ *  - nosched:    reset this CPU's "need_resched first seen" timestamp.
+ *  - runqslower: compute how long the incoming task waited since it was
+ *                enqueued and emit a struct event when that exceeds min_us.
+ */
+SEC("tp/sched/sched_switch")
+int handle_switch(struct trace_event_raw_sched_switch *ctx)
+{
+	u64 cpuid;
+	u32 pid, prev_pid;
+	long int prev_state;
+	struct event event = {};
+	u64 *tsp, delta_us, min_us;
+	struct args *argp;
+	struct latinfo *latp;
+	struct latinfo lati;
+
+	prev_pid = ctx->prev_pid;
+	pid = ctx->next_pid;
+	prev_state = ctx->prev_state;
+
+	cpuid = bpf_get_smp_processor_id();
+	/* 1st: nosched */
+	latp = bpf_map_lookup_elem(&info_map, &cpuid);
+	if (latp) {
+		latp->last_seen_need_resched_ns = 0;
+	} else {
+		/* first event on this CPU: create a zeroed record */
+		__builtin_memset(&lati, 0, sizeof(struct latinfo));
+		bpf_map_update_elem(&info_map, &cpuid, &lati, BPF_ANY);
+	}
+
+	/* 2nd: runqslower */
+	/* ivcsw: treat like an enqueue event and store timestamp */
+	if (prev_state == TASK_RUNNING)
+		trace_enqueue(0, prev_pid);
+
+	/* fetch timestamp and calculate delta */
+	tsp = bpf_map_lookup_elem(&start, &pid);
+	if (!tsp)
+		return 0;   /* missed enqueue */
+
+	delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;
+	/*
+	 * The timestamp is consumed once the task is switched in; drop it on
+	 * every path so that entries below the threshold do not linger in
+	 * the start map.
+	 */
+	bpf_map_delete_elem(&start, &pid);
+
+	min_us = GETARG_FROM_ARRYMAP(argmap, argp, u64, min_us);
+	if (min_us && delta_us <= min_us)
+		return 0;
+
+	__builtin_memset(&event, 0, sizeof(struct event));
+	event.cpuid = cpuid;
+	event.pid = pid;
+	event.prev_pid = prev_pid;
+	event.delta_us = delta_us;
+	bpf_probe_read(event.task, sizeof(event.task), &(ctx->next_comm));
+	bpf_probe_read(event.prev_task, sizeof(event.prev_task), &(ctx->prev_comm));
+
+	/* output */
+	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
+			      &event, sizeof(event));
+
+	return 0;
+}
+
+/*
+ * Tick accounting hook.
+ * While @p has the need-resched bit set, count ticks and measure the time
+ * since the bit was first seen on this CPU.  Once that time exceeds the
+ * min_us threshold, store the kernel stack id and an ext_val record in
+ * stackmap_ext for user space to print.
+ */
+SEC("kprobe/account_process_tick")
+int BPF_KPROBE(account_process_tick, struct task_struct *p, int user_tick)
+{
+	int args_key;
+	u64 cpuid;
+	u64 resched_latency, lat_us, now;
+	struct latinfo lati, *latp;
+	struct args *argsp;
+	struct key_t key;
+	struct ext_key ext_key;
+	struct ext_val ext_val;
+
+	__builtin_memset(&args_key, 0, sizeof(int));
+	argsp = bpf_map_lookup_elem(&argmap, &args_key);
+	if (!argsp)
+		return 0;
+
+	if (!test_tsk_need_resched(p, _(argsp->flag)))
+		return 0;
+
+	now = bpf_ktime_get_ns();
+
+	__builtin_memset(&cpuid, 0, sizeof(u64));
+	cpuid = bpf_get_smp_processor_id();
+	latp = bpf_map_lookup_elem(&info_map, &cpuid);
+	if (!latp) {
+		/* no record for this CPU yet: start measuring from now */
+		__builtin_memset(&lati, 0, sizeof(struct latinfo));
+		lati.last_seen_need_resched_ns = now;
+		lati.ticks_without_resched = 0;
+		bpf_map_update_elem(&info_map, &cpuid, &lati, BPF_ANY);
+		return 0;
+	}
+
+	if (!latp->last_seen_need_resched_ns) {
+		/* first tick with the flag set since the last switch */
+		latp->last_seen_need_resched_ns = now;
+		latp->ticks_without_resched = 0;
+		return 0;
+	}
+
+	latp->ticks_without_resched++;
+	resched_latency = now - latp->last_seen_need_resched_ns;
+	/*
+	 * resched_latency is in nanoseconds while the threshold is in
+	 * microseconds: convert before comparing.
+	 */
+	lat_us = resched_latency / 1000;
+	if (lat_us <= _(argsp->min_us))
+		return 0;
+
+	__builtin_memset(&key, 0, sizeof(struct key_t));
+	__builtin_memset(&ext_key, 0, sizeof(struct ext_key));
+	__builtin_memset(&ext_val, 0, sizeof(struct ext_val));
+	key.ret = bpf_get_stackid(ctx, &stackmap, KERN_STACKID_FLAGS);
+	ext_key.stamp = now;
+	ext_key.ret = key.ret;
+	ext_val.lat_us = lat_us;
+	bpf_get_current_comm(&ext_val.comm, sizeof(ext_val.comm));
+	ext_val.pid = bpf_get_current_pid_tgid();
+	ext_val.nosched_ticks = latp->ticks_without_resched;
+	ext_val.cpu = cpuid;
+	ext_val.stamp = latp->last_seen_need_resched_ns;
+	bpf_map_update_elem(&stackmap_ext, &ext_key, &ext_val, BPF_ANY);
+
+	return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/source/tools/monitor/sched/schedmoni/nosched.c b/source/tools/monitor/sched/schedmoni/nosched.c
new file mode 100644
index 0000000000000000000000000000000000000000..95a1a4c532a071ffe947a1651b918810eb41d8ec
--- /dev/null
+++ b/source/tools/monitor/sched/schedmoni/nosched.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2021 Sartura
+ * Based on minimal.c by Facebook */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "nosched.comm.h"
+#include "schedmoni.h"
+
+#define MAX_SYMS 300000
+extern FILE *fp_nsc;
+//extern char filename[256] = {0};
+
+extern volatile sig_atomic_t exiting;
+static struct ksym
syms[MAX_SYMS];
+static int sym_cnt;
+
+/*
+ * qsort() comparator: ascending symbol address.
+ * Addresses are compared as unsigned values; returning a subtraction
+ * truncated to int gives the wrong sign for addresses far apart.
+ */
+static int ksym_cmp(const void *p1, const void *p2)
+{
+	unsigned long a = (unsigned long)((const struct ksym *)p1)->addr;
+	unsigned long b = (unsigned long)((const struct ksym *)p2)->addr;
+
+	return (a > b) - (a < b);
+}
+
+/*
+ * Load /proc/kallsyms into syms[] and sort it by address.
+ * Entries with a zero address are skipped; at most MAX_SYMS symbols are
+ * kept.  Returns 0 on success, -ENOENT when the file cannot be opened and
+ * -ENOMEM when a symbol name cannot be duplicated (the symbols read so far
+ * stay usable).
+ */
+int load_kallsyms(void)
+{
+	FILE *f = fopen("/proc/kallsyms", "r");
+	char func[256], buf[256];
+	char symbol;
+	void *addr;
+	int i = 0;
+	int err = 0;
+
+	if (!f)
+		return -ENOENT;
+
+	while (i < MAX_SYMS && fgets(buf, sizeof(buf), f)) {
+		/* %255s: never write past func[] */
+		if (sscanf(buf, "%p %c %255s", &addr, &symbol, func) != 3)
+			break;
+		if (!addr)
+			continue;
+		syms[i].name = strdup(func);
+		if (!syms[i].name) {
+			err = -ENOMEM;
+			break;
+		}
+		syms[i].addr = (long) addr;
+		i++;
+	}
+	fclose(f);
+	sym_cnt = i;
+	qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
+	return err;
+}
+
+/*
+ * Find the symbol that contains address @key.
+ * Returns NULL when no symbols are loaded, the exact or closest preceding
+ * symbol otherwise; a key below the first symbol maps to syms[0].
+ */
+struct ksym *ksym_search(long key)
+{
+	unsigned long ukey = (unsigned long)key;
+	int start = 0, end = sym_cnt;
+
+	/* kallsyms not loaded. return NULL */
+	if (sym_cnt <= 0)
+		return NULL;
+
+	/* on exit without a match, start is the first symbol above key */
+	while (start < end) {
+		int mid = start + (end - start) / 2;
+		unsigned long cur = (unsigned long)syms[mid].addr;
+
+		if (ukey < cur)
+			end = mid;
+		else if (ukey > cur)
+			start = mid + 1;
+		else
+			return &syms[mid];
+	}
+
+	if (start >= 1)
+		/* valid ksym; never touches syms[sym_cnt] */
+		return &syms[start - 1];
+
+	/* out of range. return _stext */
+	return &syms[0];
+}
+
+/* Print one stack frame as "<addr> name"; a zero address prints nothing. */
+static void print_ksym(__u64 addr)
+{
+	struct ksym *sym;
+
+	if (!addr)
+		return;
+
+	sym = ksym_search(addr);
+	/* the table can be empty, e.g. when kallsyms shows only zero addresses */
+	fprintf(fp_nsc, "<%llx> %s\n", addr, sym ? sym->name : "[unknown]");
+}
+
+/*
+ * Print the stack stored in map @fd under key->ret.
+ * NOTE(review): frames 0..6 and the last slot are skipped on purpose by
+ * the original code -- presumably the probe/interrupt entry frames; confirm.
+ */
+static void print_stack(int fd, struct ext_key *key)
+{
+	int i;
+	__u64 ip[PERF_MAX_STACK_DEPTH] = {};
+
+	if (bpf_map_lookup_elem(fd, &key->ret, &ip) == 0) {
+		for (i = 7; i < PERF_MAX_STACK_DEPTH - 1; i++)
+			print_ksym(ip[i]);
+	} else {
+		/* a negative id is the error returned by bpf_get_stackid() */
+		if ((int)(key->ret) < 0)
+			fprintf(fp_nsc, "<0x0000000000000000>:error=%d\n", (int)(key->ret));
+	}
+}
+
+#define SEC_TO_NS (1000*1000*1000)
+/*
+ * Convert a CLOCK_MONOTONIC timestamp in ns (@stamp) into a local
+ * wall-clock date string "YYYY-MM-DD_HH:MM:SS" written to @dt (@len bytes).
+ * @dt is left untouched when the time cannot be converted.
+ */
+static void stamp_to_date(__u64 stamp, char dt[], int len)
+{
+	time_t t, last;
+	__u64 now_ns, age_sec;
+	struct tm *tm;
+	struct timespec ts;
+
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+	time(&t);
+	now_ns = (__u64)ts.tv_sec * SEC_TO_NS + ts.tv_nsec;
+	/* how long ago the event happened, in whole seconds */
+	age_sec = now_ns > stamp ? (now_ns - stamp) / SEC_TO_NS : 0;
+
+	last = t - (time_t)age_sec;
+	tm = localtime(&last);
+	if (tm)
+		strftime(dt, len, "%F_%H:%M:%S", tm);
+}
+
+/*
+ * Drain map @ext_fd: print every recorded event followed by its stack
+ * (looked up in map @fd) and delete the record.
+ */
+static void print_stacks(int fd, int ext_fd)
+{
+	char dt[64] = {0};
+	struct ext_key ext_key = {}, next_key;
+	struct ext_val value;
+
+	fprintf(fp_nsc, "%-21s %-6s %-16s %-8s %-10s\n", "TIME", "CPU", "COMM", "TID", "LAT(us)");
+	while (bpf_map_get_next_key(ext_fd, &ext_key, &next_key) == 0) {
+		/* only print records that were actually read */
+		if (bpf_map_lookup_elem(ext_fd, &next_key, &value) == 0) {
+			memset(dt, 0, sizeof(dt));
+			stamp_to_date(value.stamp, dt, sizeof(dt));
+			fprintf(fp_nsc, "%-21s %-6d %-16s %-8d %-10llu\n",
+				dt, value.cpu, value.comm, value.pid, value.lat_us);
+			print_stack(fd, &next_key);
+		}
+		bpf_map_delete_elem(ext_fd, &next_key);
+		ext_key = next_key;
+	}
+	printf("\n");
+}
+
+/*
+ * Thread body of the nosched reporter.
+ * @arg is a struct tharg: fd is the stack map, ext_fd the event map.
+ * Loads kallsyms, sleeps until `exiting` is set, then dumps all events.
+ * Always returns NULL.
+ */
+void *runnsc_handler(void *arg)
+{
+	int err;
+	struct tharg *runnsc = (struct tharg *)arg;
+
+	err = load_kallsyms();
+	if (err) {
+		fprintf(stderr, "Failed to load kallsyms\n");
+		return NULL;
+	}
+
+	while (!exiting) {
+		sleep(1);
+	}
+	printf("\n");
+	print_stacks(runnsc->fd, runnsc->ext_fd);
+
+	return NULL;
+}
diff --git a/source/tools/monitor/sched/schedmoni/nosched.comm.h
b/source/tools/monitor/sched/schedmoni/nosched.comm.h
new file mode 100644
index 0000000000000000000000000000000000000000..ae5e97b83103ccc462167db37dce4cf1cf97e30e
--- /dev/null
+++ b/source/tools/monitor/sched/schedmoni/nosched.comm.h
@@ -0,0 +1,35 @@
+/* Definitions shared by the nosched BPF program and its user-space reader. */
+#ifndef NOSCHED_COMM_H
+#define NOSCHED_COMM_H
+
+#define BPF_ANY 0
+#define MAX_MONI_NR 1024
+
+/* latency thresh:10ms*/
+#define LAT_THRESH_NS (10*1000*1000)
+#define TASK_COMM_LEN 16
+#define PERF_MAX_STACK_DEPTH 32
+
+/* One /proc/kallsyms entry. */
+struct ksym {
+	long addr;
+	char *name;
+};
+
+/* Stack id as returned by bpf_get_stackid(). */
+struct key_t {
+	__u32 ret;
+};
+
+/* Key of one reported event: stack id plus the time it was reported. */
+struct ext_key {
+	__u32 ret;
+	__u64 stamp;
+};
+
+/* Payload of one reported event. */
+struct ext_val {
+	int pid, cpu;
+	int nosched_ticks;
+	__u64 lat_us, stamp;
+	char comm[TASK_COMM_LEN];
+};
+
+/* Bookkeeping stored in info_map. */
+struct latinfo {
+	__u64 last_seen_need_resched_ns;
+	int ticks_without_resched;
+};
+
+#endif /* NOSCHED_COMM_H */
diff --git a/source/tools/monitor/sched/schedmoni/runqslow.c b/source/tools/monitor/sched/schedmoni/runqslow.c
new file mode 100644
index 0000000000000000000000000000000000000000..0b0a2ee59a1454540473dac5fe84303587a67bc8
--- /dev/null
+++ b/source/tools/monitor/sched/schedmoni/runqslow.c
@@ -0,0 +1,76 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "schedmoni.h"
+
+extern FILE *fp_rsw;
+extern volatile sig_atomic_t exiting;
+static int th_ret;
+extern struct env env;
+
+/*
+ * Perf buffer sample callback: @data is one struct event.
+ * Writes one line to fp_rsw, with the previous task appended when
+ * env.previous is set.
+ */
+void handle_event(void *ctx, int cpu, void *data, __u32 data_sz)
+{
+	const struct event *e = data;
+	struct tm *tm;
+	char ts[64];
+	time_t t;
+
+	time(&t);
+	tm = localtime(&t);
+	strftime(ts, sizeof(ts), "%F_%H:%M:%S", tm);
+	if (env.previous)
+		fprintf(fp_rsw, "%-21s %-6d %-16s %-8d %-10llu %-16s %-6d\n", ts, e->cpuid, e->task, e->pid,
+			e->delta_us, e->prev_task, e->prev_pid);
+	else
+		fprintf(fp_rsw, "%-21s %-6d %-16s %-8d %-10llu\n", ts, e->cpuid, e->task, e->pid, e->delta_us);
+}
+
+/* Perf buffer lost-sample callback: report how many events were dropped. */
+void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt)
+{
+	printf("Lost %llu events on CPU #%d!\n", lost_cnt, cpu);
+}
+
+/*
+ * Thread body of the run-queue latency reporter.
+ * @arg is a struct tharg whose fd is the perf event array map.
+ * Polls the perf buffer until `exiting` is set.
+ * Returns a pointer to a static int: 0 on a clean exit, a negative errno
+ * value otherwise.
+ */
+void *runslw_handler(void *arg)
+{
+	int err = 0;
+	struct tharg *data = (struct tharg *)arg;
+	struct perf_buffer *pb = NULL;
+	struct perf_buffer_opts pb_opts = {};
+
+	fprintf(fp_rsw, "Tracing run queue latency higher than %llu us\n", env.min_us);
+	if (env.previous)
+		fprintf(fp_rsw, "%-21s %-6s %-16s %-8s %-10s %-16s %-6s\n", "TIME", "CPU", "COMM", "TID", "LAT(us)", "PREV COMM", "PREV TID");
+	else
+		fprintf(fp_rsw, "%-21s %-6s %-16s %-8s %-10s\n", "TIME", "CPU", "COMM", "TID", "LAT(us)");
+
+	pb_opts.sample_cb = handle_event;
+	/* without this, dropped samples go unnoticed */
+	pb_opts.lost_cb = handle_lost_events;
+	pb = perf_buffer__new(data->fd, 64, &pb_opts);
+	/*
+	 * Depending on the libbpf version a failure is reported as NULL
+	 * with errno set or as an error-encoded pointer; handle both.
+	 */
+	err = pb ? (int)libbpf_get_error(pb) : -errno;
+	if (err || !pb) {
+		pb = NULL;
+		fprintf(stderr, "failed to open perf buffer: %d\n", err);
+		goto clean_runslw;
+	}
+
+	while (!exiting) {
+		err = perf_buffer__poll(pb, 100);
+		if (err < 0 && err != -EINTR) {
+			fprintf(stderr, "error polling perf buffer: %s\n", strerror(-err));
+			goto clean_runslw;
+		}
+		/* reset err to return 0 if exiting */
+		err = 0;
+	}
+
+clean_runslw:
+	perf_buffer__free(pb);
+	th_ret = err;
+	return &th_ret;
+}
diff --git a/source/tools/monitor/sched/schedmoni/schedmoni.c b/source/tools/monitor/sched/schedmoni/schedmoni.c
new file mode 100644
index 0000000000000000000000000000000000000000..989f08c92c1576748d96437017f482db278605e0
--- /dev/null
+++ b/source/tools/monitor/sched/schedmoni/schedmoni.c
@@ -0,0 +1,282 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "schedmoni.h"
+#include "bpf/schedmoni.skel.h"
+
+/* result logs: fp_nsc for the nosched report, fp_rsw for run-queue latency */
+FILE *fp_nsc = NULL, *fp_rsw = NULL;
+/* set by the SIGINT handler; polled by both worker threads */
+volatile sig_atomic_t exiting = 0;
+char rswf[] = "/var/log/sysak/runslow.log";
+char nscf[] = "/var/log/sysak/nosched.log";
+char filename[256] = {0};
+
+struct env env = {
+	.min_us = 10000,
+	.fp = NULL,
+};
+
+const char *argp_program_version = "schedmoni 0.1";
+const char argp_program_doc[] =
+"Trace high run queue latency.\n"
+"\n"
+"USAGE: schedmoni [--help] [-p PID] [-t TID] 
[-P] [min_us] [-f ./runqslow.log]\n"
+"\n"
+"EXAMPLES:\n"
+" schedmoni # trace latency higher than 10000 us (default)\n"
+" schedmoni -f a.log # trace latency and record results to a.log.rswf and a.log.nscf (default: /var/log/sysak/runslow.log and /var/log/sysak/nosched.log)\n"
+" schedmoni 1000 # trace latency higher than 1000 us\n"
+" schedmoni -p 123 # trace pid 123\n"
+" schedmoni -t 123 # trace tid 123 (use for threads only)\n"
+" schedmoni -P # also show previous task name and TID\n";
+
+static const struct argp_option opts[] = {
+	{ "pid", 'p', "PID", 0, "Process PID to trace"},
+	{ "tid", 't', "TID", 0, "Thread TID to trace"},
+	{ "verbose", 'v', NULL, 0, "Verbose debug output" },
+	{ "previous", 'P', NULL, 0, "also show previous task name and TID" },
+	{ "logfile", 'f', "LOGFILE", 0, "logfile for result"},
+	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
+	{},
+};
+
+/* Lift RLIMIT_MEMLOCK so that the BPF maps can be created; exits on failure. */
+static void bump_memlock_rlimit(void)
+{
+	struct rlimit rlim_new = {
+		.rlim_cur	= RLIM_INFINITY,
+		.rlim_max	= RLIM_INFINITY,
+	};
+
+	if (setrlimit(RLIMIT_MEMLOCK, &rlim_new)) {
+		fprintf(stderr, "Failed to increase RLIMIT_MEMLOCK limit!\n");
+		exit(1);
+	}
+}
+
+/*
+ * (Re)open one result log in append mode.
+ * The path is @base followed by @suffix and is also left in the global
+ * `filename`.  A stream already open in *@fpp is closed first.
+ * Returns 0 on success or a positive errno value.
+ */
+static int open_log(FILE **fpp, const char *base, const char *suffix)
+{
+	int n, ret;
+
+	n = snprintf(filename, sizeof(filename), "%s%s", base, suffix);
+	if (n < 0 || (size_t)n >= sizeof(filename)) {
+		fprintf(stderr, "log file name too long: %s%s\n", base, suffix);
+		return ENAMETOOLONG;
+	}
+	if (*fpp)
+		fclose(*fpp);
+	*fpp = fopen(filename, "a+");
+	if (!*fpp) {
+		ret = errno;
+		fprintf(stderr, "%s :fopen %s\n", strerror(ret), filename);
+		return ret;
+	}
+	return 0;
+}
+
+/* Open both result logs at their default locations (rswf and nscf). */
+static int open_default_logs(void)
+{
+	int ret = open_log(&fp_rsw, rswf, "");
+
+	return ret ? ret : open_log(&fp_nsc, nscf, "");
+}
+
+/* Parse a strictly positive decimal id that fits an int; -1 when invalid. */
+static int parse_id(const char *arg)
+{
+	char *end;
+	long val;
+
+	errno = 0;
+	val = strtol(arg, &end, 10);
+	if (errno || end == arg || *end != '\0' || val <= 0 || val != (int)val)
+		return -1;
+	return (int)val;
+}
+
+/*
+ * argp callback.
+ * Fills `env` from the options, opens the logs named by -f and, when
+ * parsing ends without any log open, falls back to the default logs so
+ * that fp_rsw and fp_nsc are valid for every invocation that succeeds.
+ * Returns 0, ARGP_ERR_UNKNOWN for keys it does not handle, or a positive
+ * errno value when a log cannot be opened.
+ */
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	static int pos_args;
+	long long min_us;
+	char *end;
+	int id, ret;
+
+	switch (key) {
+	case 'h':
+		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
+		break;
+	case 'v':
+		env.verbose = true;
+		break;
+	case 'P':
+		env.previous = true;
+		break;
+	case 'p':
+		id = parse_id(arg);
+		if (id < 0) {
+			fprintf(stderr, "Invalid PID: %s\n", arg);
+			argp_usage(state);
+		}
+		env.pid = id;
+		break;
+	case 't':
+		id = parse_id(arg);
+		if (id < 0) {
+			fprintf(stderr, "Invalid TID: %s\n", arg);
+			argp_usage(state);
+		}
+		env.tid = id;
+		break;
+	case 'f':
+		/* names shorter than two characters select the default logs */
+		if (strlen(arg) < 2)
+			return open_default_logs();
+		ret = open_log(&fp_rsw, arg, ".rswf");
+		if (!ret)
+			ret = open_log(&fp_nsc, arg, ".nscf");
+		return ret;
+	case ARGP_KEY_ARG:
+		if (pos_args++) {
+			fprintf(stderr,
+				"Unrecognized positional argument: %s\n", arg);
+			argp_usage(state);
+		}
+		errno = 0;
+		min_us = strtoll(arg, &end, 10);
+		if (errno || end == arg || *end != '\0' || min_us <= 0) {
+			fprintf(stderr, "Invalid delay (in us): %s\n", arg);
+			argp_usage(state);
+		}
+		env.min_us = min_us;
+		break;
+	case ARGP_KEY_END:
+		/*
+		 * Delivered once after the last argument, also when there
+		 * were no arguments at all: the only place where "no -f was
+		 * given" is known for certain.
+		 */
+		if (!fp_rsw && !fp_nsc)
+			return open_default_logs();
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+	return 0;
+}
+
+/* libbpf log callback: drop debug messages unless -v was given. */
+static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
+{
+	if (level == LIBBPF_DEBUG && !env.verbose)
+		return 0;
+	return vfprintf(stderr, format, args);
+}
+
+/* SIGINT handler: ask the worker threads to finish. */
+static void sig_int(int signo)
+{
+	exiting = 1;
+}
+
+void handle_event(void *ctx, int cpu, void *data, __u32 data_sz);
+void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt);
+void
*runslw_handler(void *arg);
+void *runnsc_handler(void *arg);
+
+/*
+ * Entry point: parse the command line, load and attach the BPF programs,
+ * publish the arguments in argmap, then run the two reporter threads until
+ * SIGINT.  Returns 0 on success and non-zero on any failure.
+ */
+int main(int argc, char **argv)
+{
+	void *res = NULL;
+	int i, err, jerr;
+	int arg_fd, ent_fd, stk_fd, stkext_fd;
+	pthread_t pt_runslw, pt_runnsc;
+	struct schedmoni_bpf *obj;
+	struct args args = {};
+	struct tharg runslw = {};
+	struct tharg runnsc = {};
+	static const struct argp argp = {
+		.options = opts,
+		.parser = parse_arg,
+		.doc = argp_program_doc,
+	};
+
+	err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
+	if (err)
+		return err;
+
+	libbpf_set_print(libbpf_print_fn);
+
+	bump_memlock_rlimit();
+
+	obj = schedmoni_bpf__open();
+	if (!obj) {
+		fprintf(stderr, "failed to open BPF object\n");
+		return 1;
+	}
+
+	err = schedmoni_bpf__load(obj);
+	if (err) {
+		fprintf(stderr, "failed to load BPF object: %d\n", err);
+		goto cleanup;
+	}
+
+	err = schedmoni_bpf__attach(obj);
+	if (err) {
+		fprintf(stderr, "failed to attach BPF programs\n");
+		goto cleanup;
+	}
+
+	i = 0;
+	arg_fd = bpf_map__fd(obj->maps.argmap);
+	ent_fd = bpf_map__fd(obj->maps.events);
+	stk_fd = bpf_map__fd(obj->maps.stackmap);
+	stkext_fd = bpf_map__fd(obj->maps.stackmap_ext);
+	args.targ_tgid = env.pid;
+	args.targ_pid = env.tid;
+	args.min_us = env.min_us;
+	args.flag = TIF_NEED_RESCHED;
+
+	/* the BPF side reads its arguments from slot 0 */
+	err = bpf_map_update_elem(arg_fd, &i, &args, 0);
+	if (err) {
+		fprintf(stderr, "Failed to update flag map\n");
+		goto cleanup;
+	}
+
+	if (signal(SIGINT, sig_int) == SIG_ERR) {
+		fprintf(stderr, "can't set signal handler: %s\n", strerror(errno));
+		err = 1;
+		goto cleanup;
+	}
+
+	runslw.fd = ent_fd;
+	/* pthread functions return the error number; they do not set errno */
+	err = pthread_create(&pt_runslw, NULL, runslw_handler, &runslw);
+	if (err) {
+		fprintf(stderr, "can't pthread_create runslw: %s\n", strerror(err));
+		goto cleanup;
+	}
+	runnsc.fd = stk_fd;
+	runnsc.ext_fd = stkext_fd;
+
+	/* each worker needs its own handle, otherwise one is never joined */
+	err = pthread_create(&pt_runnsc, NULL, runnsc_handler, &runnsc);
+	if (err) {
+		fprintf(stderr, "can't pthread_create runnsc: %s\n", strerror(err));
+		/* stop and reap the first worker before the maps go away */
+		exiting = 1;
+		pthread_join(pt_runslw, NULL);
+		goto cleanup;
+	}
+
+	err = pthread_join(pt_runslw, &res);
+	/* runslw_handler returns a pointer to its int status */
+	if (!err && res)
+		err = *(int *)res;
+	jerr = pthread_join(pt_runnsc, NULL);
+	if (!err)
+		err = jerr;
+
+cleanup:
+	schedmoni_bpf__destroy(obj);
+	if (fp_rsw)
+		fclose(fp_rsw);
+	if (fp_nsc)
+		fclose(fp_nsc);
+
+	return err != 0;
+}
diff --git a/source/tools/monitor/sched/schedmoni/schedmoni.h b/source/tools/monitor/sched/schedmoni/schedmoni.h
new file mode 100644
index 0000000000000000000000000000000000000000..5b1ba74d2d716add114525da968e159b0cec5b8c
--- /dev/null
+++ b/source/tools/monitor/sched/schedmoni/schedmoni.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __RUNQSLOWER_H
+#define __RUNQSLOWER_H
+#define TASK_COMM_LEN 16
+/*
+ * Bit number of the need-resched thread flag.  It is left undefined on
+ * other architectures; only user space (main) uses it.
+ */
+#ifdef __x86_64__
+#define TIF_NEED_RESCHED 3
+#elif defined (__aarch64__)
+#define TIF_NEED_RESCHED 1
+#endif
+
+
+/* Arguments handed from user space to the BPF programs through argmap. */
+struct args {
+	__u64 min_us;		/* latency threshold in microseconds */
+	pid_t targ_pid;		/* thread filter, 0 = any */
+	pid_t targ_tgid;	/* process filter, 0 = any */
+	int flag;		/* bit number of the need-resched flag */
+};
+
+/* Map file descriptors handed to a reporter thread. */
+struct tharg {
+	int fd;
+	int ext_fd;
+};
+
+/* Parsed command line. */
+struct env {
+	pid_t pid;
+	pid_t tid;
+	__u64 min_us;
+	bool previous;
+	bool verbose;
+	void *fp;
+};
+
+/* One run-queue latency record sent through the perf buffer. */
+struct event {
+	char task[TASK_COMM_LEN];
+	char prev_task[TASK_COMM_LEN];
+
+	__u64 delta_us;
+	pid_t pid;
+	pid_t prev_pid;
+	int cpuid;
+};
+
+#endif /* __RUNQSLOWER_H */