diff --git a/source/tools/monitor/sched/runqslower/bpf/runqslower.bpf.c b/source/tools/monitor/sched/runqslower/bpf/runqslower.bpf.c index 38d514e82e9ee547c363b94d69929322006ac125..d362fd3fc283a98628869699172dca105fce38c8 100644 --- a/source/tools/monitor/sched/runqslower/bpf/runqslower.bpf.c +++ b/source/tools/monitor/sched/runqslower/bpf/runqslower.bpf.c @@ -7,12 +7,6 @@ #define TASK_RUNNING 0 #define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) -struct args { - __u64 min_us; - pid_t targ_pid; - pid_t targ_tgid; -}; - struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 4); @@ -51,15 +45,20 @@ struct { static __always_inline int trace_enqueue(u32 tgid, u32 pid) { - u64 ts; - pid_t targ_tgid, targ_pid; + u64 ts, i = 0; + pid_t targ_tgid, targ_pid, filter_pid; struct args *argp; - if (!pid) + argp = bpf_map_lookup_elem(&argmap, &i); + if (!argp) + return 0; + + filter_pid = _(argp->filter_pid); + if (!pid || (pid == filter_pid)) return 0; - targ_tgid = GETARG_FROM_ARRYMAP(argmap, argp, pid_t, targ_tgid); - targ_pid = GETARG_FROM_ARRYMAP(argmap, argp, pid_t, targ_pid); + targ_tgid = _(argp->targ_tgid); + targ_pid = _(argp->targ_pid); if (targ_tgid && targ_tgid != tgid) return 0; if (targ_pid && targ_pid != pid) @@ -95,7 +94,7 @@ int handle_switch(struct trace_event_raw_sched_switch *ctx) u32 pid, prev_pid; long int prev_state; struct event event = {}; - u64 *tsp, delta_us, min_us; + u64 *tsp, delta_us, min_us, now; struct args *argp; prev_pid = ctx->prev_pid; @@ -106,13 +105,12 @@ int handle_switch(struct trace_event_raw_sched_switch *ctx) if (prev_state == TASK_RUNNING) trace_enqueue(0, prev_pid); - /* fetch timestamp and calculate delta */ tsp = bpf_map_lookup_elem(&start, &pid); if (!tsp) return 0; /* missed enqueue */ - - delta_us = (bpf_ktime_get_ns() - *tsp) / 1000; + now = bpf_ktime_get_ns(); + delta_us = (now - *tsp) / 1000; min_us = GETARG_FROM_ARRYMAP(argmap, argp, u64, min_us); if (min_us && delta_us <= min_us) return 0; @@ -121,6 +119,7 @@ int handle_switch(struct trace_event_raw_sched_switch *ctx) event.pid = pid; event.prev_pid = prev_pid; event.delta_us = delta_us; + event.stamp = now; bpf_probe_read(event.task, sizeof(event.task), &(ctx->next_comm)); bpf_probe_read(event.prev_task, sizeof(event.prev_task), &(ctx->prev_comm)); diff --git a/source/tools/monitor/sched/runqslower/runqslower.c b/source/tools/monitor/sched/runqslower/runqslower.c index 2fb199fdddea0779162e790a033d778f66a30e39..7c37e99df1c6a89c0e515d5014b9250698e72ade 100644 --- a/source/tools/monitor/sched/runqslower/runqslower.c +++ b/source/tools/monitor/sched/runqslower/runqslower.c @@ -24,6 +24,7 @@ char log_dir[] = "/var/log/sysak/runqslow/"; char defaultfile[] = "/var/log/sysak/runqslow/runqslow.log"; char filename[256] = {0}; +struct summary summary; struct env { pid_t pid; pid_t tid; @@ -31,6 +32,7 @@ struct env { __u64 min_us; bool previous; bool verbose; + bool summary; } env = { .span = 0, .min_us = 10000, @@ -58,6 +60,7 @@ static const struct argp_option opts[] = { { "tid", 't', "TID", 0, "Thread TID to trace"}, { "span", 's', "SPAN", 0, "How long to run"}, { "verbose", 'v', NULL, 0, "Verbose debug output" }, + { "summary", 'S', NULL, 0, "Output the summary info" }, { "previous", 'P', NULL, 0, "also show previous task name and TID" }, { "logfile", 'f', "LOGFILE", 0, "logfile for result"}, { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" }, @@ -102,6 +105,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 'v': env.verbose = true; break; + case 'S': + env.summary = true; + break; case 'P': env.previous = true; break; @@ -201,11 +207,41 @@ void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) time(&t); tm = localtime(&t); strftime(ts, sizeof(ts), "%F %H:%M:%S", tm); - if (env.previous) - fprintf(filep, "%-21s %-6d %-16s %-8d %-10llu %-16s %-6d\n", ts, e->cpuid, e->task, e->pid, - e->delta_us, e->prev_task, e->prev_pid); - else - fprintf(filep, "%-21s %-6d %-16s %-8d %-10llu\n", ts, e->cpuid, e->task, e->pid, e->delta_us); + if (env.summary) { + int idx, i; + + if(fseek(filep, 0L, SEEK_SET) < 0) + return; + summary.num++; + idx = summary.num % CPU_ARRY_LEN; + summary.total += e->delta_us; + summary.cpus[idx] = e->cpuid; + if (summary.max.value < e->delta_us) { + summary.max.value = e->delta_us; + summary.max.cpu = e->cpuid; + summary.max.pid = e->pid; + summary.max.stamp = e->stamp; + strncpy(summary.max.comm, e->task, 16); + } + + fprintf(filep, "rqslow %-5lu %-6llu", + summary.num, summary.total/1000); + + for (i = 1; i <= CPU_ARRY_LEN; i++) + fprintf(filep, " %d", summary.cpus[(idx+i)%CPU_ARRY_LEN]); + + fprintf(filep, " %-4llu %-12llu %-3d %-9d %-15s\n", + summary.max.value/1000, summary.max.stamp/1000, + summary.max.cpu, summary.max.pid, summary.max.comm); + } else { + if (env.previous) + fprintf(filep, "%-21s %-6d %-16s %-8d %-10llu %-16s %-6d\n", + ts, e->cpuid, e->task, e->pid, + e->delta_us, e->prev_task, e->prev_pid); + else + fprintf(filep, "%-21s %-6d %-16s %-8d %-10llu\n", + ts, e->cpuid, e->task, e->pid, e->delta_us); + } } void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt) @@ -213,12 +249,6 @@ void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt) printf("Lost %llu events on CPU #%d!\n", lost_cnt, cpu); } -struct args { - __u64 min_us; - pid_t targ_pid; - pid_t targ_tgid; -}; - int main(int argc, char **argv) { int i, err, map_fd; @@ -266,19 +296,17 @@ int main(int argc, char **argv) map_fd = bpf_map__fd(obj->maps.argmap); args.targ_tgid = env.pid; args.targ_pid = env.tid; + args.filter_pid = getpid(); args.min_us = env.min_us; - bpf_map_update_elem(map_fd, &i, &args, 0); - if (err) { - fprintf(stderr, "Failed to update flag map\n"); - goto cleanup; - } - - fprintf(filep, "Tracing run queue latency higher than %llu us\n", env.min_us); - if (env.previous) - fprintf(filep, "%-21s %-6s %-16s %-8s %-10s %-16s %-6s\n", "TIME(runslw)", "CPU", "COMM", "TID", "LAT(us)", "PREV COMM", "PREV TID"); - else - fprintf(filep, "%-21s %-6s %-16s %-8s %-10s\n", "TIME(runslw)", "CPU", "COMM", "TID", "LAT(us)"); + if (!env.summary) { + if (env.previous) + fprintf(filep, "%-21s %-6s %-16s %-8s %-10s %-16s %-6s\n", + "TIME(runslw)", "CPU", "COMM", "TID", "LAT(us)", "PREV COMM", "PREV TID"); + else + fprintf(filep, "%-21s %-6s %-16s %-8s %-10s\n", + "TIME(runslw)", "CPU", "COMM", "TID", "LAT(us)"); + } pb_opts.sample_cb = handle_event; pb = perf_buffer__new(bpf_map__fd(obj->maps.events), 64, &pb_opts); if (!pb) { @@ -297,6 +325,11 @@ int main(int argc, char **argv) if (env.span) alarm(env.span); + bpf_map_update_elem(map_fd, &i, &args, 0); + if (err) { + fprintf(stderr, "Failed to update flag map\n"); + goto cleanup; + } while (!exiting) { err = perf_buffer__poll(pb, 100); if (err < 0 && err != -EINTR) { diff --git a/source/tools/monitor/sched/runqslower/runqslower.h b/source/tools/monitor/sched/runqslower/runqslower.h index d32e3e7dfb724140702516210b5f36b309552960..ca492ab84b94d516753d455a7d2ac030575f891c 100644 --- a/source/tools/monitor/sched/runqslower/runqslower.h +++ b/source/tools/monitor/sched/runqslower/runqslower.h @@ -3,15 +3,39 @@ #define __RUNQSLOWER_H #define TASK_COMM_LEN 16 +#define CPU_ARRY_LEN 4 struct event { char task[TASK_COMM_LEN]; char prev_task[TASK_COMM_LEN]; __u64 delta_us; + __u64 stamp; pid_t pid; pid_t prev_pid; int cpuid; }; +struct max_sum { + __u64 value; + __u64 stamp; + int cpu, pid; + char comm[TASK_COMM_LEN]; +}; + +struct summary { + unsigned long num; + __u64 total; + struct max_sum max; + int cpus[CPU_ARRY_LEN]; +}; + +struct args { + __u64 min_us; + pid_t targ_pid; + pid_t targ_tgid; + pid_t filter_pid; + pid_t pad; +}; + #endif /* __RUNQSLOWER_H */