From 05f1c9f8b4f551be806ca6d208cd351a27c9a30b Mon Sep 17 00:00:00 2001 From: Hailong Liu Date: Wed, 9 Mar 2022 14:34:42 +0800 Subject: [PATCH] monitor/sched: Add task run slower monitor This tool monitor this: tasks queued in run queue and wait a long time to arrive cpu. Signed-off-by: Hailong Liu --- .../tools/monitor/sched/runqslower/Makefile | 8 + .../sched/runqslower/bpf/runqslower.bpf.c | 135 +++++++++ .../monitor/sched/runqslower/runqslower.c | 273 ++++++++++++++++++ .../monitor/sched/runqslower/runqslower.h | 17 ++ 4 files changed, 433 insertions(+) create mode 100644 source/tools/monitor/sched/runqslower/Makefile create mode 100644 source/tools/monitor/sched/runqslower/bpf/runqslower.bpf.c create mode 100644 source/tools/monitor/sched/runqslower/runqslower.c create mode 100644 source/tools/monitor/sched/runqslower/runqslower.h diff --git a/source/tools/monitor/sched/runqslower/Makefile b/source/tools/monitor/sched/runqslower/Makefile new file mode 100644 index 00000000..47dcb339 --- /dev/null +++ b/source/tools/monitor/sched/runqslower/Makefile @@ -0,0 +1,8 @@ + +newdirs := $(shell find ./ -type d) + +bpfsrcs := $(wildcard bpf/*.bpf.c) +csrcs := $(wildcard *.c) +target := runqslower + +include $(SRC)/mk/bpf.mk diff --git a/source/tools/monitor/sched/runqslower/bpf/runqslower.bpf.c b/source/tools/monitor/sched/runqslower/bpf/runqslower.bpf.c new file mode 100644 index 00000000..38d514e8 --- /dev/null +++ b/source/tools/monitor/sched/runqslower/bpf/runqslower.bpf.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include +#include +#include "../runqslower.h" + +#define TASK_RUNNING 0 +#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) + +struct args { + __u64 min_us; + pid_t targ_pid; + pid_t targ_tgid; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 4); + __type(key, u32); + __type(value, struct args); +} argmap SEC(".maps"); + +struct { + __uint(type, 
BPF_MAP_TYPE_HASH); + __uint(max_entries, 10240); + __type(key, u32); + __type(value, u64); +} start SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(u32)); +} events SEC(".maps"); + +/* + * the return value type can only be assigned to 0, + * so it can be int ,long , long long and the unsinged version + * */ +#define GETARG_FROM_ARRYMAP(map,argp,type,member)({ \ + type retval = 0; \ + int i = 0; \ + argp = bpf_map_lookup_elem(&map, &i); \ + if (argp) { \ + retval = _(argp->member); \ + } \ + retval; \ + }) + +/* record enqueue timestamp */ +static __always_inline +int trace_enqueue(u32 tgid, u32 pid) +{ + u64 ts; + pid_t targ_tgid, targ_pid; + struct args *argp; + + if (!pid) + return 0; + + targ_tgid = GETARG_FROM_ARRYMAP(argmap, argp, pid_t, targ_tgid); + targ_pid = GETARG_FROM_ARRYMAP(argmap, argp, pid_t, targ_pid); + if (targ_tgid && targ_tgid != tgid) + return 0; + if (targ_pid && targ_pid != pid) + return 0; + + ts = bpf_ktime_get_ns(); + bpf_map_update_elem(&start, &pid, &ts, 0); + return 0; +} + +SEC("tp/sched/sched_wakeup") +int handle__sched_wakeup(struct trace_event_raw_sched_wakeup_template *ctx) +{ + pid_t pid = 0; + bpf_probe_read(&pid, sizeof(pid), &(ctx->pid)); + + return trace_enqueue(0, pid); +} + +SEC("tp/sched/sched_wakeup_new") +int handle__sched_wakeup_new(struct trace_event_raw_sched_wakeup_template *ctx) +{ + pid_t pid = 0; + bpf_probe_read(&pid, sizeof(pid), &(ctx->pid)); + + return trace_enqueue(0, pid); +} + +SEC("tp/sched/sched_switch") +int handle_switch(struct trace_event_raw_sched_switch *ctx) +{ + int cpuid; + u32 pid, prev_pid; + long int prev_state; + struct event event = {}; + u64 *tsp, delta_us, min_us; + struct args *argp; + + prev_pid = ctx->prev_pid; + pid = ctx->next_pid; + prev_state = ctx->prev_state; + cpuid = bpf_get_smp_processor_id(); + /* ivcsw: treat like an enqueue event and store timestamp */ + if (prev_state == TASK_RUNNING) + 
trace_enqueue(0, prev_pid); + + + /* fetch timestamp and calculate delta */ + tsp = bpf_map_lookup_elem(&start, &pid); + if (!tsp) + return 0; /* missed enqueue */ + + delta_us = (bpf_ktime_get_ns() - *tsp) / 1000; + min_us = GETARG_FROM_ARRYMAP(argmap, argp, u64, min_us); + if (min_us && delta_us <= min_us) + return 0; + + event.cpuid = cpuid; + event.pid = pid; + event.prev_pid = prev_pid; + event.delta_us = delta_us; + bpf_probe_read(event.task, sizeof(event.task), &(ctx->next_comm)); + bpf_probe_read(event.prev_task, sizeof(event.prev_task), &(ctx->prev_comm)); + + /* output */ + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, + &event, sizeof(event)); + + bpf_map_delete_elem(&start, &pid); + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/source/tools/monitor/sched/runqslower/runqslower.c b/source/tools/monitor/sched/runqslower/runqslower.c new file mode 100644 index 00000000..3f61e311 --- /dev/null +++ b/source/tools/monitor/sched/runqslower/runqslower.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +// Copyright (c) 2019 Facebook +// +// Based on runqslower(8) from BCC by Ivan Babrou. +// 11-Feb-2020 Andrii Nakryiko Created this. 
+#include <argp.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <time.h>
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include "runqslower.h"
+#include "bpf/runqslower.skel.h"
argp_state_help(state, stderr, ARGP_HELP_STD_HELP); + break; + case 'v': + env.verbose = true; + break; + case 'P': + env.previous = true; + break; + case 'p': + errno = 0; + pid = strtol(arg, NULL, 10); + if (errno || pid <= 0) { + fprintf(stderr, "Invalid PID: %s\n", arg); + argp_usage(state); + } + env.pid = pid; + break; + case 't': + errno = 0; + pid = strtol(arg, NULL, 10); + if (errno || pid <= 0) { + fprintf(stderr, "Invalid TID: %s\n", arg); + argp_usage(state); + } + env.tid = pid; + break; + case 'f': + if (strlen(arg) < 2) + strncpy(filename, defaultfile, sizeof(filename)); + else + strncpy(filename, arg, sizeof(filename)); + filep = fopen(filename, "a+"); + if (!filep) { + int ret = errno; + fprintf(stderr, "%s :fopen %s\n", + strerror(errno), filename); + return ret; + } + break; + case ARGP_KEY_ARG: + if (pos_args++) { + fprintf(stderr, + "Unrecognized positional argument: %s\n", arg); + argp_usage(state); + } + errno = 0; + min_us = strtoll(arg, NULL, 10); + if (errno || min_us <= 0) { + fprintf(stderr, "Invalid delay (in us): %s\n", arg); + argp_usage(state); + } + env.min_us = min_us; + break; + default: + return ARGP_ERR_UNKNOWN; + } + if (!filep) { + filep = fopen(defaultfile, "a+"); + if (!filep) { + int ret = errno; + fprintf(stderr, "%s :fopen %s\n", + strerror(errno), defaultfile); + return ret; + } + } + return 0; +} + +static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) +{ + if (level == LIBBPF_DEBUG && !env.verbose) + return 0; + return vfprintf(stderr, format, args); +} + +static void sig_int(int signo) +{ + exiting = 1; +} + +void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) +{ + const struct event *e = data; + struct tm *tm; + char ts[64]; + time_t t; + + time(&t); + tm = localtime(&t); + strftime(ts, sizeof(ts), "%F %H:%M:%S", tm); + if (env.previous) + fprintf(filep, "%-21s %-6d %-16s %-8d %-10llu %-16s %-6d\n", ts, e->cpuid, e->task, e->pid, + e->delta_us, e->prev_task, 
e->prev_pid); + else + fprintf(filep, "%-21s %-6d %-16s %-8d %-10llu\n", ts, e->cpuid, e->task, e->pid, e->delta_us); +} + +void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt) +{ + printf("Lost %llu events on CPU #%d!\n", lost_cnt, cpu); +} + +struct args { + __u64 min_us; + pid_t targ_pid; + pid_t targ_tgid; +}; + +int main(int argc, char **argv) +{ + int i, err, map_fd; + static const struct argp argp = { + .options = opts, + .parser = parse_arg, + .doc = argp_program_doc, + }; + struct perf_buffer *pb = NULL; + struct runqslower_bpf *obj; + struct perf_buffer_opts pb_opts = {}; + struct args args = {}; + + err = argp_parse(&argp, argc, argv, 0, NULL, NULL); + if (err) + return err; + + libbpf_set_print(libbpf_print_fn); + + bump_memlock_rlimit(); + + obj = runqslower_bpf__open(); + if (!obj) { + fprintf(stderr, "failed to open BPF object\n"); + return 1; + } + + err = runqslower_bpf__load(obj); + if (err) { + fprintf(stderr, "failed to load BPF object: %d\n", err); + goto cleanup; + } + + err = runqslower_bpf__attach(obj); + if (err) { + fprintf(stderr, "failed to attach BPF programs\n"); + goto cleanup; + } + + i = 0; + map_fd = bpf_map__fd(obj->maps.argmap); + args.targ_tgid = env.pid; + args.targ_pid = env.tid; + args.min_us = env.min_us; + bpf_map_update_elem(map_fd, &i, &args, 0); + if (err) { + fprintf(stderr, "Failed to update flag map\n"); + goto cleanup; + } + + fprintf(filep, "Tracing run queue latency higher than %llu us\n", env.min_us); + if (env.previous) + fprintf(filep, "%-21s %-6s %-16s %-8s %-10s %-16s %-6s\n", "TIME", "CPU", "COMM", "TID", "LAT(us)", "PREV COMM", "PREV TID"); + else + fprintf(filep, "%-21s %-6s %-16s %-8s %-10s\n", "TIME", "CPU", "COMM", "TID", "LAT(us)"); + + pb_opts.sample_cb = handle_event; + pb = perf_buffer__new(bpf_map__fd(obj->maps.events), 64, &pb_opts); + if (!pb) { + err = -errno; + fprintf(stderr, "failed to open perf buffer: %d\n", err); + goto cleanup; + } + + if (signal(SIGINT, sig_int) == SIG_ERR) { + 
fprintf(stderr, "can't set signal handler: %s\n", strerror(errno)); + err = 1; + goto cleanup; + } + + while (!exiting) { + err = perf_buffer__poll(pb, 100); + if (err < 0 && err != -EINTR) { + fprintf(stderr, "error polling perf buffer: %s\n", strerror(-err)); + goto cleanup; + } + /* reset err to return 0 if exiting */ + err = 0; + } + +cleanup: + perf_buffer__free(pb); + runqslower_bpf__destroy(obj); + + return err != 0; +} diff --git a/source/tools/monitor/sched/runqslower/runqslower.h b/source/tools/monitor/sched/runqslower/runqslower.h new file mode 100644 index 00000000..d32e3e7d --- /dev/null +++ b/source/tools/monitor/sched/runqslower/runqslower.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __RUNQSLOWER_H +#define __RUNQSLOWER_H + +#define TASK_COMM_LEN 16 + +struct event { + char task[TASK_COMM_LEN]; + char prev_task[TASK_COMM_LEN]; + + __u64 delta_us; + pid_t pid; + pid_t prev_pid; + int cpuid; +}; + +#endif /* __RUNQSLOWER_H */ -- Gitee