diff --git a/tools/lib/perf/include/perf/bpf_perf.h b/tools/lib/perf/include/perf/bpf_perf.h
index e7cf6ba7b674bc94041cc28f879e7678ec2f2820..64c8d211726d7f6831dffedcdd8025536a201322 100644
--- a/tools/lib/perf/include/perf/bpf_perf.h
+++ b/tools/lib/perf/include/perf/bpf_perf.h
@@ -23,6 +23,7 @@
 struct perf_event_attr_map_entry {
 	__u32 link_id;
 	__u32 diff_map_id;
+	__u8 supported;
 };
 
 /* default attr_map name */
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 9692ebdd7f11e9aaa66525e6a6d3db172cab827e..712c170eb7781bba5d54259d5cad0d0535d7c6e4 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -2624,6 +2624,7 @@ int cmd_stat(int argc, const char **argv)
 	} else if (big_num_opt == 0) /* User passed --no-big-num */
 		stat_config.big_num = false;
 
+	target.inherit = !stat_config.no_inherit;
 	err = target__validate(&target);
 	if (err) {
 		target__strerror(&target, err, errbuf, BUFSIZ);
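The single added line above carries the whole user-visible policy: perf stat counts child tasks by default, and --no-inherit opts out, so target.inherit simply forwards that choice to the BPF-counter code. This mirrors what a plain perf event gets from perf_event_attr.inherit, where the counts of children forked after the event is opened are folded into the parent's count as they exit. A minimal standalone sketch of that baseline behavior (not part of the patch; the busy-loop workload and the trimmed error handling are illustrative only):

#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count = 0;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
	attr.disabled = 1;
	attr.inherit = 1;	/* children forked from now on are counted too */

	/* pid == 0, cpu == -1: count this process on any CPU */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	if (fork() == 0) {
		/* child work; merged into the parent's count at exit */
		for (volatile long i = 0; i < 10000000; i++)
			;
		_exit(0);
	}
	wait(NULL);
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) != sizeof(count))
		return 1;
	printf("instructions (parent + exited child): %lld\n", count);
	return 0;
}

Before this series, a --bpf-counters session had no equivalent of that fold-in; the reworked test below is built around exactly this gap.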
diff --git a/tools/perf/tests/shell/stat_bpf_counters.sh b/tools/perf/tests/shell/stat_bpf_counters.sh
index a87bb2814b4c6de8395219d268eaa15fb21b6e15..d5c135225cc184e27d49ef147968e61a6151ae73 100755
--- a/tools/perf/tests/shell/stat_bpf_counters.sh
+++ b/tools/perf/tests/shell/stat_bpf_counters.sh
@@ -4,42 +4,71 @@
 
 set -e
 
+workload="perf test -w sqrtloop"
+
-# check whether $2 is within +/- 10% of $1
+# check whether $2 is within +/- 20% of $1
 compare_number()
 {
-        first_num=$1
-        second_num=$2
-
-        # upper bound is first_num * 110%
-        upper=$(expr $first_num + $first_num / 10 )
-        # lower bound is first_num * 90%
-        lower=$(expr $first_num - $first_num / 10 )
-
-        if [ $second_num -gt $upper ] || [ $second_num -lt $lower ]; then
-                echo "The difference between $first_num and $second_num are greater than 10%."
-                exit 1
-        fi
+	first_num=$1
+	second_num=$2
+
+	# upper bound is first_num * 120%
+	upper=$(expr $first_num + $first_num / 5 )
+	# lower bound is first_num * 80%
+	lower=$(expr $first_num - $first_num / 5 )
+
+	if [ $second_num -gt $upper ] || [ $second_num -lt $lower ]; then
+		echo "The difference between $first_num and $second_num is greater than 20%."
+		exit 1
+	fi
+}
+
+check_counts()
+{
+	base_instructions=$1
+	bpf_instructions=$2
+
+	if [ "$base_instructions" = "<not" ]; then
+		echo "Skipping: instructions event not counted"
+		exit 2
+	fi
+	if [ "$bpf_instructions" = "<not" ]; then
+		echo "Failed: instructions not counted with --bpf-counters"
+		exit 1
+	fi
+}
+
+test_bpf_counters()
+{
+	printf "Testing --bpf-counters "
+	base_instructions=$(perf stat --no-big-num -e instructions -- $workload 2>&1 | awk '/instructions/ {print $1}')
+	bpf_instructions=$(perf stat --no-big-num --bpf-counters -e instructions -- $workload 2>&1 | awk '/instructions/ {print $1}')
+	check_counts $base_instructions $bpf_instructions
+	compare_number $base_instructions $bpf_instructions
+	echo "[Success]"
+}
+
+test_bpf_modifier()
+{
+	printf "Testing bpf event modifier "
+	stat_output=$(perf stat --no-big-num -e instructions/name=base_instructions/,instructions/name=bpf_instructions/b -- $workload 2>&1)
+	base_instructions=$(echo "$stat_output" | awk '/base_instructions/ {print $1}')
+	bpf_instructions=$(echo "$stat_output" | awk '/bpf_instructions/ {print $1}')
+	check_counts $base_instructions $bpf_instructions
+	compare_number $base_instructions $bpf_instructions
+	echo "[Success]"
 }
 
 # skip if --bpf-counters is not supported
-if ! perf stat -e cycles --bpf-counters true > /dev/null 2>&1; then
+if ! perf stat -e instructions --bpf-counters true > /dev/null 2>&1; then
 	if [ "$1" = "-v" ]; then
 		echo "Skipping: --bpf-counters not supported"
-		perf --no-pager stat -e cycles --bpf-counters true || true
+		perf --no-pager stat -e instructions --bpf-counters true || true
 	fi
 	exit 2
 fi
 
-base_cycles=$(perf stat --no-big-num -e cycles -- perf bench sched messaging -g 1 -l 100 -t 2>&1 | awk '/cycles/ {print $1}')
-if [ "$base_cycles" = "<not" ]; then
-	echo "Skipping: cycles event not counted"
-	exit 2
-fi
-
-bpf_cycles=$(perf stat --no-big-num --bpf-counters -e cycles -- perf bench sched messaging -g 1 -l 100 -t 2>&1 | awk '/cycles/ {print $1}')
-if [ "$bpf_cycles" = "<not" ]; then
-	echo "Failed: cycles not counted with --bpf-counters"
-	exit 1
-fi
-
-compare_number $base_cycles $bpf_cycles
+test_bpf_counters
+test_bpf_modifier
+
 exit 0
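To exercise this locally: the script is picked up by perf's shell-test runner, so it can be run directly or through perf test (shell tests are matched by their description string, so the exact filter argument may vary by tree). The workload switch matters here: "perf test -w sqrtloop" is one of perf's built-in test workloads and fork()s a child to do the square-root loop, so test_bpf_counters only passes when the child's instructions are inherited into the parent's count. The trailing "b" suffix exercised by test_bpf_modifier is the per-event modifier spelling of --bpf-counters.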
diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
--- a/tools/perf/util/bpf_counter.c
+++ b/tools/perf/util/bpf_counter.c
@@ -394,6 +394,7 @@ static int bperf_check_target(struct evsel *evsel,
 }
 
 static	struct perf_cpu_map *all_cpu_map;
+static __u32 filter_entry_cnt;
 
 static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd,
 				       struct perf_event_attr_map_entry *entry)
@@ -413,18 +414,19 @@ static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd,
 	diff_map_fd = bpf_map__fd(skel->maps.diff_readings);
 	entry->link_id = bpf_link_get_id(link_fd);
 	entry->diff_map_id = bpf_map_get_id(diff_map_fd);
-	err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, entry, BPF_ANY);
-	assert(err == 0);
-
-	evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry->link_id);
-	assert(evsel->bperf_leader_link_fd >= 0);
-
 	/*
 	 * save leader_skel for install_pe, which is called within
 	 * following evsel__open_per_cpu call
 	 */
 	evsel->leader_skel = skel;
-	evsel__open_per_cpu(evsel, all_cpu_map, -1);
+	if (!evsel__open_per_cpu(evsel, all_cpu_map, -1))
+		entry->supported = true;
+
+	err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, entry, BPF_ANY);
+	assert(err == 0);
+
+	evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry->link_id);
+	assert(evsel->bperf_leader_link_fd >= 0);
 
 out:
 	bperf_leader_bpf__destroy(skel);
@@ -441,12 +443,38 @@ static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd,
 	return err;
 }
 
+static int bperf_attach_follower_program(struct bperf_follower_bpf *skel,
+					 enum bperf_filter_type filter_type,
+					 bool inherit)
+{
+	struct bpf_link *link;
+	int err = 0;
+
+	if ((filter_type == BPERF_FILTER_PID ||
+	     filter_type == BPERF_FILTER_TGID) && inherit)
+		/* attach all follower bpf progs to enable event inheritance */
+		err = bperf_follower_bpf__attach(skel);
+	else {
+		link = bpf_program__attach(skel->progs.fexit_XXX);
+		if (IS_ERR(link))
+			err = PTR_ERR(link);
+	}
+
+	return err;
+}
+
 static int bperf__load(struct evsel *evsel, struct target *target)
 {
-	struct perf_event_attr_map_entry entry = {0xffffffff, 0xffffffff};
+	struct perf_event_attr_map_entry entry = {0xffffffff, 0xffffffff, false};
 	int attr_map_fd, diff_map_fd = -1, err;
 	enum bperf_filter_type filter_type;
-	__u32 filter_entry_cnt, i;
+	__u32 i;
+
+	if (evsel->evlist->core.nr_entries > ATTR_MAP_SIZE) {
+		pr_err("Too many events, please limit to %d or less\n",
+		       ATTR_MAP_SIZE);
+		return -1;
+	}
 
 	if (bperf_check_target(evsel, target, &filter_type, &filter_entry_cnt))
 		return -1;
@@ -485,6 +513,7 @@ static int bperf__load(struct evsel *evsel, struct target *target)
 		err = -1;
 		goto out;
 	}
+	evsel->supported = entry.supported;
 	/*
 	 * The bpf_link holds reference to the leader program, and the
 	 * leader program holds reference to the maps. Therefore, if
@@ -526,9 +555,6 @@ static int bperf__load(struct evsel *evsel, struct target *target)
 	/* set up reading map */
 	bpf_map__set_max_entries(evsel->follower_skel->maps.accum_readings,
 				 filter_entry_cnt);
-	/* set up follower filter based on target */
-	bpf_map__set_max_entries(evsel->follower_skel->maps.filter,
-				 filter_entry_cnt);
 	err = bperf_follower_bpf__load(evsel->follower_skel);
 	if (err) {
 		pr_err("Failed to load follower skeleton\n");
@@ -540,6 +566,7 @@ static int bperf__load(struct evsel *evsel, struct target *target)
 	for (i = 0; i < filter_entry_cnt; i++) {
 		int filter_map_fd;
 		__u32 key;
+		struct bperf_filter_value fval = { i, 0 };
 
 		if (filter_type == BPERF_FILTER_PID ||
 		    filter_type == BPERF_FILTER_TGID)
@@ -550,12 +577,14 @@ static int bperf__load(struct evsel *evsel, struct target *target)
 			break;
 
 		filter_map_fd = bpf_map__fd(evsel->follower_skel->maps.filter);
-		bpf_map_update_elem(filter_map_fd, &key, &i, BPF_ANY);
+		bpf_map_update_elem(filter_map_fd, &key, &fval, BPF_ANY);
 	}
 
 	evsel->follower_skel->bss->type = filter_type;
+	evsel->follower_skel->bss->inherit = target->inherit;
 
-	err = bperf_follower_bpf__attach(evsel->follower_skel);
+	err = bperf_attach_follower_program(evsel->follower_skel, filter_type,
+					    target->inherit);
 
 out:
 	if (err && evsel->bperf_leader_link_fd >= 0)
@@ -620,7 +649,7 @@ static int bperf__read(struct evsel *evsel)
 	bperf_sync_counters(evsel);
 	reading_map_fd = bpf_map__fd(skel->maps.accum_readings);
 
-	for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) {
+	for (i = 0; i < filter_entry_cnt; i++) {
 		struct perf_cpu entry;
 		__u32 cpu;
 
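Two design points in bperf_attach_follower_program() are worth spelling out: the task-lifecycle programs are only attached for PID/TGID filtering with inherit enabled, because tp_btf/task_newtask and tp_btf/sched_process_exit fire for every fork and exit on the system; and when they are not needed, only the counting fexit program is attached. A sketch of the same policy written against generic libbpf calls instead of the generated skeleton (the helper name is ours, the filter-type check is elided for brevity, and the links are intentionally left open for the lifetime of the session, which is how perf itself holds them):

#include <stdbool.h>
#include <string.h>
#include <bpf/libbpf.h>

static int attach_policy(struct bpf_object *obj, bool inherit)
{
	struct bpf_program *prog;

	bpf_object__for_each_program(prog, obj) {
		/* without inheritance, skip everything but the counter */
		if (!inherit && strcmp(bpf_program__name(prog), "fexit_XXX") != 0)
			continue;
		if (libbpf_get_error(bpf_program__attach(prog)))
			return -1;
	}
	return 0;
}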
diff --git a/tools/perf/util/bpf_skel/bperf_follower.bpf.c b/tools/perf/util/bpf_skel/bperf_follower.bpf.c
index f193998530d431d828eb0ebee6840e166eb6aae7..0595063139a3d5555f6e9cc2314769be1a476d18 100644
--- a/tools/perf/util/bpf_skel/bperf_follower.bpf.c
+++ b/tools/perf/util/bpf_skel/bperf_follower.bpf.c
@@ -5,6 +5,8 @@
 #include <bpf/bpf_tracing.h>
 #include "bperf_u.h"
 
+#define MAX_ENTRIES 102400
+
 struct {
 	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
 	__uint(key_size, sizeof(__u32));
@@ -22,25 +24,29 @@
 struct {
 	__uint(type, BPF_MAP_TYPE_HASH);
 	__uint(key_size, sizeof(__u32));
-	__uint(value_size, sizeof(__u32));
+	__uint(value_size, sizeof(struct bperf_filter_value));
+	__uint(max_entries, MAX_ENTRIES);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
 } filter SEC(".maps");
 
 enum bperf_filter_type type = 0;
 int enabled = 0;
+int inherit;
 
 SEC("fexit/XXX")
 int BPF_PROG(fexit_XXX)
 {
 	struct bpf_perf_event_value *diff_val, *accum_val;
 	__u32 filter_key, zero = 0;
-	__u32 *accum_key;
+	__u32 accum_key;
+	struct bperf_filter_value *fval;
 
 	if (!enabled)
 		return 0;
 
 	switch (type) {
 	case BPERF_FILTER_GLOBAL:
-		accum_key = &zero;
+		accum_key = zero;
 		goto do_add;
 	case BPERF_FILTER_CPU:
 		filter_key = bpf_get_smp_processor_id();
@@ -49,22 +55,34 @@ int BPF_PROG(fexit_XXX)
 		filter_key = bpf_get_current_pid_tgid() & 0xffffffff;
 		break;
 	case BPERF_FILTER_TGID:
-		filter_key = bpf_get_current_pid_tgid() >> 32;
+		/* Use pid as the filter_key to exclude new task counts
+		 * when inherit is disabled. Don't worry about the existing
+		 * children in TGID losing their counts, bpf_counter has
+		 * already added them to the filter map via perf_thread_map
+		 * before this bpf prog runs.
+		 */
+		filter_key = inherit ?
+			     bpf_get_current_pid_tgid() >> 32 :
+			     bpf_get_current_pid_tgid() & 0xffffffff;
 		break;
 	default:
 		return 0;
 	}
 
-	accum_key = bpf_map_lookup_elem(&filter, &filter_key);
-	if (!accum_key)
+	fval = bpf_map_lookup_elem(&filter, &filter_key);
+	if (!fval)
 		return 0;
 
+	accum_key = fval->accum_key;
+	if (fval->exited)
+		bpf_map_delete_elem(&filter, &filter_key);
+
 do_add:
 	diff_val = bpf_map_lookup_elem(&diff_readings, &zero);
 	if (!diff_val)
 		return 0;
 
-	accum_val = bpf_map_lookup_elem(&accum_readings, accum_key);
+	accum_val = bpf_map_lookup_elem(&accum_readings, &accum_key);
 	if (!accum_val)
 		return 0;
 
@@ -75,4 +93,70 @@ int BPF_PROG(fexit_XXX)
 	return 0;
 }
 
+/* The program is only used for PID or TGID filter types. */
+SEC("tp_btf/task_newtask")
+int BPF_PROG(on_newtask, struct task_struct *task, __u64 clone_flags)
+{
+	__u32 parent_key, child_key;
+	struct bperf_filter_value *parent_fval;
+	struct bperf_filter_value child_fval = { 0 };
+
+	if (!enabled)
+		return 0;
+
+	switch (type) {
+	case BPERF_FILTER_PID:
+		parent_key = bpf_get_current_pid_tgid() & 0xffffffff;
+		child_key = task->pid;
+		break;
+	case BPERF_FILTER_TGID:
+		parent_key = bpf_get_current_pid_tgid() >> 32;
+		child_key = task->tgid;
+		if (child_key == parent_key)
+			return 0;
+		break;
+	default:
+		return 0;
+	}
+
+	/* Check if the current task is one of the target tasks to be counted */
+	parent_fval = bpf_map_lookup_elem(&filter, &parent_key);
+	if (!parent_fval)
+		return 0;
+
+	/* Start counting for the new task by adding it into filter map,
+	 * inherit the accum key of its parent task so that they can be
+	 * counted together.
+	 */
+	child_fval.accum_key = parent_fval->accum_key;
+	child_fval.exited = 0;
+	bpf_map_update_elem(&filter, &child_key, &child_fval, BPF_NOEXIST);
+
+	return 0;
+}
+
+/* The program is only used for PID or TGID filter types. */
+SEC("tp_btf/sched_process_exit")
+int BPF_PROG(on_exittask, struct task_struct *task)
+{
+	__u32 pid;
+	struct bperf_filter_value *fval;
+
+	if (!enabled)
+		return 0;
+
+	/* Stop counting for this task by removing it from filter map.
+	 * For TGID type, if the pid can be found in the map, it means that
+	 * this pid belongs to the leader task. After the task exits, the
+	 * tgid of its child tasks (if any) will be 1, so the pid can be
+	 * safely removed.
+	 */
+	pid = task->pid;
+	fval = bpf_map_lookup_elem(&filter, &pid);
+	if (fval)
+		fval->exited = 1;
+
+	return 0;
+}
+
 char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/bperf_u.h b/tools/perf/util/bpf_skel/bperf_u.h
index 1ce0c2c905c112d68a0548cdaf484e20432cb459..4a4a753980bef8f03c82f9fbe529757866d4f8ca 100644
--- a/tools/perf/util/bpf_skel/bperf_u.h
+++ b/tools/perf/util/bpf_skel/bperf_u.h
@@ -11,4 +11,9 @@ enum bperf_filter_type {
 	BPERF_FILTER_TGID,
 };
 
+struct bperf_filter_value {
+	__u32 accum_key;
+	__u8 exited;
+};
+
 #endif /* __BPERF_STAT_U_H */
diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h
index d582cae8e1051a606d0b5b383584adf77c4df7b0..2ee2cc30340f0f1cc89018e508a6a8e64dbd5b4a 100644
--- a/tools/perf/util/target.h
+++ b/tools/perf/util/target.h
@@ -17,6 +17,7 @@ struct target {
 	bool default_per_cpu;
 	bool per_thread;
 	bool use_bpf;
+	bool inherit;
 	int initial_delay;
 	const char *attr_map;
 };
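One idiom recurs throughout the BPF side of this patch: bpf_get_current_pid_tgid() packs the tgid (the user-visible process id) into the upper 32 bits and the pid (the thread id) into the lower 32. That is why BPERF_FILTER_TGID keys the filter map on ">> 32" when inherit is on, but falls back to the low half, the individual thread, when it is off, so freshly created threads never match. A reference sketch of the unpacking (helper names are ours, not the kernel's):

#include <linux/types.h>

/* bpf_get_current_pid_tgid() returns (tgid << 32) | pid */
static inline __u32 pidtgid_to_tgid(__u64 pid_tgid)
{
	return pid_tgid >> 32;		/* process id in user-space terms */
}

static inline __u32 pidtgid_to_pid(__u64 pid_tgid)
{
	return pid_tgid & 0xffffffff;	/* thread id */
}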