diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index db585d960d64cb4ffe486b723e1811d645994ddd..7e474c6fc4a7b2f3392434c25a8a15bed496f92b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3903,7 +3903,7 @@ union bpf_attr {
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
- * int bpf_get_cpumask_info(struct bpf_cpumask_info *cpus, int len)
+ * int bpf_get_cpumask_info(struct bpf_map *map, struct bpf_cpumask_info *cpus)
  *	Description
  *		Get system cpus returned in *cpus*.
  *	Return
diff --git a/kernel/sched/bpf_sched.c b/kernel/sched/bpf_sched.c
index 7485077d5a933b3bba5667823c39eeff5d045260..99211235a028c63bba87573d46d628f64a2d65f2 100644
--- a/kernel/sched/bpf_sched.c
+++ b/kernel/sched/bpf_sched.c
@@ -62,7 +62,7 @@ BPF_CALL_1(bpf_sched_entity_to_tgidpid, struct sched_entity *, se)
 BPF_CALL_1(bpf_sched_entity_to_cgrpid, struct sched_entity *, se)
 {
 #ifdef CONFIG_FAIR_GROUP_SCHED
-	if (!entity_is_task(se))
+	if (!entity_is_task(se) && se->my_q->tg->css.cgroup)
 		return cgroup_id(se->my_q->tg->css.cgroup);
 #endif
 	return (u64) -1;
@@ -82,9 +82,11 @@ BPF_CALL_2(bpf_sched_entity_belongs_to_cgrp, struct sched_entity *, se,
 		cgrp = se->my_q->tg->css.cgroup;
 #endif
 
-	for (level = cgrp->level; level; level--)
-		if (cgrp->ancestor_ids[level] == cgrpid)
-			return 1;
+	if (cgrp) {
+		for (level = cgrp->level; level; level--)
+			if (cgrp->ancestor_ids[level] == cgrpid)
+				return 1;
+	}
 #endif
 	return 0;
 }
diff --git a/kernel/sched/bpf_topology.c b/kernel/sched/bpf_topology.c
index 9c2eda139e2a29990a562ef5a9fe5785afa747f2..843b6092a64ff793f350633113f05117918a581e 100644
--- a/kernel/sched/bpf_topology.c
+++ b/kernel/sched/bpf_topology.c
@@ -70,10 +70,9 @@ const struct bpf_func_proto bpf_init_cpu_topology_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
-BPF_CALL_2(bpf_get_cpumask_info, struct bpf_cpumask_info *, cpus,
-	   int, len)
+BPF_CALL_2(bpf_get_cpumask_info, struct bpf_map *, map, struct bpf_cpumask_info *, cpus)
 {
-	if (len != sizeof(*cpus))
+	if (!cpus)
 		return -EINVAL;
 
 	cpumask_copy(&cpus->cpu_possible_cpumask, cpu_possible_mask);
@@ -92,6 +91,6 @@ const struct bpf_func_proto bpf_get_cpumask_info_proto = {
 	.func		= bpf_get_cpumask_info,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
-	.arg2_type	= ARG_CONST_SIZE,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_PTR_TO_MAP_VALUE_OR_NULL,
 };
diff --git a/samples/bpf/sched_select_core_kern.c b/samples/bpf/sched_select_core_kern.c
index 18617e89b3957af3e8a4657ad0b2dca5374aeace..30765ae65f8505575759ca60dbf73798dd442e15 100644
--- a/samples/bpf/sched_select_core_kern.c
+++ b/samples/bpf/sched_select_core_kern.c
@@ -62,7 +62,7 @@ struct tag_info {
 
 struct tag_info tag_tbl[] = {
 	{TAG_NONE, ""},
-	{TAG_ID(1), "0-3"},
+	{TAG_ID(1), "0-1"},
 	{TAG_ID(2), "4-7"},
 	{TAG_MAX, ""},
 };
@@ -94,13 +94,17 @@ static struct cpumask *select_better_cpus(struct task_struct *p,
 	long min_util = INT_MIN;
 	struct task_group *tg;
 	long spare;
-	int cpu;
+	int cpu, i;
 
 	if (!prefer_cpus_valid(prefer_cpus, (void *)getVal(p->cpus_ptr)))
 		return (void *)getVal(p->cpus_ptr);
 
 	tg = p->sched_task_group;
-	libbpf_for_each_cpu(cpu, prefer_cpus) {
+	for (i = 0, cpu = -1; i < BPF_SCHED_LOOP_MAX; i++) {
+		cpu = libbpf_cpumask_next(cpu, (void *)getVal(prefer_cpus));
+		if (cpu >= libbpf_nr_cpus_ids())
+			break;
+
 		if (idlest_cpu && libbpf_available_idle_cpu(cpu)) {
 			*idlest_cpu = cpu;
 		} else if (idlest_cpu) {
@@ -159,9 +163,14 @@ int BPF_PROG(cfs_select_cpu_range, struct sched_migrate_ctx *h_ctx)
 SEC("sched/cfs_select_rq_exit")
 int BPF_PROG(cfs_select_cpu_range_exit, struct sched_migrate_ctx *h_ctx)
 {
+	struct task_struct *p = getVal(h_ctx->task);
+	long tag = getVal(p->tag);
 	int *idlest_cpu;
 	int key = 0;
 
+	if (tag <= TAG_NONE || tag >= TAG_MAX)
+		return SELECT_RQ_EXIT_CPU_VALID;
+
 	idlest_cpu = bpf_map_lookup_elem(&map_idlest_cpu, &key);
 	if (!idlest_cpu) {
 		libbpf_sched_set_task_cpus_ptr(h_ctx, (void *)getVal(h_ctx->cpus_allowed));
@@ -186,7 +195,7 @@ static int find_idlest_cpu(struct task_struct *p, int parent)
 	int cpu;
 	int i;
 
-	for (i = 0, cpu = -1; i < NR_CPUS; i++) {
+	for (i = 0, cpu = -1; i < BPF_SCHED_LOOP_MAX; i++) {
 		cpu = libbpf_cpumask_next(cpu, (void *)getVal(p->cpus_ptr));
 		if (cpu >= libbpf_nr_cpus_ids())
 			break;
@@ -203,17 +212,26 @@ static int find_idlest_cpu(struct task_struct *p, int parent)
 
 static int select_idle_cpu(struct task_struct *p, int parent, int prev_cpu)
 {
-	int cpu;
+	int cpu, i;
 
 	if (libbpf_available_idle_cpu(prev_cpu))
 		return prev_cpu;
 
 	if (libbpf_available_idle_cpu(parent))
-		return prev_cpu;
+		return parent;
+
+	cpu = libbpf_cpumask_next_wrap(prev_cpu - 1,
+				       (void *)getVal(p->cpus_ptr),
+				       prev_cpu, false);
+	for (i = 0; i < BPF_SCHED_LOOP_MAX; i++) {
+		if (cpu >= libbpf_nr_cpumask_bits())
+			break;
 
-	libbpf_for_each_cpu_wrap(cpu, (void *)getVal(p->cpus_ptr), prev_cpu) {
 		if (libbpf_available_idle_cpu(cpu))
 			return cpu;
+
+		cpu = libbpf_cpumask_next_wrap(cpu, (void *)getVal(p->cpus_ptr),
+					       prev_cpu, true);
 	}
 
 	return prev_cpu;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 57b927e99092cbf3cd7dac8d470c4c25302891e4..22f7880a0bb63e588394c08df745f02904f539f9 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4613,7 +4613,7 @@ union bpf_attr {
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
- * int bpf_get_cpumask_info(struct bpf_cpumask_info *cpus, int len)
+ * int bpf_get_cpumask_info(struct bpf_map *map, struct bpf_cpumask_info *cpus)
  *	Description
  *		Get system cpus returned in *cpus*.
  *	Return
diff --git a/tools/lib/bpf/libbpf_sched.h b/tools/lib/bpf/libbpf_sched.h
index 6cb30e8e81f802ebfdbc791e3b8bb4ae78653886..187e854b99b3e5b42d9b1de2fe454aba18559c3f 100644
--- a/tools/lib/bpf/libbpf_sched.h
+++ b/tools/lib/bpf/libbpf_sched.h
@@ -21,6 +21,8 @@
 #include 
 #include 
 
+/* set bigger value may lead verifier failed */
+#define BPF_SCHED_LOOP_MAX 1024
 #define INVALID_PTR ((void *)(0UL))
 #define getVal(P) \
 ({ \
@@ -69,6 +71,13 @@ static __always_inline int libbpf_nr_cpumask_bits(void);
 
 #endif
 
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__type(key, u32);
+	__type(value, struct bpf_cpumask_info);
+	__uint(max_entries, 1);
+} map_cpumask_info SEC(".maps");
+
 static __always_inline long libbpf_cpumask_copy(struct cpumask *dst,
 						struct cpumask *src)
 {
@@ -228,58 +237,93 @@ static __always_inline long libbpf_cpumask_cpulist_parse(char *src1,
 
 static __always_inline int libbpf_num_active_cpus(void)
 {
-	struct bpf_cpumask_info cpus;
+	struct bpf_cpumask_info *cpus;
+	int key = 0;
+
+	cpus = bpf_map_lookup_elem(&map_cpumask_info, &key);
+	if (!cpus)
+		return -1;
 
-	bpf_get_cpumask_info(&cpus, sizeof(cpus));
-	return getVal(cpus.nums_active_cpus);
+	bpf_get_cpumask_info(&map_cpumask_info, cpus);
+	return getVal(cpus->nums_active_cpus);
 }
 
 static __always_inline int libbpf_num_possible_cpus(void)
 {
-	struct bpf_cpumask_info cpus;
+	struct bpf_cpumask_info *cpus;
+	int key = 0;
 
-	bpf_get_cpumask_info(&cpus, sizeof(cpus));
-	return getVal(cpus.nums_possible_cpus);
+	cpus = bpf_map_lookup_elem(&map_cpumask_info, &key);
+	if (!cpus)
+		return -1;
+
+	bpf_get_cpumask_info(&map_cpumask_info, cpus);
+	return getVal(cpus->nums_possible_cpus);
 }
 
 static __always_inline void libbpf_possible_cpus_mask(struct cpumask *mask)
 {
-	struct bpf_cpumask_info cpus;
+	struct bpf_cpumask_info *cpus;
+	int key = 0;
+
+	cpus = bpf_map_lookup_elem(&map_cpumask_info, &key);
+	if (!cpus)
+		return;
 
-	bpf_get_cpumask_info(&cpus, sizeof(cpus));
-	libbpf_cpumask_copy(mask, &cpus.cpu_possible_cpumask);
+	bpf_get_cpumask_info(&map_cpumask_info, cpus);
+	libbpf_cpumask_copy(mask, &cpus->cpu_possible_cpumask);
 }
 
 static __always_inline void libbpf_active_cpus_mask(struct cpumask *mask)
 {
-	struct bpf_cpumask_info cpus;
+	struct bpf_cpumask_info *cpus;
+	int key = 0;
 
-	bpf_get_cpumask_info(&cpus, sizeof(cpus));
-	libbpf_cpumask_copy(mask, &cpus.cpu_active_cpumask);
+	cpus = bpf_map_lookup_elem(&map_cpumask_info, &key);
+	if (!cpus)
+		return;
+
+	bpf_get_cpumask_info(&map_cpumask_info, cpus);
+	libbpf_cpumask_copy(mask, &cpus->cpu_active_cpumask);
 }
 
 static __always_inline void libbpf_isolate_cpus_mask(struct cpumask *mask)
 {
-	struct bpf_cpumask_info cpus;
+	struct bpf_cpumask_info *cpus;
+	int key = 0;
+
+	cpus = bpf_map_lookup_elem(&map_cpumask_info, &key);
+	if (!cpus)
+		return;
 
-	bpf_get_cpumask_info(&cpus, sizeof(cpus));
-	libbpf_cpumask_copy(mask, &cpus.cpu_isolate_cpumask);
+	bpf_get_cpumask_info(&map_cpumask_info, cpus);
+	libbpf_cpumask_copy(mask, &cpus->cpu_isolate_cpumask);
 }
 
 static __always_inline int libbpf_nr_cpus_ids(void)
 {
-	struct bpf_cpumask_info cpus;
+	struct bpf_cpumask_info *cpus;
+	int key = 0;
 
-	bpf_get_cpumask_info(&cpus, sizeof(cpus));
-	return getVal(cpus.nr_cpu_ids);
+	cpus = bpf_map_lookup_elem(&map_cpumask_info, &key);
+	if (!cpus)
+		return -1;
+
+	bpf_get_cpumask_info(&map_cpumask_info, cpus);
+	return getVal(cpus->nr_cpu_ids);
 }
 
 static __always_inline int libbpf_nr_cpumask_bits(void)
 {
-	struct bpf_cpumask_info cpus;
+	struct bpf_cpumask_info *cpus;
+	int key = 0;
+
+	cpus = bpf_map_lookup_elem(&map_cpumask_info, &key);
+	if (!cpus)
+		return -1;
 
-	bpf_get_cpumask_info(&cpus, sizeof(cpus));
-	return getVal(cpus.bpf_nr_cpumask_bits);
+	bpf_get_cpumask_info(&map_cpumask_info, cpus);
+	return getVal(cpus->bpf_nr_cpumask_bits);
 }
 
 static __always_inline unsigned long libbpf_cfs_load_avg_of(int cpu)
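
For reference, a minimal sketch (not part of the patch) of how a scheduler BPF program consumes the reworked helper: the value is first looked up from the per-CPU map_cpumask_info array added in libbpf_sched.h, then both the map and the looked-up pointer are passed to bpf_get_cpumask_info(). The SEC() name, program name and printed fields below are illustrative only; struct bpf_cpumask_info, map_cpumask_info and getVal() are assumed to come from the libbpf_sched.h changes above.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "libbpf_sched.h"	/* struct bpf_cpumask_info, map_cpumask_info, getVal() */

SEC("sched/cfs_select_rq")	/* hook name assumed from the sample program above */
int BPF_PROG(show_cpumask_info, struct sched_migrate_ctx *h_ctx)
{
	struct bpf_cpumask_info *cpus;
	int key = 0;

	/* The helper no longer fills stack memory of a verifier-checked size;
	 * it writes into map memory, so the per-CPU slot must be looked up
	 * (and NULL-checked) before calling it.
	 */
	cpus = bpf_map_lookup_elem(&map_cpumask_info, &key);
	if (!cpus)
		return 0;

	/* New calling convention: the map and the looked-up value. */
	bpf_get_cpumask_info(&map_cpumask_info, cpus);

	bpf_printk("nr_cpu_ids=%d active_cpus=%d\n",
		   getVal(cpus->nr_cpu_ids), getVal(cpus->nums_active_cpus));
	return 0;
}

char _license[] SEC("license") = "GPL";

The same lookup-then-call pattern is what the libbpf_num_*_cpus()/libbpf_*_cpus_mask() wrappers in the diff implement, which is why they now return -1 (or silently return) when the map lookup fails.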