From e70b55c8539b28c3822a9e13363ab2b14f990774 Mon Sep 17 00:00:00 2001
From: Cruz Zhao
Date: Mon, 10 Feb 2025 17:28:16 +0800
Subject: [PATCH] anolis: sched: introduce ID_BOOK_CPU

ANBZ: #22143

In high-concurrency scenarios, highclass tasks often select the same
idle CPU when waking up, which can cause scheduling delays. The reason
is that idle CPU selection is not serialized: between the moment a
wakeup picks an idle CPU and the moment the woken task is actually
enqueued there, the CPU still looks idle to every other concurrent
wakeup.

To reduce the scheduling delays caused by this race, we introduce
ID_BOOK_CPU: when a highclass task selects an idle CPU, check whether
the CPU has been booked by another task and whether it is still idle
before selecting it for the wakeup. If the idle CPU we found has been
booked by another task, select again, until we book an idle CPU
successfully or reach the retry limit. If the idle CPU has not been
booked by anyone else, set rq->booked to true to mark the CPU as
booked, and set rq->booked back to false once the highclass task is
actually enqueued on that CPU's rq.

To enable ID_BOOK_CPU, echo ID_BOOK_CPU > /sys/kernel/debug/sched_features.
To set the retry limit, modify /proc/sys/kernel/sched_id_book_cpu_nr_tries.

Signed-off-by: Cruz Zhao
---
 include/linux/sched/sysctl.h |  1 +
 kernel/sched/core.c          |  1 +
 kernel/sched/fair.c          | 80 +++++++++++++++++++++++++++++++++++-
 kernel/sched/features.h      |  1 +
 kernel/sched/sched.h         |  1 +
 kernel/sysctl.c              |  7 ++++
 6 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 74bd54c1db5c..c66f2f1b784f 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -80,6 +80,7 @@ extern unsigned int sysctl_sched_cfs_bw_burst_enabled;
 #ifdef CONFIG_GROUP_IDENTITY
 extern unsigned int sysctl_sched_bvt_place_epsilon;
 extern unsigned int sysctl_sched_idle_saver_wmark;
+extern unsigned int sysctl_sched_id_book_cpu_nr_tries;
 extern unsigned int sysctl_sched_group_indentity_enabled;
 extern int sched_group_identity_enable_handler(struct ctl_table *table, int write,
 					       void __user *buffer, size_t *lenp,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 598ddd3f7c41..8875da800891 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8657,6 +8657,7 @@ void __init sched_init(void)
 		rq->expel_start = rq->clock;
 		rq->expel_sum = 0;
 		seqcount_init(&rq->expel_seq);
+		rq->booked = false;
 #endif
 
 #ifdef CONFIG_SCHED_CORE
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 63fbb1a3db71..3d2cc6b6cfb7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -157,6 +157,7 @@ unsigned int sysctl_sched_bvt_place_epsilon = 1000000UL;
  * Default: 0 msec, units: nanoseconds
  */
 unsigned int sysctl_sched_idle_saver_wmark;
+unsigned int sysctl_sched_id_book_cpu_nr_tries = 5;
 
 #ifdef CONFIG_SCHED_SMT
 /*
@@ -1008,6 +1009,50 @@ static inline u64 get_avg_idle(struct rq *rq)
 	return rq->avg_idle;
 }
 
+DEFINE_PER_CPU(bool, has_id_idle_cpu);
+static inline bool found_id_idle_cpu(void)
+{
+	if (group_identity_disabled())
+		return false;
+	if (sched_feat(ID_BOOK_CPU))
+		return this_cpu_read(has_id_idle_cpu);
+	return false;
+}
+
+static inline void set_has_id_idle_cpu(bool has)
+{
+	if (group_identity_disabled())
+		return;
+	if (sched_feat(ID_BOOK_CPU))
+		this_cpu_write(has_id_idle_cpu, has);
+}
+
+static inline bool rq_booked(struct rq *rq)
+{
+	if (!group_identity_enabled(rq))
+		return false;
+	if (sched_feat(ID_BOOK_CPU))
+		return rq->booked;
+	return false;
+}
+
+static inline void set_rq_booked(struct rq *rq, bool booked)
+{
+	if (!group_identity_enabled(rq))
+		return;
+	if (sched_feat(ID_BOOK_CPU))
+		rq->booked = booked;
+}
+
+static inline int get_id_book_cpu_nr_tries(void)
+{
+	if (group_identity_disabled())
+		return 0;
+	if (sched_feat(ID_BOOK_CPU))
+		return sysctl_sched_id_book_cpu_nr_tries;
+	return 0;
+}
+
 static noinline bool
 id_idle_cpu(struct task_struct *p, int cpu, bool expellee, bool *idle)
 {
@@ -1054,6 +1099,9 @@ id_idle_cpu(struct task_struct *p, int cpu, bool expellee, bool *idle)
 	if (need_expel)
 		return false;
 
+	if (is_highclass_task(p) && rq_booked(rq))
+		return false;
+
 	/* CPU full of underclass is idle for highclass */
 	if (!is_idle) {
 		/*
@@ -2324,6 +2372,12 @@ id_idle_cpu(struct task_struct *p, int cpu, bool expellee, bool *idle)
 	return is_idle;
 }
 
+static inline bool found_id_idle_cpu(void) { return false; }
+static inline void set_has_id_idle_cpu(bool has) { }
+static inline bool rq_booked(struct rq *rq) { return false; }
+static inline void set_rq_booked(struct rq *rq, bool booked) { }
+static inline int get_id_book_cpu_nr_tries(void) { return 0; }
+
 static inline void identity_init_cfs_rq(struct cfs_rq *cfs_rq) { }
@@ -8276,6 +8330,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	assert_list_leaf_cfs_rq(rq);
 
 	hrtick_update(rq);
+
+	if (is_highclass_task(p))
+		set_rq_booked(rq, false);
 }
 
 static void set_next_buddy(struct sched_entity *se);
@@ -9156,6 +9213,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	if ((unsigned int)recent_used_cpu < nr_cpumask_bits)
 		return recent_used_cpu;
 
+	set_has_id_idle_cpu(false);
 	return target;
 }
 
@@ -9571,6 +9629,8 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 	int new_cpu = prev_cpu;
 	int want_affine = 0;
 	int sync = (wake_flags & WF_SYNC) && !(current->flags & PF_EXITING);
+	struct rq *rq;
+	struct rq_flags rf;
 
 	/* Endow LS task the ability to balance at fork */
 	if (is_highclass_task(p) && (sd_flag & SD_BALANCE_FORK))
@@ -9619,9 +9679,27 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 		/* Slow path */
 		new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
 	} else if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */
+		int nr_tries = get_id_book_cpu_nr_tries();
 		/* Fast path */
-
+select:
+		set_has_id_idle_cpu(true);
 		new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
+		if (is_highclass_task(p) && found_id_idle_cpu()) {
+			rq = cpu_rq(new_cpu);
+			rq_lock(rq, &rf);
+			if (!id_idle_cpu(p, new_cpu, false, NULL)) {
+				if (nr_tries > 0) {
+					nr_tries--;
+					rq_unlock(rq, &rf);
+					goto select;
+				} else {
+					rq_unlock(rq, &rf);
+				}
+			} else {
+				set_rq_booked(rq, true);
+				rq_unlock(rq, &rf);
+			}
+		}
 
 		if (want_affine)
 			current->recent_used_cpu = cpu;
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 4c65f2976d74..45e3c13747e6 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -107,6 +107,7 @@ SCHED_FEAT(ID_EXPELLER_SHARE_CORE, true)
 SCHED_FEAT(ID_ABSOLUTE_EXPEL, false)
 SCHED_FEAT(ID_LOAD_BALANCE, false)
 SCHED_FEAT(ID_PUSH_EXPELLEE, false)
+SCHED_FEAT(ID_BOOK_CPU, false)
 #endif
 
 #ifdef CONFIG_SCHED_CORE
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 95de953ef6cf..587fc0c6b6ca 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1340,6 +1340,7 @@ struct rq {
 	u64 under_exec_sum;
 	u64 under_exec_stamp;
 	u64 avg_id_idle;
+	bool booked;
 #ifdef CONFIG_SCHED_SMT
 	unsigned long next_expel_ib;
 	unsigned long next_expel_update;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7378b8fbd47f..de42025d06b6 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2032,6 +2032,13 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "sched_id_book_cpu_nr_tries",
+		.data		= &sysctl_sched_id_book_cpu_nr_tries,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 	{
 		.procname	= "sched_expel_update_interval",
 		.data		= &sysctl_sched_expel_update_interval,
-- 
Gitee
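
For reference, below is a stand-alone user-space sketch of the book/clear
handshake this patch adds. It is illustrative only: mock_rq, pick_idle_cpu()
and try_book_cpu() are hypothetical names, a pthread mutex stands in for the
rq lock, and the NR_TRIES constant models the sched_id_book_cpu_nr_tries
sysctl; none of it is kernel API.

#include <stdbool.h>
#include <stdio.h>
#include <pthread.h>

#define NR_CPUS		4
#define NR_TRIES	5	/* models sysctl_sched_id_book_cpu_nr_tries */

struct mock_rq {
	pthread_mutex_t lock;	/* stands in for the rq lock */
	bool booked;		/* stands in for rq->booked */
	bool idle;		/* stands in for the id_idle_cpu() result */
};

static struct mock_rq rqs[NR_CPUS];

/* Lockless scan, like the fast path: first CPU that looks idle and unbooked. */
static int pick_idle_cpu(void)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		if (rqs[cpu].idle && !rqs[cpu].booked)
			return cpu;
	return -1;
}

/*
 * Mirrors the select_task_rq_fair() hunk: re-check idleness under the
 * rq lock, book the CPU on success, and retry a lost race up to
 * NR_TRIES times.
 */
static int try_book_cpu(void)
{
	int nr_tries = NR_TRIES;
	int cpu;

select:
	cpu = pick_idle_cpu();
	if (cpu < 0)
		return -1;

	pthread_mutex_lock(&rqs[cpu].lock);
	if (!rqs[cpu].idle || rqs[cpu].booked) {
		/* Lost the race: someone booked or occupied it first. */
		pthread_mutex_unlock(&rqs[cpu].lock);
		if (nr_tries-- > 0)
			goto select;
		return cpu;	/* out of tries: keep this CPU anyway */
	}
	rqs[cpu].booked = true;	/* booked: concurrent wakeups skip it */
	pthread_mutex_unlock(&rqs[cpu].lock);
	return cpu;
}

/* Mirrors the enqueue_task_fair() hunk: the booking ends at enqueue. */
static void enqueue_on(int cpu)
{
	pthread_mutex_lock(&rqs[cpu].lock);
	rqs[cpu].idle = false;
	rqs[cpu].booked = false;
	pthread_mutex_unlock(&rqs[cpu].lock);
}

int main(void)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		pthread_mutex_init(&rqs[cpu].lock, NULL);
		rqs[cpu].idle = true;
	}

	/* Two wakeups before either enqueue: booking keeps them apart. */
	int a = try_book_cpu();
	int b = try_book_cpu();
	printf("first wakeup booked CPU %d, second booked CPU %d\n", a, b);
	enqueue_on(a);
	enqueue_on(b);
	return 0;
}

Built with "cc -pthread sketch.c", this prints CPU 0 and CPU 1: the second
wakeup skips CPU 0 because it is booked, even though no task has been
enqueued there yet, which is exactly the pile-up ID_BOOK_CPU avoids.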