diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index f5d127a5d819bd1246da1e67bb1c898d25f51a1d..f7f8528de24fd940dadb92d26c16427a614d235c 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -16,22 +16,31 @@ extern void context_tracking_cpu_set(int cpu);
 /* Called with interrupts disabled. */
 extern void __context_tracking_enter(enum ctx_state state);
 extern void __context_tracking_exit(enum ctx_state state);
+extern void __sys_tracking_enter(enum sys_state state);
+extern void __sys_tracking_exit(enum sys_state state);
+extern void sys_tracking_enter(enum sys_state state);
+extern void sys_tracking_exit(enum sys_state state);
 extern void context_tracking_enter(enum ctx_state state);
 extern void context_tracking_exit(enum ctx_state state);
 extern void context_tracking_user_enter(void);
 extern void context_tracking_user_exit(void);
+extern bool is_sys_aware_enabled(void);
+
 static inline void user_enter(void)
 {
 	if (context_tracking_enabled())
 		context_tracking_enter(CONTEXT_USER);
-
+	if (is_sys_aware_enabled())
+		sys_tracking_enter(ST_USER);
 }
 
 static inline void user_exit(void)
 {
 	if (context_tracking_enabled())
 		context_tracking_exit(CONTEXT_USER);
+	if (is_sys_aware_enabled())
+		sys_tracking_exit(ST_USER);
 }
 
 /* Called with interrupts disabled. */
@@ -39,12 +48,16 @@ static __always_inline void user_enter_irqoff(void)
 {
 	if (context_tracking_enabled())
 		__context_tracking_enter(CONTEXT_USER);
+	if (is_sys_aware_enabled())
+		__sys_tracking_enter(ST_USER);
 }
 
 static __always_inline void user_exit_irqoff(void)
 {
 	if (context_tracking_enabled())
 		__context_tracking_exit(CONTEXT_USER);
+	if (is_sys_aware_enabled())
+		__sys_tracking_exit(ST_USER);
 }
 
 static inline enum ctx_state exception_enter(void)
@@ -61,6 +74,20 @@ static inline enum ctx_state exception_enter(void)
 	return prev_ctx;
 }
 
+static inline enum sys_state st_exception_enter(void)
+{
+	enum sys_state prev_sys;
+
+	if (!is_sys_aware_enabled())
+		return 0;
+
+	prev_sys = this_cpu_read(sys_tracking.state);
+	if (prev_sys != ST_KERNEL)
+		sys_tracking_exit(prev_sys);
+
+	return prev_sys;
+}
+
 static inline void exception_exit(enum ctx_state prev_ctx)
 {
 	if (context_tracking_enabled()) {
@@ -69,6 +96,14 @@ static inline void exception_exit(enum ctx_state prev_ctx)
 	}
 }
 
+static inline void st_exception_exit(enum sys_state prev_sys)
+{
+	if (is_sys_aware_enabled()) {
+		if (prev_sys != ST_KERNEL)
+			sys_tracking_enter(prev_sys);
+	}
+}
+
 /**
  * ct_state() - return the current context tracking state if known
  *
@@ -82,14 +117,22 @@ static __always_inline enum ctx_state ct_state(void)
 	return context_tracking_enabled() ?
 		this_cpu_read(context_tracking.state) : CONTEXT_DISABLED;
 }
+static __always_inline enum sys_state sys_state(void)
+{
+	return is_sys_aware_enabled() ?
+		this_cpu_read(sys_tracking.state) : ST_DISABLED;
+}
 #else
 static inline void user_enter(void) { }
 static inline void user_exit(void) { }
 static inline void user_enter_irqoff(void) { }
 static inline void user_exit_irqoff(void) { }
 static inline enum ctx_state exception_enter(void) { return 0; }
+static inline enum sys_state st_exception_enter(void) { return 0; }
 static inline void exception_exit(enum ctx_state prev_ctx) { }
+static inline void st_exception_exit(enum sys_state prev_sys) { }
 static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; }
+static inline enum sys_state sys_state(void) { return ST_DISABLED; }
 #endif /* !CONFIG_CONTEXT_TRACKING */
 
 #define CT_WARN_ON(cond) WARN_ON(context_tracking_enabled() && (cond))
@@ -115,6 +158,9 @@ static __always_inline void guest_enter_irqoff(void)
 	if (context_tracking_enabled())
 		__context_tracking_enter(CONTEXT_GUEST);
 
+	if (is_sys_aware_enabled())
+		__sys_tracking_enter(ST_GUEST);
+
 	/* KVM does not hold any references to rcu protected data when it
 	 * switches CPU into a guest mode. In fact switching to a guest mode
 	 * is very similar to exiting to userspace from rcu point of view. In
@@ -133,6 +179,9 @@ static __always_inline void context_tracking_guest_exit(void)
 {
 	if (context_tracking_enabled())
 		__context_tracking_exit(CONTEXT_GUEST);
+
+	if (is_sys_aware_enabled())
+		__sys_tracking_exit(ST_GUEST);
 }
 
 static __always_inline void vtime_account_guest_exit(void)
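The new st_exception_enter()/st_exception_exit() helpers above follow the same save/restore protocol as the existing exception_enter()/exception_exit() pair. A minimal sketch of the intended pairing, assuming ID_SYS_AWARE is enabled; my_preemption_point() is a hypothetical caller, not part of this patch:

#include <linux/context_tracking.h>

/* Hypothetical caller illustrating the save/restore pairing. */
static void my_preemption_point(void)
{
	enum ctx_state prev_ctx = exception_enter();
	enum sys_state prev_sys = st_exception_enter();

	/* ... work that must be accounted as ST_KERNEL ... */

	/* Restore in LIFO order: sys state first, then ctx state. */
	st_exception_exit(prev_sys);
	exception_exit(prev_ctx);
}

Note that the disabled stubs return 0, which equals ST_KERNEL, so st_exception_exit(0) is harmlessly a no-op when the feature is off.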
diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h
index 65a60d3313b006e40d27da7418b8174564b82d39..d045739e01f4d56f10dc03cfa3a752c100174a7a 100644
--- a/include/linux/context_tracking_state.h
+++ b/include/linux/context_tracking_state.h
@@ -22,9 +22,19 @@ struct context_tracking {
 	} state;
 };
 
+struct sys_tracking {
+	enum sys_state {
+		ST_DISABLED = -1,	/* returned by sys_state() if unknown */
+		ST_KERNEL = 0,
+		ST_USER,
+		ST_GUEST,
+	} state;
+};
+
 #ifdef CONFIG_CONTEXT_TRACKING
 extern struct static_key_false context_tracking_key;
 DECLARE_PER_CPU(struct context_tracking, context_tracking);
+DECLARE_PER_CPU(struct sys_tracking, sys_tracking);
 
 static __always_inline bool context_tracking_enabled(void)
 {
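Because DEFINE_PER_CPU data is zero-initialized and ST_KERNEL is 0, every CPU implicitly starts out in ST_KERNEL; ST_DISABLED is only synthesized by sys_state() when the feature is off and is never stored in the per-CPU slot. A sketch of how another CPU's slot can be sampled (peek_sys_state() is hypothetical; the cross-CPU read is inherently racy, which is acceptable for a placement heuristic):

#include <linux/context_tracking.h>
#include <linux/percpu.h>

/* Hypothetical helper: sample a remote CPU's last boundary state. */
static enum sys_state peek_sys_state(int cpu)
{
	return per_cpu(sys_tracking.state, cpu);
}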
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 36a98c48aedc7c6c14e732a7ce3f360a1b8f3087..75a4e2d4c5dea968a9efd05e7aeae1a97c93990b 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -29,6 +29,7 @@ DEFINE_STATIC_KEY_FALSE(context_tracking_key);
 EXPORT_SYMBOL_GPL(context_tracking_key);
 
 DEFINE_PER_CPU(struct context_tracking, context_tracking);
+DEFINE_PER_CPU(struct sys_tracking, sys_tracking);
 EXPORT_SYMBOL_GPL(context_tracking);
 
 static noinstr bool context_tracking_recursion_enter(void)
@@ -103,6 +104,16 @@ void noinstr __context_tracking_enter(enum ctx_state state)
 }
 EXPORT_SYMBOL_GPL(__context_tracking_enter);
 
+void noinstr __sys_tracking_enter(enum sys_state state)
+{
+	if (!is_sys_aware_enabled())
+		return;
+
+	if (__this_cpu_read(sys_tracking.state) != state)
+		__this_cpu_write(sys_tracking.state, state);
+}
+EXPORT_SYMBOL_GPL(__sys_tracking_enter);
+
 void context_tracking_enter(enum ctx_state state)
 {
 	unsigned long flags;
@@ -125,6 +136,22 @@ void context_tracking_enter(enum ctx_state state)
 NOKPROBE_SYMBOL(context_tracking_enter);
 EXPORT_SYMBOL_GPL(context_tracking_enter);
 
+void sys_tracking_enter(enum sys_state state)
+{
+	unsigned long flags;
+
+	if (!is_sys_aware_enabled())
+		return;
+
+	if (in_interrupt())
+		return;
+	local_irq_save(flags);
+	__sys_tracking_enter(state);
+	local_irq_restore(flags);
+}
+NOKPROBE_SYMBOL(sys_tracking_enter);
+EXPORT_SYMBOL_GPL(sys_tracking_enter);
+
 void context_tracking_user_enter(void)
 {
 	user_enter();
@@ -168,6 +195,16 @@ void noinstr __context_tracking_exit(enum ctx_state state)
 }
 EXPORT_SYMBOL_GPL(__context_tracking_exit);
 
+void noinstr __sys_tracking_exit(enum sys_state state)
+{
+	if (!is_sys_aware_enabled())
+		return;
+
+	if (__this_cpu_read(sys_tracking.state) == state)
+		__this_cpu_write(sys_tracking.state, ST_KERNEL);
+}
+EXPORT_SYMBOL_GPL(__sys_tracking_exit);
+
 void context_tracking_exit(enum ctx_state state)
 {
 	unsigned long flags;
@@ -182,6 +219,22 @@ void context_tracking_exit(enum ctx_state state)
 NOKPROBE_SYMBOL(context_tracking_exit);
 EXPORT_SYMBOL_GPL(context_tracking_exit);
 
+void sys_tracking_exit(enum sys_state state)
+{
+	unsigned long flags;
+
+	if (!is_sys_aware_enabled())
+		return;
+
+	if (in_interrupt())
+		return;
+	local_irq_save(flags);
+	__sys_tracking_exit(state);
+	local_irq_restore(flags);
+}
+NOKPROBE_SYMBOL(sys_tracking_exit);
+EXPORT_SYMBOL_GPL(sys_tracking_exit);
+
 void context_tracking_user_exit(void)
 {
 	user_exit();
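The enter/exit pair above forms a small per-CPU state machine: enter() stores the requested state, while exit() only falls back to ST_KERNEL when the stored state matches, so a mismatched exit is a no-op. A hedged smoke-test sketch of those semantics, assuming ID_SYS_AWARE is on and we run in normal process context (sys_tracking_selftest() is hypothetical, not part of the patch):

#include <linux/bug.h>
#include <linux/context_tracking.h>

static void sys_tracking_selftest(void)
{
	WARN_ON(sys_state() != ST_KERNEL);	/* per-CPU default is 0 == ST_KERNEL */

	sys_tracking_enter(ST_USER);
	WARN_ON(sys_state() != ST_USER);

	sys_tracking_exit(ST_GUEST);		/* mismatched exit: no-op */
	WARN_ON(sys_state() != ST_USER);

	sys_tracking_exit(ST_USER);		/* matching exit: back to ST_KERNEL */
	WARN_ON(sys_state() != ST_KERNEL);
}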
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1b325aee62a3aeba0ca7591bc7af5efc11325fe8..cb4cb2bc16d74bab824546209760b82a8781398d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5887,7 +5887,9 @@ asmlinkage __visible void __sched schedule_user(void)
 	 * too frequently to make sense yet.
 	 */
 	enum ctx_state prev_state = exception_enter();
+	enum sys_state prev_st_state = st_exception_enter();
 	schedule();
+	st_exception_exit(prev_st_state);
 	exception_exit(prev_state);
 }
 #endif
@@ -5968,6 +5970,7 @@ EXPORT_SYMBOL(preempt_schedule);
 asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
 {
 	enum ctx_state prev_ctx;
+	enum sys_state prev_st_state;
 
 	if (likely(!preemptible()))
 		return;
@@ -5994,7 +5997,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
 	 * an infinite recursion.
 	 */
 	prev_ctx = exception_enter();
+	prev_st_state = st_exception_enter();
 	__schedule(true);
+	st_exception_exit(prev_st_state);
 	exception_exit(prev_ctx);
 
 	preempt_latency_stop(1);
@@ -6014,11 +6019,13 @@ EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
 asmlinkage __visible void __sched preempt_schedule_irq(void)
 {
 	enum ctx_state prev_state;
+	enum sys_state prev_st_state;
 
 	/* Catch callers which need to be fixed */
 	BUG_ON(preempt_count() || !irqs_disabled());
 
 	prev_state = exception_enter();
+	prev_st_state = st_exception_enter();
 
 	do {
 		preempt_disable();
@@ -6028,6 +6035,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void)
 		sched_preempt_enable_no_resched();
 	} while (need_resched());
 
+	st_exception_exit(prev_st_state);
 	exception_exit(prev_state);
 }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1fc6bc53c8813b7ad2e157cef930fa96eb4e4517..b2fbcb49b737360d5e5cc54e9bb59cf2dc799c33 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -21,7 +21,7 @@
  * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
  */
 #include "sched.h"
-
+#include <linux/context_tracking.h>
 /*
  * Targeted preemption latency for CPU-bound tasks:
 *
@@ -1056,8 +1056,35 @@ static inline int get_id_book_cpu_nr_tries(void)
 	return 0;
 }
 
+bool is_sys_aware_enabled(void)
+{
+	return sched_feat(ID_SYS_AWARE);
+}
+EXPORT_SYMBOL_GPL(is_sys_aware_enabled);
+
+#if defined(CONFIG_PREEMPT) || !defined(CONFIG_CONTEXT_TRACKING)
+static inline bool is_cpu_in_sys_mode(int cpu)
+{
+	return false;
+}
+#else
+static inline bool is_cpu_in_sys_mode(int cpu)
+{
+	if (!is_sys_aware_enabled())
+		return false;
+
+	if (!cpu_online(cpu))
+		return false;
+
+	if (cpu_rq(cpu)->curr == cpu_rq(cpu)->idle)
+		return false;
+
+	return per_cpu(sys_tracking.state, cpu) == ST_KERNEL;
+}
+#endif
+
 static noinline bool
-id_idle_cpu(struct task_struct *p, int cpu, bool expellee, bool *idle)
+id_idle_cpu(struct task_struct *p, int cpu, bool expellee, bool *idle, bool *is_sys)
 {
 	struct rq *rq;
 	bool need_expel;
@@ -1107,6 +1134,10 @@ id_idle_cpu(struct task_struct *p, int cpu, bool expellee, bool *idle)
 
 	/* CPU full of underclass is idle for highclass */
 	if (!is_idle) {
+
+		if (is_sys && is_highclass_task(p) && is_cpu_in_sys_mode(cpu))
+			*is_sys = true;
+
 		/*
 		 * For ID_LOAD_BALANCE, CPU full of underclass is also idle
 		 * for normal.
@@ -2353,9 +2384,16 @@ id_wake_affine(struct task_struct *p, int this_cpu, int prev_cpu)
 {
 	return true;
 }
-
+bool is_sys_aware_enabled(void)
+{
+	return false;
+}
+static inline bool is_cpu_in_sys_mode(int cpu)
+{
+	return false;
+}
 static inline bool
-id_idle_cpu(struct task_struct *p, int cpu, bool expellee, bool *idle)
+id_idle_cpu(struct task_struct *p, int cpu, bool expellee, bool *idle, bool *is_sys)
 {
 	bool is_idle = available_idle_cpu(cpu);
 
@@ -6875,7 +6913,7 @@ static void __push_expellee(struct rq *rq)
 	for_each_cpu_wrap(i, traverse_mask, cpu) {
 		struct rq *tmp_rq = cpu_rq(i);
 
-		if (id_idle_cpu(p, i, true, &idle)) {
+		if (id_idle_cpu(p, i, true, &idle, NULL)) {
 			dst_cpu = i;
 			dst_rq = cpu_rq(dst_cpu);
 			/*
@@ -8852,7 +8890,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
 
 static inline int __select_idle_cpu(int cpu, struct task_struct *p, int *id_backup)
 {
-	bool idle, is_seeker, is_expellee;
+	bool idle, is_seeker, is_expellee, is_sys = false;
 
 	is_seeker = is_idle_seeker_task(p);
 	is_expellee = is_expellee_task(p);
@@ -8862,12 +8900,13 @@ static inline int __select_idle_cpu(int cpu, struct task_struct *p, int *id_back
 	 * a backup option, which will be pick only when
 	 * failed to locate a real idle one.
 	 */
-	if ((id_idle_cpu(p, cpu, is_expellee, &idle) || sched_idle_cpu(cpu)) &&
+	if ((id_idle_cpu(p, cpu, is_expellee, &idle, &is_sys) || sched_idle_cpu(cpu)) &&
 	    sched_cpu_cookie_match(cpu_rq(cpu), p)) {
 		if (!group_identity_disabled()) {
 			if (idle || !is_seeker)
 				return cpu;
-			*id_backup = cpu;
+			if (*id_backup == -1 || !is_sys)
+				*id_backup = cpu;
 		} else
 			return cpu;
 	}
@@ -8978,7 +9017,7 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t
 		if (!cpumask_test_cpu(cpu, task_allowed_cpu(p)) ||
 		    !cpumask_test_cpu(cpu, sched_domain_span(sd)))
 			continue;
-		if (id_idle_cpu(p, cpu, is_expellee, NULL) || sched_idle_cpu(cpu))
+		if (id_idle_cpu(p, cpu, is_expellee, NULL, NULL) || sched_idle_cpu(cpu))
 			return cpu;
 	}
 
@@ -9176,7 +9215,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	 */
 	lockdep_assert_irqs_disabled();
 
-	if ((id_idle_cpu(p, target, is_expellee, NULL) || sched_idle_cpu(target)) &&
+	if ((id_idle_cpu(p, target, is_expellee, NULL, NULL) || sched_idle_cpu(target)) &&
 	    asym_fits_cpu(task_util, util_min, util_max, target))
 		return target;
 
@@ -9184,7 +9223,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	 * If the previous CPU is cache affine and idle, don't be stupid:
 	 */
 	if (prev != target && cpus_share_cache(prev, target) &&
-	    (id_idle_cpu(p, prev, is_expellee, NULL) || sched_idle_cpu(prev)) &&
+	    (id_idle_cpu(p, prev, is_expellee, NULL, NULL) || sched_idle_cpu(prev)) &&
 	    asym_fits_cpu(task_util, util_min, util_max, prev)) {
 
 		if (!static_branch_unlikely(&sched_cluster_active) ||
@@ -9215,7 +9254,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	if (recent_used_cpu != prev &&
 	    recent_used_cpu != target &&
 	    cpus_share_cache(recent_used_cpu, target) &&
-	    (id_idle_cpu(p, recent_used_cpu, is_expellee, NULL) ||
+	    (id_idle_cpu(p, recent_used_cpu, is_expellee, NULL, NULL) ||
 	     sched_idle_cpu(recent_used_cpu)) &&
 	    cpumask_test_cpu(p->recent_used_cpu, task_allowed_cpu(p)) &&
 	    asym_fits_cpu(task_util, util_min, util_max, recent_used_cpu)) {
@@ -9756,7 +9795,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 	if (is_highclass_task(p) && found_id_idle_cpu()) {
 		rq = cpu_rq(new_cpu);
 		rq_lock(rq, &rf);
-		if (!id_idle_cpu(p, new_cpu, false, NULL)) {
+		if (!id_idle_cpu(p, new_cpu, false, NULL, NULL)) {
 			if (nr_tries > 0) {
 				nr_tries--;
 				rq_unlock(rq, &rf);
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index ee41c2abc9de6cc9a5a8717c7613d4be83b567aa..7ae9c211cd2d84751eff4567129ef4bf93261e2d 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -108,6 +108,7 @@ SCHED_FEAT(ID_ABSOLUTE_EXPEL, false)
 SCHED_FEAT(ID_LOAD_BALANCE, false)
 SCHED_FEAT(ID_PUSH_EXPELLEE, false)
 SCHED_FEAT(ID_BOOK_CPU, false)
+SCHED_FEAT(ID_SYS_AWARE, false)
 #endif
 
 #ifdef CONFIG_SCHED_CORE
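ID_SYS_AWARE defaults to false, so the whole mechanism is opt-in. Like any SCHED_FEAT, it can be flipped at runtime through debugfs on kernels built with CONFIG_SCHED_DEBUG. A small user-space sketch of toggling it (writing NO_ID_SYS_AWARE disables it again):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *feat = "ID_SYS_AWARE";	/* "NO_ID_SYS_AWARE" turns it off */
	int fd = open("/sys/kernel/debug/sched_features", O_WRONLY);

	if (fd < 0) {
		perror("open sched_features");
		return 1;
	}
	if (write(fd, feat, strlen(feat)) != (ssize_t)strlen(feat)) {
		perror("write");
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}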