diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 5422d1502fd6f2817c80d962fe872708f053a401..4c44afb9cc74f45bd03c7b162533852ff1bf4ad1 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -105,6 +105,7 @@ config ARM64
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
 	select ARCH_SUPPORTS_NUMA_BALANCING
+	select ARCH_SUPPORTS_SCHED_SOFT_QUOTA
 	select ARCH_SUPPORTS_PAGE_TABLE_CHECK
 	select ARCH_SUPPORTS_PER_VMA_LOCK
 	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 627fc381a0e4539bd56fa10f574800cd13708e80..ed14fb2787dcad15af0400d286aa6c8f933d441d 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -209,6 +209,7 @@ CONFIG_USER_NS=y
 CONFIG_PID_NS=y
 CONFIG_NET_NS=y
 CONFIG_SCHED_STEAL=y
+CONFIG_SCHED_SOFT_QUOTA=y
 CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_SCHED_AUTOGROUP=y
 CONFIG_RELAY=y
diff --git a/arch/arm64/kernel/idle.c b/arch/arm64/kernel/idle.c
index 46a40b693da86c3348f960c1a0fa9e250c0e54af..31d9bfbe10b88fc928759d5a341a834d1fd375f2 100644
--- a/arch/arm64/kernel/idle.c
+++ b/arch/arm64/kernel/idle.c
@@ -45,6 +45,26 @@ void noinstr arch_cpu_idle(void)
 }
 EXPORT_SYMBOL_GPL(arch_cpu_idle);
 
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+static DEFINE_PER_CPU(int, sibling_idle) = 1;
+
+int is_sibling_idle(void)
+{
+	return this_cpu_read(sibling_idle);
+}
+
+static void smt_measurement_begin(void)
+{
+}
+
+static void smt_measurement_done(void)
+{
+}
+#else
+static inline void smt_measurement_begin(void) { }
+static inline void smt_measurement_done(void) { }
+#endif
+
 #ifdef CONFIG_ACTLR_XCALL_XINT
 struct arm_cpuidle_xcall_xint_context {
 	unsigned long actlr_el1;
@@ -57,6 +77,8 @@ void arch_cpu_idle_enter(void)
 {
 	struct arm_cpuidle_xcall_xint_context *context;
 
+	smt_measurement_begin();
+
 	if (!system_uses_xcall_xint())
 		return;
 
@@ -71,6 +93,8 @@ void arch_cpu_idle_exit(void)
 {
 	struct arm_cpuidle_xcall_xint_context *context;
 
+	smt_measurement_done();
+
 	if (!system_uses_xcall_xint())
 		return;
 
@@ -81,6 +105,13 @@ void arch_cpu_idle_exit(void)
 	put_cpu_var(contexts);
 }
 #else
-void arch_cpu_idle_enter(void) {}
-void arch_cpu_idle_exit(void) {}
+void arch_cpu_idle_enter(void)
+{
+	smt_measurement_begin();
+}
+
+void arch_cpu_idle_exit(void)
+{
+	smt_measurement_done();
+}
 #endif
diff --git a/init/Kconfig b/init/Kconfig
index c8bd58347a871946bf3c1ea9f91ca8a2b5eb349c..9310fcda298876fb9764def86756c6281c7473da 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1174,6 +1174,24 @@ config SCHED_SOFT_DOMAIN
 
 	  If in doubt, say N.
 
+#
+# For architectures that want to enable the support for SCHED_SOFT_QUOTA
+#
+config ARCH_SUPPORTS_SCHED_SOFT_QUOTA
+	bool
+
+config SCHED_SOFT_QUOTA
+	bool "More flexible use of CPU quota"
+	depends on ARCH_SUPPORTS_SCHED_SOFT_QUOTA
+	depends on CFS_BANDWIDTH
+	default n
+	help
+	  This option allows a throttled task group to keep running on a CPU
+	  that would otherwise go idle, consuming more than its CFS quota.
+	  Users should understand CFS_BANDWIDTH before enabling it. Do not
+	  use it where the quota must be enforced strictly, for example in
+	  commercial scenarios that bill by consumed CPU quota.
+
 config SCHED_MM_CID
 	def_bool n
 	depends on SMP && RSEQ
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1b497efc763b538f808da9942d6f60356bde21f0..6bc9897569a9b667744f9228f7f990cf4a37fe8e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11682,6 +11682,30 @@ static inline s64 cpu_qos_read(struct cgroup_subsys_state *css,
 }
 #endif
 
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+static int cpu_soft_quota_write(struct cgroup_subsys_state *css,
+				struct cftype *cftype, s64 soft_quota)
+{
+	struct task_group *tg = css_tg(css);
+
+	if (soft_quota != 1 && soft_quota != 0)
+		return -EINVAL;
+
+	if (tg->soft_quota == soft_quota)
+		return 0;
+
+	tg->soft_quota = soft_quota;
+
+	return 0;
+}
+
+static inline s64 cpu_soft_quota_read(struct cgroup_subsys_state *css,
+				      struct cftype *cft)
+{
+	return css_tg(css)->soft_quota;
+}
+#endif
+
 #ifdef CONFIG_BPF_SCHED
 void sched_settag(struct task_struct *tsk, s64 tag)
 {
@@ -11928,6 +11952,14 @@ static struct cftype cpu_legacy_files[] = {
 		.write_s64 = cpu_qos_write,
 	},
 #endif
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+	{
+		.name = "soft_quota",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.read_s64 = cpu_soft_quota_read,
+		.write_s64 = cpu_soft_quota_write,
+	},
+#endif
 #ifdef CONFIG_BPF_SCHED
 	{
 		.name = "tag",
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f619dd53cc497f59217fd88ca61b52c76e1ba9c9..d6aca74a27de35370ab6df6cb8aa0f0e761285ca 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -186,6 +186,10 @@ unsigned int sysctl_qos_level_weights[5] = {
 static long qos_reweight(long shares, struct task_group *tg);
 #endif
 
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct list_head, soft_quota_throttled_cfs_rq);
+#endif
+
 #ifdef CONFIG_CFS_BANDWIDTH
 /*
  * Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool
@@ -219,6 +223,10 @@ extern unsigned int sysctl_smart_grid_strategy_ctrl;
 static int sysctl_affinity_adjust_delay_ms = 5000;
 #endif
 
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+unsigned int sysctl_soft_runtime_ratio = 20;
+#endif
+
 #ifdef CONFIG_SYSCTL
 static struct ctl_table sched_fair_sysctls[] = {
 	{
@@ -318,6 +326,17 @@ static struct ctl_table sched_fair_sysctls[] = {
 		.extra1 = SYSCTL_ZERO,
 		.extra2 = &hundred_thousand,
 	},
+#endif
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+	{
+		.procname = "sched_soft_runtime_ratio",
+		.data = &sysctl_soft_runtime_ratio,
+		.maxlen = sizeof(sysctl_soft_runtime_ratio),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_minmax,
+		.extra1 = SYSCTL_ONE,
+		.extra2 = SYSCTL_ONE_HUNDRED,
+	},
 #endif
 	{}
 };
@@ -588,10 +607,11 @@ static inline struct sched_entity *parent_entity(const struct sched_entity *se)
 	return se->parent;
 }
 
-static void
+static bool
 find_matching_se(struct sched_entity **se, struct sched_entity **pse)
 {
 	int se_depth, pse_depth;
+	bool ret = false;
 
 	/*
 	 * preemption test can be made between sibling entities who are in the
@@ -605,6 +625,10 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse)
 	pse_depth = (*pse)->depth;
 
 	while (se_depth > pse_depth) {
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+		if (!ret && cfs_rq_of(*se)->soft_quota_enable == 1)
+			ret = true;
+#endif
 		se_depth--;
 		*se = parent_entity(*se);
 	}
@@ -615,9 +639,15 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse)
 	}
 
 	while (!is_same_group(*se, *pse)) {
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+		if (!ret && cfs_rq_of(*se)->soft_quota_enable == 1)
+			ret = true;
+#endif
 		*se = parent_entity(*se);
 		*pse = parent_entity(*pse);
 	}
+
+	return ret;
 }
 
 static int tg_is_idle(struct task_group *tg)
@@ -663,9 +693,10 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
 	return NULL;
 }
 
-static inline void
+static inline bool
 find_matching_se(struct sched_entity **se, struct sched_entity **pse)
 {
+	return false;
 }
 
 static inline int tg_is_idle(struct task_group *tg)
@@ -6026,6 +6057,14 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 	SCHED_WARN_ON(cfs_rq->throttled_clock);
 	if (cfs_rq->nr_running)
 		cfs_rq->throttled_clock = rq_clock(rq);
+
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+	if (cfs_rq->tg->soft_quota == 1) {
+		list_add(&cfs_rq->soft_quota_throttled_list,
+			 &per_cpu(soft_quota_throttled_cfs_rq, cpu_of(rq)));
+	}
+#endif
+
 	return true;
 }
 
@@ -6042,6 +6081,10 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 
 	se = cfs_rq->tg->se[cpu_of(rq)];
 
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+	list_del_init(&cfs_rq->soft_quota_throttled_list);
+#endif
+
 #ifdef CONFIG_QOS_SCHED
 	/*
 	 * if this cfs_rq throttled by qos, not need unthrottle it.
@@ -6240,6 +6283,16 @@ static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b)
 		}
 
 		rq_lock_irqsave(rq, &rf);
+
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+		if (cfs_rq->soft_quota_enable == 1) {
+			if (cfs_rq->runtime_remaining > 0)
+				cfs_rq->runtime_remaining = 0;
+
+			cfs_rq->soft_quota_enable = 0;
+		}
+#endif
+
 		if (!cfs_rq_throttled(cfs_rq))
 			goto next;
 
@@ -6302,6 +6355,17 @@ static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b)
 	return throttled;
 }
 
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+static inline void init_tg_sum_soft_runtime(struct cfs_bandwidth *cfs_b)
+{
+	unsigned int cpu;
+	struct task_group *tg = container_of(cfs_b, struct task_group, cfs_bandwidth);
+
+	for_each_possible_cpu(cpu)
+		tg->cfs_rq[cpu]->sum_soft_runtime = 0;
+}
+#endif
+
 /*
  * Responsible for refilling a task_group's bandwidth and unthrottling its
  * cfs_rqs as appropriate. If there has been no activity within the last
@@ -6319,6 +6383,10 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, u
 	throttled = !list_empty(&cfs_b->throttled_cfs_rq);
 	cfs_b->nr_periods += overrun;
 
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+	init_tg_sum_soft_runtime(cfs_b);
+#endif
+
 	/* Refill extra burst quota even if cfs_b->idle */
 	__refill_cfs_bandwidth_runtime(cfs_b);
 
@@ -6633,6 +6701,9 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 #ifdef CONFIG_QOS_SCHED
 	INIT_LIST_HEAD(&cfs_rq->qos_throttled_list);
 #endif
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+	INIT_LIST_HEAD(&cfs_rq->soft_quota_throttled_list);
+#endif
 }
 
 void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
@@ -9440,6 +9511,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
 	int next_buddy_marked = 0;
 	int cse_is_idle, pse_is_idle;
+	bool ret = false;
 
 	if (unlikely(se == pse))
 		return;
@@ -9474,7 +9546,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	if (!sched_feat(WAKEUP_PREEMPTION))
 		return;
 
-	find_matching_se(&se, &pse);
+	ret = find_matching_se(&se, &pse);
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+	if (ret)
+		goto preempt;
+#endif
+
 	WARN_ON_ONCE(!pse);
 
 	cse_is_idle = se_is_idle(se);
@@ -14959,6 +15036,9 @@ void unregister_fair_sched_group(struct task_group *tg)
 	unsigned long flags;
 	struct rq *rq;
 	int cpu;
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+	struct cfs_rq *cfs_rq;
+#endif
 
 	destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));
 	destroy_auto_affinity(tg);
@@ -14968,10 +15048,16 @@ void unregister_fair_sched_group(struct task_group *tg)
 		if (tg->se[cpu])
 			remove_entity_load_avg(tg->se[cpu]);
 
- #ifdef CONFIG_QOS_SCHED
-		if (tg->cfs_rq && tg->cfs_rq[cpu])
-			unthrottle_qos_sched_group(tg->cfs_rq[cpu]);
- #endif
+#ifdef CONFIG_QOS_SCHED
+		if (tg->cfs_rq && tg->cfs_rq[cpu])
+			unthrottle_qos_sched_group(tg->cfs_rq[cpu]);
+#endif
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+		if (tg->cfs_rq && tg->cfs_rq[cpu]) {
+			cfs_rq = tg->cfs_rq[cpu];
+			list_del_init(&cfs_rq->soft_quota_throttled_list);
+		}
+#endif
 
 		/*
 		 * Only empty task groups can be destroyed; so we can speculatively
@@ -15286,6 +15372,11 @@ __init void init_sched_fair_class(void)
 		INIT_LIST_HEAD(&per_cpu(qos_throttled_cfs_rq, i));
 #endif
 
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+	for_each_possible_cpu(i)
+		INIT_LIST_HEAD(&per_cpu(soft_quota_throttled_cfs_rq, i));
+#endif
+
 	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
 
 #ifdef CONFIG_NO_HZ_COMMON
@@ -15296,3 +15387,66 @@ __init void init_sched_fair_class(void)
 
 #endif /* SMP */
 }
+
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+static bool check_soft_runtime(struct task_group *tg, int slice)
+{
+	int cpu;
+	u64 sum_soft_runtime = slice;
+	struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
+
+	if (cfs_b->quota == RUNTIME_INF)
+		return true;
+
+	for_each_possible_cpu(cpu)
+		sum_soft_runtime += tg->cfs_rq[cpu]->sum_soft_runtime;
+
+	return sum_soft_runtime < sysctl_soft_runtime_ratio * cfs_b->quota / 100;
+}
+
+int __weak is_sibling_idle(void)
+{
+	return 0;
+}
+
+bool unthrottle_cfs_rq_soft_quota(struct rq *rq)
+{
+	int max_cnt = 0;
+	bool ret = false;
+	struct cfs_rq *cfs_rq, *tmp_rq;
+	struct cfs_bandwidth *cfs_b;
+	int slice = sched_cfs_bandwidth_slice();
+
+	if (!is_sibling_idle())
+		return ret;
+
+	list_for_each_entry_safe(cfs_rq, tmp_rq, &per_cpu(soft_quota_throttled_cfs_rq, cpu_of(rq)),
+				 soft_quota_throttled_list) {
+		if (max_cnt++ > 20)
+			break;
+
+		if (cfs_rq->throttled) {
+			cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+			raw_spin_lock(&cfs_b->lock);
+
+			if (!check_soft_runtime(cfs_rq->tg, slice)) {
+				raw_spin_unlock(&cfs_b->lock);
+				continue;
+			}
+
+			raw_spin_unlock(&cfs_b->lock);
+
+			if (cfs_rq->runtime_remaining + slice > 0) {
+				cfs_rq->runtime_remaining += slice;
+				cfs_rq->sum_soft_runtime += slice;
+				cfs_rq->soft_quota_enable = 1;
+				unthrottle_cfs_rq(cfs_rq);
+				ret = true;
+				break;
+			}
+		}
+	}
+
+	return ret;
+}
+#endif
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index ea7ba74810e38b9b0f33c033a29674a5ff0f36fa..d587b7a12629b52ebb39bc4cb0ac5c3d02ea4031 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -110,3 +110,7 @@ SCHED_FEAT(DA_UTIL_TASKGROUP, true)
 #ifdef CONFIG_SCHED_SOFT_DOMAIN
 SCHED_FEAT(SOFT_DOMAIN, false)
 #endif
+
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+SCHED_FEAT(SOFT_QUOTA, false)
+#endif
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 5007b25c5bc653a9ca0696af1f7136150cd6131e..3518a1a28e8c5964cc5a6e7ae14f7ed65f64ba20 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -427,6 +427,13 @@ struct task_struct *pick_next_task_idle(struct rq *rq)
 {
 	struct task_struct *next = rq->idle;
 
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+	if (sched_feat(SOFT_QUOTA)) {
+		if (unthrottle_cfs_rq_soft_quota(rq) && rq->cfs.nr_running)
+			return pick_next_task_fair(rq, NULL, NULL);
+	}
+#endif
+
 	set_next_task_idle(rq, next, true);
 
 	return next;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index f6a3f93d1f7552b61f782ee5edd5ab249d7cd3fa..0e21ad151ec952c84c393a6bd98e48b1ffc18878 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -484,7 +484,11 @@ struct task_group {
 #else
 	KABI_RESERVE(1)
 #endif
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+	KABI_USE(2, u64 soft_quota)
+#else
 	KABI_RESERVE(2)
+#endif
 	KABI_RESERVE(3)
 	KABI_RESERVE(4)
 	KABI_RESERVE(5)
@@ -578,6 +582,10 @@ static inline void tg_update_affinity_domains(int cpu, int online) {}
 static inline void offline_auto_affinity(struct task_group *tg) { }
 #endif
 
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+extern bool unthrottle_cfs_rq_soft_quota(struct rq *rq);
+#endif
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
 
@@ -769,10 +777,17 @@ struct cfs_rq {
 		unsigned long qos_idle_h_nr_running_padding;
 	};
 #endif
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+	KABI_USE(1, u64 soft_quota_enable)
+	KABI_USE(2, u64 sum_soft_runtime)
+	KABI_REPLACE(_KABI_RESERVE(3); _KABI_RESERVE(4),
+		     struct list_head soft_quota_throttled_list)
+#else
 	KABI_RESERVE(1)
 	KABI_RESERVE(2)
 	KABI_RESERVE(3)
 	KABI_RESERVE(4)
+#endif
 	KABI_RESERVE(5)
 	KABI_RESERVE(6)
 	KABI_RESERVE(7)
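
A minimal test sketch for the new knobs follows. It assumes the legacy cgroup v1
cpu controller is mounted at /sys/fs/cgroup/cpu, debugfs at /sys/kernel/debug,
and the fair-class sysctls under /proc/sys/kernel; the "test" group and these
paths are assumptions for illustration, not part of this patch.

    #!/usr/bin/env python3
    # Sketch: enable the (default-off) SOFT_QUOTA sched feature, cap the
    # borrowable budget, and opt one cgroup v1 CPU group into soft quota.
    # Paths and the "test" group are assumptions; run as root.

    def write(path: str, value: str) -> None:
        """Write one scheduler/cgroup knob and echo what was set."""
        with open(path, "w") as f:
            f.write(value)
        print(f"{path} <- {value}")

    # Scheduler feature flag added to kernel/sched/features.h (default false).
    write("/sys/kernel/debug/sched/features", "SOFT_QUOTA")

    # Soft runtime per period as a percentage of the group's quota
    # (sysctl added in kernel/sched/fair.c, range 1..100, default 20).
    write("/proc/sys/kernel/sched_soft_runtime_ratio", "20")

    # Hypothetical pre-created cgroup: 50ms quota per 100ms period,
    # soft quota switched on via the new cpu.soft_quota file.
    cg = "/sys/fs/cgroup/cpu/test"
    write(f"{cg}/cpu.cfs_period_us", "100000")
    write(f"{cg}/cpu.cfs_quota_us", "50000")
    write(f"{cg}/cpu.soft_quota", "1")

With this in place, a cfs_rq of the group that gets throttled is queued on the
per-CPU soft_quota_throttled_cfs_rq list and may be unthrottled again from
pick_next_task_idle() when the CPU is about to go idle (subject to the
is_sibling_idle() hook), consuming roughly up to sched_soft_runtime_ratio
percent of the group's quota per period.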