diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fa8c8e5853f110c6a9a9660655c22b9a1f60577b..349c9f5fc55ddcac9df8edc793cd15de06559feb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -10800,6 +10800,14 @@ static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
 	return 0;
 }
 
+static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css)
+{
+	struct task_group *tg = css_tg(css);
+
+	offline_auto_affinity(tg);
+	offline_soft_domain(tg);
+}
+
 static void cpu_cgroup_css_released(struct cgroup_subsys_state *css)
 {
 	struct task_group *tg = css_tg(css);
@@ -11761,15 +11769,10 @@ static int cpu_soft_domain_quota_write_u64(struct cgroup_subsys_state *css,
 {
 	struct task_group *tg = css_tg(css);
 
-	if (tg->sf_ctx->policy != 0)
-		return -EINVAL;
-
 	if (val > cpumask_weight(cpumask_of_node(0)))
 		return -EINVAL;
 
-	tg->sf_ctx->nr_cpus = (int)val;
-
-	return 0;
+	return sched_group_set_soft_domain_quota(tg, val);
 }
 
 static u64 cpu_soft_domain_quota_read_u64(struct cgroup_subsys_state *css,
@@ -12237,6 +12240,7 @@ static struct cftype cpu_files[] = {
 struct cgroup_subsys cpu_cgrp_subsys = {
 	.css_alloc	= cpu_cgroup_css_alloc,
 	.css_online	= cpu_cgroup_css_online,
+	.css_offline	= cpu_cgroup_css_offline,
 	.css_released	= cpu_cgroup_css_released,
 	.css_free	= cpu_cgroup_css_free,
 	.css_extra_stat_show = cpu_extra_stat_show,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 21bd2ca4172d9a07babbefbe88fc8a7f98aa1eac..c4c3afa6e7b4f51f39445da63a161e876366a285 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7282,7 +7282,7 @@ int init_auto_affinity(struct task_group *tg)
 	return 0;
 }
 
-static void destroy_auto_affinity(struct task_group *tg)
+void offline_auto_affinity(struct task_group *tg)
 {
 	struct auto_affinity *auto_affi = tg->auto_affinity;
 
@@ -7294,11 +7294,21 @@ static void destroy_auto_affinity(struct task_group *tg)
 
 	if (auto_affi->period_active)
 		smart_grid_usage_dec();
+}
+
+static void destroy_auto_affinity(struct task_group *tg)
+{
+	struct auto_affinity *auto_affi = tg->auto_affinity;
+
+	if (!smart_grid_enabled())
+		return;
+
+	if (unlikely(!auto_affi))
+		return;
 
 	hrtimer_cancel(&auto_affi->period_timer);
 	sched_grid_zone_del_af(auto_affi);
 	free_affinity_domains(&auto_affi->ad);
 
-	kfree(tg->auto_affinity);
 	tg->auto_affinity = NULL;
 }
 
@@ -9176,22 +9186,25 @@ static void set_task_select_cpus(struct task_struct *p, int *idlest_cpu,
 #ifdef CONFIG_SCHED_SOFT_DOMAIN
 static int wake_soft_domain(struct task_struct *p, int target)
 {
-	struct cpumask *mask = NULL;
+	struct cpumask *mask = this_cpu_cpumask_var_ptr(select_rq_mask);
 	struct soft_domain_ctx *ctx = NULL;
 
-	rcu_read_lock();
 	ctx = task_group(p)->sf_ctx;
 	if (!ctx || ctx->policy == 0)
-		goto unlock;
+		goto out;
 
-	mask = to_cpumask(ctx->span);
-	if (cpumask_test_cpu(target, mask))
-		goto unlock;
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
+	cpumask_and(mask, to_cpumask(ctx->span), p->select_cpus);
+#else
+	cpumask_and(mask, to_cpumask(ctx->span), p->cpus_ptr);
+#endif
+	cpumask_and(mask, mask, cpu_active_mask);
+	if (cpumask_empty(mask) || cpumask_test_cpu(target, mask))
+		goto out;
 	else
 		target = cpumask_any_distribute(mask);
 
-unlock:
-	rcu_read_unlock();
+out:
 
 	return target;
 }
@@ -9251,11 +9264,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
 			new_cpu = prev_cpu;
 		}
 
-#ifdef CONFIG_SCHED_SOFT_DOMAIN
-		if (sched_feat(SOFT_DOMAIN))
-			new_cpu = prev_cpu = wake_soft_domain(p, prev_cpu);
-#endif
-
 #ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
 		want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->select_cpus);
 #else
@@ -9264,6 +9272,11 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
 	}
 
 	rcu_read_lock();
+
+#ifdef CONFIG_SCHED_SOFT_DOMAIN
+	if (sched_feat(SOFT_DOMAIN))
+		new_cpu = prev_cpu = wake_soft_domain(p, prev_cpu);
+#endif
 #ifdef CONFIG_BPF_SCHED
 	if (bpf_sched_enabled()) {
 		ctx.task = p;
@@ -14846,22 +14859,6 @@ void free_fair_sched_group(struct task_group *tg)
 	kfree(tg->se);
 }
 
-#ifdef CONFIG_SCHED_SOFT_DOMAIN
-int init_soft_domain(struct task_group *tg)
-{
-	struct soft_domain_ctx *sf_ctx = NULL;
-
-	sf_ctx = kzalloc(sizeof(*sf_ctx) + cpumask_size(), GFP_KERNEL);
-	if (!sf_ctx)
-		return -ENOMEM;
-
-	sf_ctx->policy = 0;
-	tg->sf_ctx = sf_ctx;
-
-	return 0;
-}
-#endif
-
 int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 {
 	struct sched_entity *se;
@@ -14882,7 +14879,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 	if (ret)
 		goto err;
 
-	ret = init_soft_domain(tg);
+	ret = init_soft_domain(tg, parent);
 	if (ret)
 		goto err;
 
@@ -14908,6 +14905,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 		kfree(cfs_rq);
 err:
 	destroy_auto_affinity(tg);
+	destroy_soft_domain(tg);
 	return 0;
 }
 
@@ -14937,6 +14935,7 @@ void unregister_fair_sched_group(struct task_group *tg)
 
 	destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));
 	destroy_auto_affinity(tg);
+	destroy_soft_domain(tg);
 
 	for_each_possible_cpu(cpu) {
 		if (tg->se[cpu])
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 6b8aa78272e319755d8ca886b8498abbca63394e..b418b334b804bd59aa085726664745cd1f6566db 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -573,6 +573,7 @@ static inline int sched_task_is_throttled(struct task_struct *p, int cpu)
 extern void start_auto_affinity(struct auto_affinity *auto_affi);
 extern void stop_auto_affinity(struct auto_affinity *auto_affi);
 extern int init_auto_affinity(struct task_group *tg);
+void offline_auto_affinity(struct task_group *tg);
 extern void tg_update_affinity_domains(int cpu, int online);
 extern int tg_rebuild_affinity_domains(int cpu, struct auto_affinity *auto_affi);
 
@@ -583,6 +584,7 @@ static inline int init_auto_affinity(struct task_group *tg)
 }
 
 static inline void tg_update_affinity_domains(int cpu, int online) {}
+static inline void offline_auto_affinity(struct task_group *tg) { }
 #endif
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -3759,9 +3761,11 @@ bool bpf_sched_is_cpu_allowed(struct task_struct *p, int cpu);
 
 #ifdef CONFIG_SCHED_SOFT_DOMAIN
 void build_soft_domain(void);
-int init_soft_domain(struct task_group *tg);
-
+int init_soft_domain(struct task_group *tg, struct task_group *parent);
+int destroy_soft_domain(struct task_group *tg);
+void offline_soft_domain(struct task_group *tg);
 int sched_group_set_soft_domain(struct task_group *tg, long val);
+int sched_group_set_soft_domain_quota(struct task_group *tg, long val);
 
 static inline struct cpumask *soft_domain_span(unsigned long span[])
 {
@@ -3770,7 +3774,14 @@ static inline struct cpumask *soft_domain_span(unsigned long span[])
 
 #else
 static inline void build_soft_domain(void) { }
-static inline int init_soft_domain(struct task_group *tg)
+static inline int init_soft_domain(struct task_group *tg, struct task_group *parent)
+{
+	return 0;
+}
+
+static inline void offline_soft_domain(struct task_group *tg) { }
+
+static inline int destroy_soft_domain(struct task_group *tg)
 {
 	return 0;
 }
diff --git a/kernel/sched/soft_domain.c b/kernel/sched/soft_domain.c
index 5c56428833d1580a207b58339cb8bc6c3e102115..c34be1fee3e03fcac21328dc511e8234f4f8b241 100644
--- a/kernel/sched/soft_domain.c
+++ b/kernel/sched/soft_domain.c
@@ -17,6 +17,30 @@
 #include
 
+static DEFINE_STATIC_KEY_TRUE(__soft_domain_switch);
+
+static int __init soft_domain_switch_setup(char *str)
+{
+	int val = 0;
+
+	if (kstrtoint(str, 0, &val)) {
+		pr_warn("invalid sched_soft_domain parameter: %s\n", str);
+	} else {
+		if (val == 1)
+			static_branch_enable(&__soft_domain_switch);
+		else if (val == 0)
+			static_branch_disable(&__soft_domain_switch);
+	}
+
+	return 1;
+}
+__setup("sched_soft_domain=", soft_domain_switch_setup);
+
+static bool soft_domain_enabled(void)
+{
+	return static_branch_likely(&__soft_domain_switch);
+}
+
 static DEFINE_PER_CPU(struct soft_domain *, g_sf_d);
 
 static void free_sub_soft_domain(struct soft_domain *sf_d);
 
@@ -37,7 +61,7 @@ static int build_soft_sub_domain(struct sched_domain *sd, struct cpumask *cpus)
 	sf_d->nr_available_cpus = cpumask_weight(span);
 	cpumask_copy(to_cpumask(sf_d->span), span);
 
-	for_each_cpu_and(i, sched_domain_span(sd), cpus) {
+	for_each_cpu_and(i, span, cpus) {
 		struct soft_subdomain *sub_d = NULL;
 
 		sub_d = kzalloc_node(sizeof(struct soft_subdomain) + cpumask_size(),
@@ -46,13 +70,12 @@ static int build_soft_sub_domain(struct sched_domain *sd, struct cpumask *cpus)
 			free_sub_soft_domain(sf_d);
 			return -ENOMEM;
 		}
-
 		list_add_tail(&sub_d->node, &sf_d->child_domain);
-		cpumask_copy(soft_domain_span(sub_d->span), cpu_clustergroup_mask(i));
+		cpumask_and(soft_domain_span(sub_d->span), span, cpu_clustergroup_mask(i));
 		cpumask_andnot(cpus, cpus, cpu_clustergroup_mask(i));
 	}
 
-	for_each_cpu(i, sched_domain_span(sd)) {
+	for_each_cpu(i, span) {
 		rcu_assign_pointer(per_cpu(g_sf_d, i), sf_d);
 	}
 
@@ -87,6 +110,8 @@ static void free_soft_domain(void)
 		if (sf_d)
 			free_sub_soft_domain(sf_d);
 	}
+
+	static_branch_disable(&__soft_domain_switch);
 }
 
 void build_soft_domain(void)
@@ -95,6 +120,9 @@ void build_soft_domain(void)
 	static struct cpumask cpus;
 	int i, ret;
 
+	if (!soft_domain_enabled())
+		return;
+
 	cpumask_copy(&cpus, cpu_active_mask);
 	rcu_read_lock();
 	for_each_cpu(i, &cpus) {
@@ -137,6 +165,7 @@ static int subdomain_cmp(const void *a, const void *b)
 
 struct soft_domain_args {
 	int policy;
+	int nr_cpu;
 	struct cpumask *cpus;
 };
 
@@ -145,9 +174,10 @@ static int tg_set_soft_domain(struct task_group *tg, void *data)
 	struct soft_domain_args *args = (struct soft_domain_args *)data;
 
 	tg->sf_ctx->policy = args->policy;
-	if (args->policy)
+	if (args->policy) {
 		cpumask_copy(to_cpumask(tg->sf_ctx->span), args->cpus);
-	else
+		tg->sf_ctx->nr_cpus = args->nr_cpu;
+	} else
 		cpumask_clear(to_cpumask(tg->sf_ctx->span));
 
 	return 0;
@@ -164,8 +194,6 @@ static int __calc_cpu(struct task_group *tg)
 		nr_cpu = DIV_ROUND_UP_ULL(tg->cfs_bandwidth.quota,
 					  tg->cfs_bandwidth.period);
 #endif
-	tg->sf_ctx->nr_cpus = nr_cpu;
-
 	return nr_cpu;
 }
 
@@ -202,23 +230,36 @@ static struct soft_domain *find_idlest_llc(long policy,
 	int cpu;
 	int max_cpu = 0;
 	struct soft_domain *idlest = NULL;
+	unsigned long min_util = ULONG_MAX;
 
 	/* The user has specified the llc. */
 	if (policy > 0) {
-		cpu = cpumask_first(cpumask_of_node(policy-1));
-		idlest = rcu_dereference(per_cpu(g_sf_d, cpu));
-		return idlest;
+		for_each_cpu(cpu, cpumask_of_node(policy-1)) {
+			idlest = rcu_dereference(per_cpu(g_sf_d, cpu));
+			if (idlest != NULL)
+				break;
+		}
+
+		if (idlest && nr_cpu <= cpumask_weight(to_cpumask(idlest->span)))
+			return idlest;
+
+		return NULL;
 	}
 
 	cpumask_copy(cpus, cpu_active_mask);
 	for_each_cpu(cpu, cpus) {
 		struct soft_domain *sf_d = NULL;
-		unsigned long min_util = ULONG_MAX;
+		struct cpumask *mask;
 
 		sf_d = rcu_dereference(per_cpu(g_sf_d, cpu));
 		if (sf_d == NULL)
 			continue;
 
+		mask = to_cpumask(sf_d->span);
+		cpumask_andnot(cpus, cpus, mask);
+		if (nr_cpu > cpumask_weight(mask))
+			continue;
+
 		/*
 		 * LLC selection order:
 		 * 1. When the number of idle cpus meet the requirements,
@@ -231,15 +272,13 @@ static struct soft_domain *find_idlest_llc(long policy,
 			max_cpu = sf_d->nr_available_cpus;
 			idlest = sf_d;
 		} else if (max_cpu == 0) { /* No llc meets the demand */
-			unsigned long util = sum_util(to_cpumask(sf_d->span));
+			unsigned long util = sum_util(mask);
 
 			if (idlest == NULL || util < min_util) {
 				idlest = sf_d;
 				min_util = util;
 			}
 		}
-
-		cpumask_andnot(cpus, cpus, to_cpumask(sf_d->span));
 	}
 
 	return idlest;
@@ -250,9 +289,9 @@ static int __sched_group_set_soft_domain(struct task_group *tg, long policy)
 	int cpu;
 	int ret = 0;
 	cpumask_var_t cpus;
-	int nr_cpu = __calc_cpu(tg);
 	struct soft_domain_args args;
 	struct domain_node nodes[NR_MAX_CLUSTER] = {0};
+	int nr_cpu = __calc_cpu(tg);
 
 	if (check_policy(tg, policy))
 		return -EINVAL;
@@ -286,7 +325,7 @@ static int __sched_group_set_soft_domain(struct task_group *tg, long policy)
 		cpumask_clear(cpus);
 		sort(nodes, nr, sizeof(struct domain_node), subdomain_cmp, NULL);
 
-		sf_d->nr_available_cpus -= min(sf_d->nr_available_cpus, tmp_cpu);
+		sf_d->nr_available_cpus -= tmp_cpu;
 		for (i = 0; i < nr; i++) {
 			sub_d = nodes[i].sud_d;
 			tmpmask = to_cpumask(sub_d->span);
@@ -300,12 +339,14 @@ static int __sched_group_set_soft_domain(struct task_group *tg, long policy)
 		/* 3. attach task group to softdomain. */
 		args.policy = policy;
 		args.cpus = cpus;
+		args.nr_cpu = tmp_cpu;
 		walk_tg_tree_from(tg, tg_set_soft_domain, tg_nop, &args);
 
 		/*
 		 * 4. TODO
 		 * add tg to llc domain task_groups list for load balance.
 		 */
+		tg->sf_ctx->nr_cpus = tmp_cpu;
 		tg->sf_ctx->sf_d = sf_d;
 	} else {
 		ret = -EINVAL;
@@ -328,7 +369,7 @@ static int __sched_group_unset_soft_domain(struct task_group *tg)
 	struct list_head *children = NULL;
 
 	/* If parent has set soft domain, child group can't unset itself. */
-	if (tg->parent->sf_ctx->policy != 0)
+	if (tg->parent->sf_ctx != NULL && tg->parent->sf_ctx->policy != 0)
 		return -EINVAL;
 
 	sf_d = tg->sf_ctx->sf_d;
@@ -349,6 +390,9 @@ int sched_group_set_soft_domain(struct task_group *tg, long val)
 {
 	int ret = 0;
 
+	if (!soft_domain_enabled())
+		return -EPERM;
+
 	if (val < -1 || val > nr_node_ids)
 		return -EINVAL;
 
@@ -371,3 +415,85 @@ int sched_group_set_soft_domain(struct task_group *tg, long val)
 
 	return ret;
 }
+
+int sched_group_set_soft_domain_quota(struct task_group *tg, long val)
+{
+	int ret = 0;
+
+	if (!soft_domain_enabled())
+		return -EPERM;
+
+	mutex_lock(&soft_domain_mutex);
+	if (tg->sf_ctx->policy != 0) {
+		ret = -EINVAL;
+		goto out;
+	}
+	tg->sf_ctx->nr_cpus = (int)val;
+
+out:
+	mutex_unlock(&soft_domain_mutex);
+
+	return ret;
+}
+
+int init_soft_domain(struct task_group *tg, struct task_group *parent)
+{
+	struct soft_domain_ctx *sf_ctx = NULL;
+	struct soft_domain_ctx *psf_ctx = NULL;
+
+	if (!soft_domain_enabled())
+		return 0;
+
+	sf_ctx = kzalloc(sizeof(*sf_ctx) + cpumask_size(), GFP_KERNEL);
+	if (!sf_ctx)
+		return -ENOMEM;
+
+	mutex_lock(&soft_domain_mutex);
+	psf_ctx = parent->sf_ctx;
+	if (psf_ctx) {
+		sf_ctx->policy = psf_ctx->policy;
+		sf_ctx->nr_cpus = psf_ctx->nr_cpus;
+		cpumask_copy(to_cpumask(sf_ctx->span), to_cpumask(psf_ctx->span));
+	}
+
+	tg->sf_ctx = sf_ctx;
+	mutex_unlock(&soft_domain_mutex);
+
+	return 0;
+}
+
+void offline_soft_domain(struct task_group *tg)
+{
+	struct soft_domain_ctx *sf_ctx = NULL;
+	struct soft_domain_ctx *psf_ctx = NULL;
+
+	if (!soft_domain_enabled())
+		return;
+
+	sf_ctx = tg->sf_ctx;
+	psf_ctx = tg->parent->sf_ctx;
+
+	if (!sf_ctx)
+		return;
+
+	mutex_lock(&soft_domain_mutex);
+	if (sf_ctx->policy != 0) {
+		/*
+		 * The parent group has no soft domain set, so this
+		 * group's soft domain was set directly by the user.
+		 */
+		if (psf_ctx == NULL || psf_ctx->policy == 0)
+			__sched_group_unset_soft_domain(tg);
+	}
+	mutex_unlock(&soft_domain_mutex);
+}
+
+int destroy_soft_domain(struct task_group *tg)
+{
+	if (!soft_domain_enabled())
+		return 0;
+
+	kfree(tg->sf_ctx);
+
+	return 0;
+}
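
Not part of the patch: a minimal userspace sketch of how the new knobs might be driven from a cgroup. The file names cpu.soft_domain and cpu.soft_domain_quota and the cgroup path are assumptions here (the cftype entries sit outside the visible hunks); what the sketch does follow from the patch itself is the ordering, since sched_group_set_soft_domain_quota() rejects a quota write with -EINVAL once a policy is already set, and a positive policy value maps to NUMA node "value - 1" in find_idlest_llc().

/*
 * Hypothetical example, not from the patch: configure a group's soft
 * domain. Assumed names: cgroup at /sys/fs/cgroup/mygrp, files
 * cpu.soft_domain_quota and cpu.soft_domain.
 */
#include <stdio.h>

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	if (fputs(val, f) == EOF)
		perror(path);
	return fclose(f);
}

int main(void)
{
	const char *grp = "/sys/fs/cgroup/mygrp";	/* assumed cgroup path */
	char path[256];

	/*
	 * 1. Set the CPU quota first: the patch makes the quota write fail
	 *    with -EINVAL once a soft-domain policy is already in place.
	 */
	snprintf(path, sizeof(path), "%s/cpu.soft_domain_quota", grp);
	write_str(path, "4");

	/*
	 * 2. Then pick a policy; a positive value selects a NUMA node
	 *    (find_idlest_llc() uses cpumask_of_node(policy - 1)).
	 */
	snprintf(path, sizeof(path), "%s/cpu.soft_domain", grp);
	write_str(path, "1");

	return 0;
}

On a kernel booted with sched_soft_domain=0, both writes would fail with -EPERM, since the handlers added by this patch bail out when soft_domain_enabled() is false.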