From 18f30ae68a853a0c093cd391bf7ad53d49c069b9 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Sun, 13 Feb 2022 22:32:45 +0800 Subject: [PATCH 01/11] sched: Introduce related thread group scheduling codeaurora inclusion category: feature issue: #I4SULH CVE: NA Signed-off-by: Li Ming ------------------------------------------- The original patch is from Code Aurora's latest msm-4.14. Based on the original patch, we add definitions for related thread group, and a subsequent changeset will provide improved schedule for related thread group. Signed-off-by: Vikram Mulukutla Signed-off-by: Satya Durga Srinivasu Prabhala Signed-off-by: Srinath Sridharan --- include/linux/sched.h | 7 +++++++ include/linux/sched/rtg.h | 15 +++++++++++++++ init/Kconfig | 2 ++ kernel/sched/Makefile | 1 + kernel/sched/core.c | 11 +++++++++++ kernel/sched/rtg/Kconfig | 10 ++++++++++ kernel/sched/rtg/Makefile | 2 ++ kernel/sched/rtg/rtg.c | 13 +++++++++++++ kernel/sched/rtg/rtg.h | 14 ++++++++++++++ kernel/sched/walt.c | 5 +++++ 10 files changed, 80 insertions(+) create mode 100644 include/linux/sched/rtg.h create mode 100644 kernel/sched/rtg/Kconfig create mode 100644 kernel/sched/rtg/Makefile create mode 100644 kernel/sched/rtg/rtg.c create mode 100644 kernel/sched/rtg/rtg.h diff --git a/include/linux/sched.h b/include/linux/sched.h index 44d5d8ed532a..c5e0c99cb3cd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -34,6 +34,7 @@ #include #include #include +#include /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; @@ -801,6 +802,12 @@ struct task_struct { u64 last_sleep_ts; #endif +#ifdef CONFIG_SCHED_RTG + int rtg_depth; + struct related_thread_group *grp; + struct list_head grp_list; +#endif + #ifdef CONFIG_CGROUP_SCHED struct task_group *sched_task_group; #endif diff --git a/include/linux/sched/rtg.h b/include/linux/sched/rtg.h new file mode 100644 index 000000000000..c17636439964 --- /dev/null +++ b/include/linux/sched/rtg.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __SCHED_RTG_H +#define __SCHED_RTG_H + +#ifdef CONFIG_SCHED_RTG +struct related_thread_group { + int id; + raw_spinlock_t lock; + struct list_head tasks; + struct list_head list; + + unsigned int nr_running; +}; +#endif /* CONFIG_SCHED_RTG */ +#endif diff --git a/init/Kconfig b/init/Kconfig index 1512479e7782..1d248e9c5a89 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -858,6 +858,8 @@ config UCLAMP_BUCKETS_COUNT If in doubt, use the default value. 
+source "kernel/sched/rtg/Kconfig" + endmenu # diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 0e3173ee99fb..1b4834073ae7 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -28,6 +28,7 @@ obj-y += wait.o wait_bit.o swait.o completion.o obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o obj-$(CONFIG_SCHED_WALT) += walt.o +obj-$(CONFIG_SCHED_RTG) += rtg/ obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o obj-$(CONFIG_SCHEDSTATS) += stats.o obj-$(CONFIG_SCHED_DEBUG) += debug.o diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 33e19cbd4eee..3a86b124f41c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -29,6 +29,7 @@ #include "pelt.h" #include "smp.h" #include "walt.h" +#include "rtg/rtg.h" /* * Export tracepoints that act as a bare tracehook (ie: have no trace event @@ -3207,6 +3208,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) #ifdef CONFIG_SMP p->wake_entry.u_flags = CSD_TYPE_TTWU; #endif +#ifdef CONFIG_SCHED_RTG + p->rtg_depth = 0; +#endif } DEFINE_STATIC_KEY_FALSE(sched_numa_balancing); @@ -3350,7 +3354,14 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) if (unlikely(p->sched_reset_on_fork)) { if (task_has_dl_policy(p) || task_has_rt_policy(p)) { p->policy = SCHED_NORMAL; +#ifdef CONFIG_SCHED_RTG + if (current->rtg_depth != 0) + p->static_prio = current->static_prio; + else + p->static_prio = NICE_TO_PRIO(0); +#else p->static_prio = NICE_TO_PRIO(0); +#endif p->rt_priority = 0; } else if (PRIO_TO_NICE(p->static_prio) < 0) p->static_prio = NICE_TO_PRIO(0); diff --git a/kernel/sched/rtg/Kconfig b/kernel/sched/rtg/Kconfig new file mode 100644 index 000000000000..11a0343d935b --- /dev/null +++ b/kernel/sched/rtg/Kconfig @@ -0,0 +1,10 @@ +menu "Related Thread Group" + +config SCHED_RTG + bool "Related Thread Group" + depends on SCHED_WALT + default n + help + Set related threads into a group. 
+ +endmenu diff --git a/kernel/sched/rtg/Makefile b/kernel/sched/rtg/Makefile new file mode 100644 index 000000000000..a911575b0734 --- /dev/null +++ b/kernel/sched/rtg/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_SCHED_RTG) += rtg.o diff --git a/kernel/sched/rtg/rtg.c b/kernel/sched/rtg/rtg.c new file mode 100644 index 000000000000..f48905afbbf4 --- /dev/null +++ b/kernel/sched/rtg/rtg.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * related thread group sched + * + */ +#include +#include "rtg.h" + +void init_task_rtg(struct task_struct *p) +{ + rcu_assign_pointer(p->grp, NULL); + INIT_LIST_HEAD(&p->grp_list); +} diff --git a/kernel/sched/rtg/rtg.h b/kernel/sched/rtg/rtg.h new file mode 100644 index 000000000000..39046758a6b7 --- /dev/null +++ b/kernel/sched/rtg/rtg.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * related thread group sched header + */ +#ifndef __RTG_H +#define __RTG_H + +#include +#include + +#ifdef CONFIG_SCHED_RTG +void init_task_rtg(struct task_struct *p); +#endif /* CONFIG_SCHED_RTG */ +#endif diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c index 30db3d617914..38699a333540 100644 --- a/kernel/sched/walt.c +++ b/kernel/sched/walt.c @@ -24,6 +24,7 @@ #include "sched.h" #include "walt.h" #include "core_ctl.h" +#include "rtg/rtg.h" #define CREATE_TRACE_POINTS #include #undef CREATE_TRACE_POINTS @@ -1160,6 +1161,10 @@ void init_new_task_load(struct task_struct *p) u32 init_load_windows_scaled = sched_init_task_load_windows_scaled; u32 init_load_pct = current->init_load_pct; +#ifdef CONFIG_SCHED_RTG + init_task_rtg(p); +#endif + p->last_sleep_ts = 0; p->init_load_pct = 0; memset(&p->ravg, 0, sizeof(struct ravg)); -- Gitee From cf327988be3e038a3630c66f0965b4bb61fc78bd Mon Sep 17 00:00:00 2001 From: Li Ming Date: Sun, 13 Feb 2022 23:36:45 +0800 Subject: [PATCH 02/11] sched: Minimally initialize the related thread group codeaurora inclusion category: feature issue: #I4SULH CVE: NA Signed-off-by: Li Ming ------------------------------------------- 21 groups (MAX_NUM_CGROUP_COLOC_ID) are created by default, of which DEFAULT_CGROUP_COLOC_ID is a reserved id. 
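For illustration only (not introduced by this patch), a kernel-side caller could
attach a task to one of the pre-allocated groups and later detach it through the
interface added here; the group id 2 and the wrapper function below are
assumptions made for the example, not part of the series:

    /* Sketch: attach @p to related thread group 2, then detach it. */
    static int rtg_attach_example(struct task_struct *p)
    {
            int ret;

            /* ids 2..20 are usable; DEFAULT_CGROUP_COLOC_ID (1) is reserved */
            ret = sched_set_group_id(p, 2);
            if (ret)
                    return ret;

            /* ... run the related workload ... */

            /* group id 0 removes the task from its current group */
            return sched_set_group_id(p, 0);
    }

A later patch in this series (PATCH 03/11) exposes the same operation to
userspace through /proc/$PID/sched_group_id.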
Signed-off-by: Vikram Mulukutla Signed-off-by: Satya Durga Srinivasu Prabhala Signed-off-by: Srinath Sridharan --- include/linux/sched/rtg.h | 7 ++ kernel/sched/core.c | 5 + kernel/sched/rtg/rtg.c | 204 ++++++++++++++++++++++++++++++++++++++ kernel/sched/rtg/rtg.h | 6 ++ kernel/sched/walt.c | 3 + 5 files changed, 225 insertions(+) diff --git a/include/linux/sched/rtg.h b/include/linux/sched/rtg.h index c17636439964..85bd334fa9cc 100644 --- a/include/linux/sched/rtg.h +++ b/include/linux/sched/rtg.h @@ -3,6 +3,11 @@ #define __SCHED_RTG_H #ifdef CONFIG_SCHED_RTG + +#define DEFAULT_RTG_GRP_ID 0 +#define DEFAULT_CGROUP_COLOC_ID 1 +#define MAX_NUM_CGROUP_COLOC_ID 21 + struct related_thread_group { int id; raw_spinlock_t lock; @@ -11,5 +16,7 @@ struct related_thread_group { unsigned int nr_running; }; + +int sched_set_group_id(struct task_struct *p, unsigned int group_id); #endif /* CONFIG_SCHED_RTG */ #endif diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3a86b124f41c..574c155b9e3a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7656,6 +7656,7 @@ void __init sched_init(void) atomic_set(&rq->nr_iowait, 0); } + BUG_ON(alloc_related_thread_groups()); set_load_weight(&init_task, false); /* @@ -8970,6 +8971,10 @@ void sched_exit(struct task_struct *p) struct rq *rq; u64 wallclock; +#ifdef CONFIG_SCHED_RTG + sched_set_group_id(p, 0); +#endif + rq = task_rq_lock(p, &rf); /* rq->curr == p */ diff --git a/kernel/sched/rtg/rtg.c b/kernel/sched/rtg/rtg.c index f48905afbbf4..6d54c48ba36f 100644 --- a/kernel/sched/rtg/rtg.c +++ b/kernel/sched/rtg/rtg.c @@ -4,10 +4,214 @@ * */ #include + +#include "../sched.h" #include "rtg.h" +struct related_thread_group *related_thread_groups[MAX_NUM_CGROUP_COLOC_ID]; +static DEFINE_RWLOCK(related_thread_group_lock); +static LIST_HEAD(active_related_thread_groups); + void init_task_rtg(struct task_struct *p) { rcu_assign_pointer(p->grp, NULL); INIT_LIST_HEAD(&p->grp_list); } + +struct related_thread_group *task_related_thread_group(struct task_struct *p) +{ + return rcu_dereference(p->grp); +} + +struct related_thread_group * +lookup_related_thread_group(unsigned int group_id) +{ + return related_thread_groups[group_id]; +} + +int alloc_related_thread_groups(void) +{ + int i, ret; + struct related_thread_group *grp = NULL; + + /* groupd_id = 0 is invalid as it's special id to remove group. 
*/ + for (i = 1; i < MAX_NUM_CGROUP_COLOC_ID; i++) { + grp = kzalloc(sizeof(*grp), GFP_NOWAIT); + if (!grp) { + ret = -ENOMEM; + goto err; + } + + grp->id = i; + INIT_LIST_HEAD(&grp->tasks); + INIT_LIST_HEAD(&grp->list); + raw_spin_lock_init(&grp->lock); + + related_thread_groups[i] = grp; + } + + return 0; + +err: + for (i = 1; i < MAX_NUM_CGROUP_COLOC_ID; i++) { + grp = lookup_related_thread_group(i); + if (grp) { + kfree(grp); + related_thread_groups[i] = NULL; + } else { + break; + } + } + + return ret; +} + +static void remove_task_from_group(struct task_struct *p) +{ + struct related_thread_group *grp = p->grp; + struct rq *rq = NULL; + bool empty_group = true; + struct rq_flags flag; + unsigned long irqflag; + + rq = __task_rq_lock(p, &flag); + + raw_spin_lock_irqsave(&grp->lock, irqflag); + list_del_init(&p->grp_list); + rcu_assign_pointer(p->grp, NULL); + + if (p->on_cpu) + grp->nr_running--; + + if ((int)grp->nr_running < 0) { + WARN_ON(1); + grp->nr_running = 0; + } + + if (!list_empty(&grp->tasks)) + empty_group = false; + + raw_spin_unlock_irqrestore(&grp->lock, irqflag); + __task_rq_unlock(rq, &flag); + + /* Reserved groups cannot be destroyed */ + if (empty_group && grp->id != DEFAULT_CGROUP_COLOC_ID) { + /* + * We test whether grp->list is attached with list_empty() + * hence re-init the list after deletion. + */ + write_lock(&related_thread_group_lock); + list_del_init(&grp->list); + write_unlock(&related_thread_group_lock); + } +} + +static int +add_task_to_group(struct task_struct *p, struct related_thread_group *grp) +{ + struct rq *rq = NULL; + struct rq_flags flag; + unsigned long irqflag; + + /* + * Change p->grp under rq->lock. Will prevent races with read-side + * reference of p->grp in various hot-paths + */ + rq = __task_rq_lock(p, &flag); + + raw_spin_lock_irqsave(&grp->lock, irqflag); + list_add(&p->grp_list, &grp->tasks); + rcu_assign_pointer(p->grp, grp); + if (p->on_cpu) + grp->nr_running++; + + raw_spin_unlock_irqrestore(&grp->lock, irqflag); + __task_rq_unlock(rq, &flag); + + return 0; +} + +static int __sched_set_group_id(struct task_struct *p, unsigned int group_id) +{ + int rc = 0; + unsigned long flags; + struct related_thread_group *grp = NULL; + struct related_thread_group *old_grp = NULL; + + if (group_id >= MAX_NUM_CGROUP_COLOC_ID) + return -EINVAL; + + raw_spin_lock_irqsave(&p->pi_lock, flags); + old_grp = p->grp; + if ((current != p && (p->flags & PF_EXITING)) || + (!old_grp && !group_id)) + goto done; + + /* + * If the system has CONFIG_SCHED_RTG_CGROUP, only tasks in DEFAULT group + * can be directly switched to other groups. + * + * In other cases, Switching from one group to another directly is not permitted. 
+ */ + if (old_grp && group_id) { + pr_err("%s[%d] switching group from %d to %d failed.\n", + p->comm, p->pid, old_grp->id, group_id); + rc = -EINVAL; + goto done; + } + + if (!group_id) { + remove_task_from_group(p); + goto done; + } + + grp = lookup_related_thread_group(group_id); + write_lock(&related_thread_group_lock); + if (list_empty(&grp->list)) + list_add(&grp->list, &active_related_thread_groups); + write_unlock(&related_thread_group_lock); + + rc = add_task_to_group(p, grp); +done: + raw_spin_unlock_irqrestore(&p->pi_lock, flags); + + return rc; +} + +/* group_id == 0: remove task from rtg */ +int sched_set_group_id(struct task_struct *p, unsigned int group_id) +{ + /* DEFAULT_CGROUP_COLOC_ID is a reserved id */ + if (group_id == DEFAULT_CGROUP_COLOC_ID) + return -EINVAL; + + return __sched_set_group_id(p, group_id); +} + +void update_group_nr_running(struct task_struct *p, int event) +{ + struct related_thread_group *grp; + + rcu_read_lock(); + grp = task_related_thread_group(p); + if (!grp) { + rcu_read_unlock(); + return; + } + + raw_spin_lock(&grp->lock); + + if (event == PICK_NEXT_TASK) + grp->nr_running++; + else if (event == PUT_PREV_TASK) + grp->nr_running--; + + if ((int)grp->nr_running < 0) { + WARN_ON(1); + grp->nr_running = 0; + } + + raw_spin_unlock(&grp->lock); + + rcu_read_unlock(); +} diff --git a/kernel/sched/rtg/rtg.h b/kernel/sched/rtg/rtg.h index 39046758a6b7..80661f8b2d32 100644 --- a/kernel/sched/rtg/rtg.h +++ b/kernel/sched/rtg/rtg.h @@ -10,5 +10,11 @@ #ifdef CONFIG_SCHED_RTG void init_task_rtg(struct task_struct *p); +int alloc_related_thread_groups(void); +struct related_thread_group *lookup_related_thread_group(unsigned int group_id); +struct related_thread_group *task_related_thread_group(struct task_struct *p); +void update_group_nr_running(struct task_struct *p, int event); +#else +static inline int alloc_related_thread_groups(void) { return 0; } #endif /* CONFIG_SCHED_RTG */ #endif diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c index 38699a333540..f560321b8691 100644 --- a/kernel/sched/walt.c +++ b/kernel/sched/walt.c @@ -1109,6 +1109,9 @@ void update_task_ravg(struct task_struct *p, struct rq *rq, int event, old_window_start = update_window_start(rq, wallclock, event); +#ifdef CONFIG_SCHED_RTG + update_group_nr_running(p, event); +#endif if (!p->ravg.mark_start) goto done; -- Gitee From f1ca14c3ed9637a0f123ef938624d8029d265d84 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Mon, 14 Feb 2022 09:46:44 +0800 Subject: [PATCH 03/11] sched: Add debugfs for related thread group codeaurora inclusion category: feature issue: #I4SULH CVE: NA Signed-off-by: Li Ming ------------------------------------------- /proc/$PID/sched_group_id: 1. write $GROUP_ID to the sched_group_id file to add task (pid = $PID) to related thread group (group_id = $GROUP_ID). 2. 
read the group id which the process is located from the sched_group_id file Signed-off-by: Vikram Mulukutla Signed-off-by: Satya Durga Srinivasu Prabhala Signed-off-by: Srinath Sridharan --- fs/proc/base.c | 70 ++++++++++++++++++ include/linux/sched/rtg.h | 1 + kernel/sched/rtg/Kconfig | 7 ++ kernel/sched/rtg/rtg.c | 151 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 229 insertions(+) diff --git a/fs/proc/base.c b/fs/proc/base.c index 96d4ab81619e..0d40f7a2cc4d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1499,6 +1499,70 @@ static const struct file_operations proc_pid_sched_operations = { #endif +#ifdef CONFIG_SCHED_RTG_DEBUG +static int sched_group_id_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct task_struct *p; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + seq_printf(m, "%d\n", sched_get_group_id(p)); + + put_task_struct(p); + + return 0; +} + +static ssize_t +sched_group_id_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file_inode(file); + struct task_struct *p; + char buffer[PROC_NUMBUF]; + int group_id, err; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + err = -EFAULT; + goto out; + } + + err = kstrtoint(strstrip(buffer), 0, &group_id); + if (err) + goto out; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + err = sched_set_group_id(p, group_id); + + put_task_struct(p); + +out: + return err < 0 ? err : count; +} + +static int sched_group_id_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, sched_group_id_show, inode); +} + +static const struct file_operations proc_pid_sched_group_id_operations = { + .open = sched_group_id_open, + .read = seq_read, + .write = sched_group_id_write, + .llseek = seq_lseek, + .release = single_release, +}; +#endif /* CONFIG_SCHED_RTG_DEBUG */ + #ifdef CONFIG_SCHED_AUTOGROUP /* * Print out autogroup related information: @@ -3372,6 +3436,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_ACCESS_TOKENID ONE("tokenid", S_IRUSR, proc_token_operations), #endif +#ifdef CONFIG_SCHED_RTG_DEBUG + REG("sched_group_id", S_IRUGO|S_IWUGO, proc_pid_sched_group_id_operations), +#endif }; static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) @@ -3704,6 +3771,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_ACCESS_TOKENID ONE("tokenid", S_IRUSR, proc_token_operations), #endif +#ifdef CONFIG_SCHED_RTG_DEBUG + REG("sched_group_id", S_IRUGO|S_IWUGO, proc_pid_sched_group_id_operations), +#endif }; static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) diff --git a/include/linux/sched/rtg.h b/include/linux/sched/rtg.h index 85bd334fa9cc..5da7ef60d8ee 100644 --- a/include/linux/sched/rtg.h +++ b/include/linux/sched/rtg.h @@ -18,5 +18,6 @@ struct related_thread_group { }; int sched_set_group_id(struct task_struct *p, unsigned int group_id); +unsigned int sched_get_group_id(struct task_struct *p); #endif /* CONFIG_SCHED_RTG */ #endif diff --git a/kernel/sched/rtg/Kconfig b/kernel/sched/rtg/Kconfig index 11a0343d935b..a96073631d16 100644 --- a/kernel/sched/rtg/Kconfig +++ b/kernel/sched/rtg/Kconfig @@ -7,4 +7,11 @@ config SCHED_RTG help Set related threads into a group. 
+config SCHED_RTG_DEBUG + bool "Related Thread Group DebugFS" + depends on SCHED_RTG + default n + help + If set, debug node will show rtg threads + endmenu diff --git a/kernel/sched/rtg/rtg.c b/kernel/sched/rtg/rtg.c index 6d54c48ba36f..a3fb4481bd78 100644 --- a/kernel/sched/rtg/rtg.c +++ b/kernel/sched/rtg/rtg.c @@ -12,6 +12,9 @@ struct related_thread_group *related_thread_groups[MAX_NUM_CGROUP_COLOC_ID]; static DEFINE_RWLOCK(related_thread_group_lock); static LIST_HEAD(active_related_thread_groups); +#define for_each_related_thread_group(grp) \ + list_for_each_entry(grp, &active_related_thread_groups, list) + void init_task_rtg(struct task_struct *p) { rcu_assign_pointer(p->grp, NULL); @@ -188,6 +191,19 @@ int sched_set_group_id(struct task_struct *p, unsigned int group_id) return __sched_set_group_id(p, group_id); } +unsigned int sched_get_group_id(struct task_struct *p) +{ + unsigned int group_id; + struct related_thread_group *grp = NULL; + + rcu_read_lock(); + grp = task_related_thread_group(p); + group_id = grp ? grp->id : 0; + rcu_read_unlock(); + + return group_id; +} + void update_group_nr_running(struct task_struct *p, int event) { struct related_thread_group *grp; @@ -215,3 +231,138 @@ void update_group_nr_running(struct task_struct *p, int event) rcu_read_unlock(); } + +#ifdef CONFIG_SCHED_RTG_DEBUG +#define seq_printf_rtg(m, x...) \ +do { \ + if (m) \ + seq_printf(m, x); \ + else \ + printk(x); \ +} while (0) + +static void print_rtg_info(struct seq_file *file, + const struct related_thread_group *grp) +{ + seq_printf_rtg(file, "RTG_ID : %d\n", grp->id); +} + +static char rtg_task_state_to_char(const struct task_struct *tsk) +{ + static const char state_char[] = "RSDTtXZPI"; + unsigned int tsk_state = READ_ONCE(tsk->state); + unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT; + + BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX); + BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != sizeof(state_char) - 1); + + if (tsk_state == TASK_IDLE) + state = TASK_REPORT_IDLE; + return state_char[fls(state)]; +} + +static inline void print_rtg_task_header(struct seq_file *file, + const char *header, int run, int nr) +{ + seq_printf_rtg(file, + "%s : %d/%d\n" + "STATE COMM PID PRIO CPU\n" + "---------------------------------------------------------\n", + header, run, nr); +} + +static inline void print_rtg_task(struct seq_file *file, + const struct task_struct *tsk) +{ + seq_printf_rtg(file, "%5c %15s %5d %5d %5d(%*pbl)\n", + rtg_task_state_to_char(tsk), tsk->comm, tsk->pid, + tsk->prio, task_cpu(tsk), cpumask_pr_args(tsk->cpus_ptr)); +} + +static void print_rtg_threads(struct seq_file *file, + const struct related_thread_group *grp) +{ + struct task_struct *tsk = NULL; + int nr_thread = 0; + + list_for_each_entry(tsk, &grp->tasks, grp_list) + nr_thread++; + + if (!nr_thread) + return; + + print_rtg_task_header(file, "RTG_THREADS", + grp->nr_running, nr_thread); + list_for_each_entry(tsk, &grp->tasks, grp_list) { + if (unlikely(!tsk)) + continue; + get_task_struct(tsk); + print_rtg_task(file, tsk); + put_task_struct(tsk); + } + seq_printf_rtg(file, "---------------------------------------------------------\n"); +} + +static int sched_rtg_debug_show(struct seq_file *file, void *param) +{ + struct related_thread_group *grp = NULL; + unsigned long flags; + bool have_task = false; + + for_each_related_thread_group(grp) { + if (unlikely(!grp)) { + seq_printf_rtg(file, "RTG none\n"); + return 0; + } + + raw_spin_lock_irqsave(&grp->lock, flags); + if (list_empty(&grp->tasks)) { + 
raw_spin_unlock_irqrestore(&grp->lock, flags); + continue; + } + + if (!have_task) + have_task = true; + + seq_printf_rtg(file, "\n\n"); + print_rtg_info(file, grp); + print_rtg_threads(file, grp); + raw_spin_unlock_irqrestore(&grp->lock, flags); + } + + if (!have_task) + seq_printf_rtg(file, "RTG tasklist empty\n"); + + return 0; +} + +static int sched_rtg_debug_release(struct inode *inode, struct file *file) +{ + seq_release(inode, file); + return 0; +} + +static int sched_rtg_debug_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, sched_rtg_debug_show, NULL); +} + +static const struct proc_ops sched_rtg_debug_fops = { + .proc_open = sched_rtg_debug_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = sched_rtg_debug_release, +}; + +static int __init init_sched_rtg_debug_procfs(void) +{ + struct proc_dir_entry *pe = NULL; + + pe = proc_create("sched_rtg_debug", + 0400, NULL, &sched_rtg_debug_fops); + if (unlikely(!pe)) + return -ENOMEM; + return 0; +} +late_initcall(init_sched_rtg_debug_procfs); +#endif -- Gitee From 4adbc9fbbc760de140d78d84eb75869adee30c40 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Mon, 14 Feb 2022 11:25:07 +0800 Subject: [PATCH 04/11] sched: Provide independent load tracking for each group codeaurora inclusion category: feature issue: #I4SULH CVE: NA Signed-off-by: Li Ming ------------------------------------------- 1. record group load in grp->ravg. 2. task's cpu usage is accounted in grp->cpu_time[cpu]->curr/prev_runnable_sum when its ->grp is not NULL, otherwise rq->curr/prev_runnable_sum. Signed-off-by: Vikram Mulukutla Signed-off-by: Satya Durga Srinivasu Prabhala Signed-off-by: Srinath Sridharan --- include/linux/sched.h | 6 + include/linux/sched/rtg.h | 22 +++ include/trace/events/walt.h | 88 ++++++++++++ kernel/sched/rtg/rtg.c | 272 +++++++++++++++++++++++++++++++++++- kernel/sched/rtg/rtg.h | 7 + kernel/sched/sched.h | 3 + kernel/sched/walt.c | 92 ++++++++++-- kernel/sched/walt.h | 11 ++ 8 files changed, 489 insertions(+), 12 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index c5e0c99cb3cd..393cdfdfa6d9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -223,6 +223,12 @@ enum task_event { IRQ_UPDATE = 5, }; +/* Note: this need to be in sync with migrate_type_names array */ +enum migrate_types { + GROUP_TO_RQ, + RQ_TO_GROUP, +}; + #ifdef CONFIG_CPU_ISOLATION_OPT extern int sched_isolate_count(const cpumask_t *mask, bool include_offline); extern int sched_isolate_cpu(int cpu); diff --git a/include/linux/sched/rtg.h b/include/linux/sched/rtg.h index 5da7ef60d8ee..a35114766acb 100644 --- a/include/linux/sched/rtg.h +++ b/include/linux/sched/rtg.h @@ -8,6 +8,22 @@ #define DEFAULT_CGROUP_COLOC_ID 1 #define MAX_NUM_CGROUP_COLOC_ID 21 +struct group_cpu_time { + u64 window_start; + u64 curr_runnable_sum; + u64 prev_runnable_sum; + u64 nt_curr_runnable_sum; + u64 nt_prev_runnable_sum; +}; + +struct group_ravg { + unsigned long curr_window_load; + unsigned long curr_window_exec; + unsigned long prev_window_load; + unsigned long prev_window_exec; + unsigned long normalized_util; +}; + struct related_thread_group { int id; raw_spinlock_t lock; @@ -15,6 +31,12 @@ struct related_thread_group { struct list_head list; unsigned int nr_running; + struct group_ravg ravg; + u64 window_start; + u64 mark_start; + u64 prev_window_time; + /* rtg window information for WALT */ + unsigned int window_size; }; int sched_set_group_id(struct task_struct *p, unsigned int group_id); diff --git 
a/include/trace/events/walt.h b/include/trace/events/walt.h index e5328b75a8bd..9af92c8689b9 100644 --- a/include/trace/events/walt.h +++ b/include/trace/events/walt.h @@ -47,6 +47,43 @@ static inline s64 __rq_update_sum(struct rq *rq, bool curr, bool new) else return rq->prev_runnable_sum; } + +#ifdef CONFIG_SCHED_RTG +static inline s64 __grp_update_sum(struct rq *rq, bool curr, bool new) +{ + if (curr) + if (new) + return rq->grp_time.nt_curr_runnable_sum; + else + return rq->grp_time.curr_runnable_sum; + else + if (new) + return rq->grp_time.nt_prev_runnable_sum; + else + return rq->grp_time.prev_runnable_sum; +} + +static inline s64 +__get_update_sum(struct rq *rq, enum migrate_types migrate_type, + bool src, bool new, bool curr) +{ + switch (migrate_type) { + case RQ_TO_GROUP: + if (src) + return __rq_update_sum(rq, curr, new); + else + return __grp_update_sum(rq, curr, new); + case GROUP_TO_RQ: + if (src) + return __grp_update_sum(rq, curr, new); + else + return __rq_update_sum(rq, curr, new); + default: + WARN_ON_ONCE(1); + return -1; + } +} +#endif #endif TRACE_EVENT(sched_update_history, @@ -162,6 +199,57 @@ TRACE_EVENT(sched_update_task_ravg, __entry->active_windows) ); +extern const char *migrate_type_names[]; + +#ifdef CONFIG_SCHED_RTG +TRACE_EVENT(sched_migration_update_sum, + + TP_PROTO(struct task_struct *p, enum migrate_types migrate_type, struct rq *rq), + + TP_ARGS(p, migrate_type, rq), + + TP_STRUCT__entry( + __field(int, tcpu) + __field(int, pid) + __field(enum migrate_types, migrate_type) + __field(s64, src_cs) + __field(s64, src_ps) + __field(s64, dst_cs) + __field(s64, dst_ps) + __field(s64, src_nt_cs) + __field(s64, src_nt_ps) + __field(s64, dst_nt_cs) + __field(s64, dst_nt_ps) + ), + + TP_fast_assign( + __entry->tcpu = task_cpu(p); + __entry->pid = p->pid; + __entry->migrate_type = migrate_type; + __entry->src_cs = __get_update_sum(rq, migrate_type, + true, false, true); + __entry->src_ps = __get_update_sum(rq, migrate_type, + true, false, false); + __entry->dst_cs = __get_update_sum(rq, migrate_type, + false, false, true); + __entry->dst_ps = __get_update_sum(rq, migrate_type, + false, false, false); + __entry->src_nt_cs = __get_update_sum(rq, migrate_type, + true, true, true); + __entry->src_nt_ps = __get_update_sum(rq, migrate_type, + true, true, false); + __entry->dst_nt_cs = __get_update_sum(rq, migrate_type, + false, true, true); + __entry->dst_nt_ps = __get_update_sum(rq, migrate_type, + false, true, false); + ), + + TP_printk("pid %d task_cpu %d migrate_type %s src_cs %llu src_ps %llu dst_cs %lld dst_ps %lld src_nt_cs %llu src_nt_ps %llu dst_nt_cs %lld dst_nt_ps %lld", + __entry->pid, __entry->tcpu, migrate_type_names[__entry->migrate_type], + __entry->src_cs, __entry->src_ps, __entry->dst_cs, __entry->dst_ps, + __entry->src_nt_cs, __entry->src_nt_ps, __entry->dst_nt_cs, __entry->dst_nt_ps) +); +#endif #endif /* _TRACE_WALT_H */ /* This part must be outside protection */ diff --git a/kernel/sched/rtg/rtg.c b/kernel/sched/rtg/rtg.c index a3fb4481bd78..76d8f366fff5 100644 --- a/kernel/sched/rtg/rtg.c +++ b/kernel/sched/rtg/rtg.c @@ -4,9 +4,16 @@ * */ #include +#include #include "../sched.h" #include "rtg.h" +#include "../walt.h" + +#define ADD_TASK 0 +#define REM_TASK 1 + +#define DEFAULT_GROUP_RATE 60 /* 60FPS */ struct related_thread_group *related_thread_groups[MAX_NUM_CGROUP_COLOC_ID]; static DEFINE_RWLOCK(related_thread_group_lock); @@ -48,6 +55,7 @@ int alloc_related_thread_groups(void) grp->id = i; INIT_LIST_HEAD(&grp->tasks); 
INIT_LIST_HEAD(&grp->list); + grp->window_size = NSEC_PER_SEC / DEFAULT_GROUP_RATE; raw_spin_lock_init(&grp->lock); related_thread_groups[i] = grp; @@ -69,6 +77,111 @@ int alloc_related_thread_groups(void) return ret; } +/* + * Task's cpu usage is accounted in: + * rq->curr/prev_runnable_sum, when its ->grp is NULL + * grp->cpu_time[cpu]->curr/prev_runnable_sum, when its ->grp is !NULL + * + * Transfer task's cpu usage between those counters when transitioning between + * groups + */ +static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp, + struct task_struct *p, int event) +{ + u64 wallclock; + struct group_cpu_time *cpu_time; + u64 *src_curr_runnable_sum, *dst_curr_runnable_sum; + u64 *src_prev_runnable_sum, *dst_prev_runnable_sum; + u64 *src_nt_curr_runnable_sum, *dst_nt_curr_runnable_sum; + u64 *src_nt_prev_runnable_sum, *dst_nt_prev_runnable_sum; + int migrate_type; + int cpu = cpu_of(rq); + bool new_task; + int i; + + wallclock = sched_ktime_clock(); + + update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0); + update_task_ravg(p, rq, TASK_UPDATE, wallclock, 0); + new_task = is_new_task(p); + + cpu_time = &rq->grp_time; + if (event == ADD_TASK) { + migrate_type = RQ_TO_GROUP; + + src_curr_runnable_sum = &rq->curr_runnable_sum; + dst_curr_runnable_sum = &cpu_time->curr_runnable_sum; + src_prev_runnable_sum = &rq->prev_runnable_sum; + dst_prev_runnable_sum = &cpu_time->prev_runnable_sum; + + src_nt_curr_runnable_sum = &rq->nt_curr_runnable_sum; + dst_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum; + src_nt_prev_runnable_sum = &rq->nt_prev_runnable_sum; + dst_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum; + + *src_curr_runnable_sum -= p->ravg.curr_window_cpu[cpu]; + *src_prev_runnable_sum -= p->ravg.prev_window_cpu[cpu]; + if (new_task) { + *src_nt_curr_runnable_sum -= + p->ravg.curr_window_cpu[cpu]; + *src_nt_prev_runnable_sum -= + p->ravg.prev_window_cpu[cpu]; + } + + update_cluster_load_subtractions(p, cpu, + rq->window_start, new_task); + + } else { + migrate_type = GROUP_TO_RQ; + + src_curr_runnable_sum = &cpu_time->curr_runnable_sum; + dst_curr_runnable_sum = &rq->curr_runnable_sum; + src_prev_runnable_sum = &cpu_time->prev_runnable_sum; + dst_prev_runnable_sum = &rq->prev_runnable_sum; + + src_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum; + dst_nt_curr_runnable_sum = &rq->nt_curr_runnable_sum; + src_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum; + dst_nt_prev_runnable_sum = &rq->nt_prev_runnable_sum; + + *src_curr_runnable_sum -= p->ravg.curr_window; + *src_prev_runnable_sum -= p->ravg.prev_window; + if (new_task) { + *src_nt_curr_runnable_sum -= p->ravg.curr_window; + *src_nt_prev_runnable_sum -= p->ravg.prev_window; + } + + /* + * Need to reset curr/prev windows for all CPUs, not just the + * ones in the same cluster. Since inter cluster migrations + * did not result in the appropriate book keeping, the values + * per CPU would be inaccurate. + */ + for_each_possible_cpu(i) { + p->ravg.curr_window_cpu[i] = 0; + p->ravg.prev_window_cpu[i] = 0; + } + } + + *dst_curr_runnable_sum += p->ravg.curr_window; + *dst_prev_runnable_sum += p->ravg.prev_window; + if (new_task) { + *dst_nt_curr_runnable_sum += p->ravg.curr_window; + *dst_nt_prev_runnable_sum += p->ravg.prev_window; + } + + /* + * When a task enter or exits a group, it's curr and prev windows are + * moved to a single CPU. 
This behavior might be sub-optimal in the + * exit case, however, it saves us the overhead of handling inter + * cluster migration fixups while the task is part of a related group. + */ + p->ravg.curr_window_cpu[cpu] = p->ravg.curr_window; + p->ravg.prev_window_cpu[cpu] = p->ravg.prev_window; + + trace_sched_migration_update_sum(p, migrate_type, rq); +} + static void remove_task_from_group(struct task_struct *p) { struct related_thread_group *grp = p->grp; @@ -78,6 +191,7 @@ static void remove_task_from_group(struct task_struct *p) unsigned long irqflag; rq = __task_rq_lock(p, &flag); + transfer_busy_time(rq, p->grp, p, REM_TASK); raw_spin_lock_irqsave(&grp->lock, irqflag); list_del_init(&p->grp_list); @@ -121,12 +235,17 @@ add_task_to_group(struct task_struct *p, struct related_thread_group *grp) * reference of p->grp in various hot-paths */ rq = __task_rq_lock(p, &flag); + transfer_busy_time(rq, grp, p, ADD_TASK); raw_spin_lock_irqsave(&grp->lock, irqflag); list_add(&p->grp_list, &grp->tasks); rcu_assign_pointer(p->grp, grp); - if (p->on_cpu) + if (p->on_cpu) { grp->nr_running++; + if (grp->nr_running == 1) + grp->mark_start = max(grp->mark_start, + sched_ktime_clock()); + } raw_spin_unlock_irqrestore(&grp->lock, irqflag); __task_rq_unlock(rq, &flag); @@ -232,6 +351,157 @@ void update_group_nr_running(struct task_struct *p, int event) rcu_read_unlock(); } +int sched_set_group_window_size(unsigned int grp_id, unsigned int window_size) +{ + struct related_thread_group *grp = NULL; + unsigned long flag; + + if (!window_size) + return -EINVAL; + + grp = lookup_related_thread_group(grp_id); + if (!grp) { + pr_err("set window size for group %d fail\n", grp_id); + return -ENODEV; + } + + raw_spin_lock_irqsave(&grp->lock, flag); + grp->window_size = window_size; + raw_spin_unlock_irqrestore(&grp->lock, flag); + + return 0; +} + +void group_time_rollover(struct group_ravg *ravg) +{ + ravg->prev_window_load = ravg->curr_window_load; + ravg->curr_window_load = 0; + ravg->prev_window_exec = ravg->curr_window_exec; + ravg->curr_window_exec = 0; +} + +int sched_set_group_window_rollover(unsigned int grp_id) +{ + struct related_thread_group *grp = NULL; + u64 wallclock; + unsigned long flag; + + grp = lookup_related_thread_group(grp_id); + if (!grp) { + pr_err("set window start for group %d fail\n", grp_id); + return -ENODEV; + } + + raw_spin_lock_irqsave(&grp->lock, flag); + + wallclock = sched_ktime_clock(); + grp->prev_window_time = wallclock - grp->window_start; + grp->window_start = wallclock; + + group_time_rollover(&grp->ravg); + raw_spin_unlock_irqrestore(&grp->lock, flag); + + return 0; +} + +static void add_to_group_time(struct related_thread_group *grp, struct rq *rq, u64 wallclock) +{ + u64 delta_exec, delta_load; + u64 mark_start = grp->mark_start; + u64 window_start = grp->window_start; + + if (unlikely(wallclock <= mark_start)) + return; + + /* per group load tracking in RTG */ + if (likely(mark_start >= window_start)) { + /* + * ws ms wc + * | | | + * V V V + * |---------------| + */ + delta_exec = wallclock - mark_start; + grp->ravg.curr_window_exec += delta_exec; + + delta_load = scale_exec_time(delta_exec, rq); + grp->ravg.curr_window_load += delta_load; + } else { + /* + * ms ws wc + * | | | + * V V V + * -----|---------- + */ + /* prev window statistic */ + delta_exec = window_start - mark_start; + grp->ravg.prev_window_exec += delta_exec; + + delta_load = scale_exec_time(delta_exec, rq); + grp->ravg.prev_window_load += delta_load; + + /* curr window statistic */ + delta_exec = 
wallclock - window_start; + grp->ravg.curr_window_exec += delta_exec; + + delta_load = scale_exec_time(delta_exec, rq); + grp->ravg.curr_window_load += delta_load; + } +} + +static inline void add_to_group_demand(struct related_thread_group *grp, + struct rq *rq, u64 wallclock) +{ + if (unlikely(wallclock <= grp->window_start)) + return; + + add_to_group_time(grp, rq, wallclock); +} + +static int account_busy_for_group_demand(struct task_struct *p, int event) +{ + /* + *No need to bother updating task demand for exiting tasks + * or the idle task. + */ + if (exiting_task(p) || is_idle_task(p)) + return 0; + + if (event == TASK_WAKE || event == TASK_MIGRATE) + return 0; + + return 1; +} + +void update_group_demand(struct task_struct *p, struct rq *rq, + int event, u64 wallclock) +{ + struct related_thread_group *grp; + + if (!account_busy_for_group_demand(p, event)) + return; + + rcu_read_lock(); + grp = task_related_thread_group(p); + if (!grp) { + rcu_read_unlock(); + return; + } + + raw_spin_lock(&grp->lock); + + if (grp->nr_running == 1) + grp->mark_start = max(grp->mark_start, p->ravg.mark_start); + + add_to_group_demand(grp, rq, wallclock); + + grp->mark_start = wallclock; + + raw_spin_unlock(&grp->lock); + + rcu_read_unlock(); +} + #ifdef CONFIG_SCHED_RTG_DEBUG #define seq_printf_rtg(m, x...) \ do { \ diff --git a/kernel/sched/rtg/rtg.h b/kernel/sched/rtg/rtg.h index 80661f8b2d32..5970d28cadef 100644 --- a/kernel/sched/rtg/rtg.h +++ b/kernel/sched/rtg/rtg.h @@ -14,6 +14,13 @@ int alloc_related_thread_groups(void); struct related_thread_group *lookup_related_thread_group(unsigned int group_id); struct related_thread_group *task_related_thread_group(struct task_struct *p); void update_group_nr_running(struct task_struct *p, int event); +struct rq; +void update_group_demand(struct task_struct *p, struct rq *rq, + int event, u64 wallclock); +int sched_set_group_window_size(unsigned int grp_id, unsigned int window_size); +int sched_set_group_window_rollover(unsigned int grp_id); +struct group_cpu_time *group_update_cpu_time(struct rq *rq, + struct related_thread_group *grp); #else static inline int alloc_related_thread_groups(void) { return 0; } #endif /* CONFIG_SCHED_RTG */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 22ff400d5b08..fdb69a9ad1f9 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1077,7 +1077,10 @@ struct rq { u64 nt_prev_runnable_sum; u64 cum_window_demand_scaled; struct load_subtractions load_subs[NUM_TRACKED_WINDOWS]; +#ifdef CONFIG_SCHED_RTG + struct group_cpu_time grp_time; #endif +#endif /* CONFIG_SCHED_WALT */ #ifdef CONFIG_IRQ_TIME_ACCOUNTING u64 prev_irq_time; diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c index f560321b8691..a2824cc9bc2e 100644 --- a/kernel/sched/walt.c +++ b/kernel/sched/walt.c @@ -32,6 +32,8 @@ const char *task_event_names[] = {"PUT_PREV_TASK", "PICK_NEXT_TASK", "TASK_WAKE", "TASK_MIGRATE", "TASK_UPDATE", "IRQ_UPDATE"}; +const char *migrate_type_names[] = {"GROUP_TO_RQ", "RQ_TO_GROUP", + "RQ_TO_RQ", "GROUP_TO_GROUP"}; #define SCHED_FREQ_ACCOUNT_WAIT_TIME 0 #define SCHED_ACCOUNT_WAIT_TIME 1 @@ -476,6 +478,13 @@ void fixup_busy_time(struct task_struct *p, int new_cpu) struct rq *dest_rq = cpu_rq(new_cpu); u64 wallclock; bool new_task; +#ifdef CONFIG_SCHED_RTG + u64 *src_curr_runnable_sum, *dst_curr_runnable_sum; + u64 *src_prev_runnable_sum, *dst_prev_runnable_sum; + u64 *src_nt_curr_runnable_sum, *dst_nt_curr_runnable_sum; + u64 *src_nt_prev_runnable_sum, *dst_nt_prev_runnable_sum; + struct 
related_thread_group *grp; +#endif if (!p->on_rq && p->state != TASK_WAKING) return; @@ -513,9 +522,58 @@ void fixup_busy_time(struct task_struct *p, int new_cpu) } new_task = is_new_task(p); +#ifdef CONFIG_SCHED_RTG + /* Protected by rq_lock */ + grp = task_related_thread_group(p); + + /* + * For frequency aggregation, we continue to do migration fixups + * even for intra cluster migrations. This is because, the aggregated + * load has to reported on a single CPU regardless. + */ + if (grp) { + struct group_cpu_time *cpu_time; + + cpu_time = &src_rq->grp_time; + src_curr_runnable_sum = &cpu_time->curr_runnable_sum; + src_prev_runnable_sum = &cpu_time->prev_runnable_sum; + src_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum; + src_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum; + + cpu_time = &dest_rq->grp_time; + dst_curr_runnable_sum = &cpu_time->curr_runnable_sum; + dst_prev_runnable_sum = &cpu_time->prev_runnable_sum; + dst_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum; + dst_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum; + + if (p->ravg.curr_window) { + *src_curr_runnable_sum -= p->ravg.curr_window; + *dst_curr_runnable_sum += p->ravg.curr_window; + if (new_task) { + *src_nt_curr_runnable_sum -= + p->ravg.curr_window; + *dst_nt_curr_runnable_sum += + p->ravg.curr_window; + } + } - inter_cluster_migration_fixup(p, new_cpu, - task_cpu(p), new_task); + if (p->ravg.prev_window) { + *src_prev_runnable_sum -= p->ravg.prev_window; + *dst_prev_runnable_sum += p->ravg.prev_window; + if (new_task) { + *src_nt_prev_runnable_sum -= + p->ravg.prev_window; + *dst_nt_prev_runnable_sum += + p->ravg.prev_window; + } + } + } else { +#endif + inter_cluster_migration_fixup(p, new_cpu, + task_cpu(p), new_task); +#ifdef CONFIG_SCHED_RTG + } +#endif if (!same_freq_domain(new_cpu, task_cpu(p))) irq_work_queue(&walt_migration_irq_work); @@ -634,15 +692,6 @@ static void update_history(struct rq *rq, struct task_struct *p, #define DIV64_U64_ROUNDUP(X, Y) div64_u64((X) + (Y - 1), Y) -static inline u64 scale_exec_time(u64 delta, struct rq *rq) -{ - unsigned long capcurr = capacity_curr_of(cpu_of(rq)); - - delta = (delta * capcurr) >> SCHED_CAPACITY_SHIFT; - - return delta; -} - static u64 add_to_task_demand(struct rq *rq, struct task_struct *p, u64 delta) { delta = scale_exec_time(delta, rq); @@ -712,6 +761,10 @@ static u64 update_task_demand(struct task_struct *p, struct rq *rq, u32 window_size = sched_ravg_window; u64 runtime; +#ifdef CONFIG_SCHED_RTG + update_group_demand(p, rq, event, wallclock); +#endif + new_window = mark_start < window_start; if (!account_busy_for_task_demand(rq, p, event)) { if (new_window) @@ -870,6 +923,10 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, u64 *nt_prev_runnable_sum = &rq->nt_prev_runnable_sum; bool new_task; int cpu = rq->cpu; +#ifdef CONFIG_SCHED_RTG + struct group_cpu_time *cpu_time; + struct related_thread_group *grp; +#endif new_window = mark_start < window_start; if (new_window) { @@ -895,6 +952,19 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, if (!account_busy_for_cpu_time(rq, p, irqtime, event)) goto done; +#ifdef CONFIG_SCHED_RTG + grp = task_related_thread_group(p); + if (grp) { + cpu_time = &rq->grp_time; + + curr_runnable_sum = &cpu_time->curr_runnable_sum; + prev_runnable_sum = &cpu_time->prev_runnable_sum; + + nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum; + nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum; + } +#endif + if (!new_window) { /* * 
account_busy_for_cpu_time() = 1 so busy time needs diff --git a/kernel/sched/walt.h b/kernel/sched/walt.h index fcb1555d53f8..84da97ccce20 100644 --- a/kernel/sched/walt.h +++ b/kernel/sched/walt.h @@ -45,6 +45,15 @@ static inline struct sched_cluster *cpu_cluster(int cpu) return cpu_rq(cpu)->cluster; } +static inline u64 scale_exec_time(u64 delta, struct rq *rq) +{ + unsigned long capcurr = capacity_curr_of(cpu_of(rq)); + + delta = (delta * capcurr) >> SCHED_CAPACITY_SHIFT; + + return delta; +} + static inline bool is_new_task(struct task_struct *p) { return p->ravg.active_windows < SCHED_NEW_TASK_WINDOWS; @@ -192,6 +201,8 @@ static inline void assign_cluster_ids(struct list_head *head) } } +extern void update_cluster_load_subtractions(struct task_struct *p, + int cpu, u64 ws, bool new_task); #else /* CONFIG_SCHED_WALT */ static inline void walt_sched_init_rq(struct rq *rq) { } -- Gitee From 39ceb100c088bef899f96f2b733fe1f6b90b0564 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Mon, 14 Feb 2022 13:19:01 +0800 Subject: [PATCH 05/11] sched: scehd: Introduce sched_update_rtg_tick() ohos inclusion category: feature issue: #I4SULH CVE: NA ------------------------------------------- sched_update_rtg_tick() is called in tick. Signed-off-by: Li Ming --- include/linux/sched/rtg.h | 7 +++++++ kernel/sched/core.c | 3 +++ kernel/sched/rtg/rtg.c | 17 +++++++++++++++++ kernel/sched/rtg/rtg.h | 1 + 4 files changed, 28 insertions(+) diff --git a/include/linux/sched/rtg.h b/include/linux/sched/rtg.h index a35114766acb..b5cc92fcece9 100644 --- a/include/linux/sched/rtg.h +++ b/include/linux/sched/rtg.h @@ -24,6 +24,8 @@ struct group_ravg { unsigned long normalized_util; }; +struct rtg_class; + struct related_thread_group { int id; raw_spinlock_t lock; @@ -37,6 +39,11 @@ struct related_thread_group { u64 prev_window_time; /* rtg window information for WALT */ unsigned int window_size; + const struct rtg_class *rtg_class; +}; + +struct rtg_class { + void (*sched_update_rtg_tick)(struct related_thread_group *grp); }; int sched_set_group_id(struct task_struct *p, unsigned int group_id); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 574c155b9e3a..20dd5009e315 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4124,6 +4124,9 @@ void scheduler_tick(void) rq_unlock(rq, &rf); +#ifdef CONFIG_SCHED_RTG + sched_update_rtg_tick(curr); +#endif perf_event_task_tick(); #ifdef CONFIG_SMP diff --git a/kernel/sched/rtg/rtg.c b/kernel/sched/rtg/rtg.c index 76d8f366fff5..78fbcd1b9cd3 100644 --- a/kernel/sched/rtg/rtg.c +++ b/kernel/sched/rtg/rtg.c @@ -502,6 +502,23 @@ void update_group_demand(struct task_struct *p, struct rq *rq, rcu_read_unlock(); } +void sched_update_rtg_tick(struct task_struct *p) +{ + struct related_thread_group *grp = NULL; + + rcu_read_lock(); + grp = task_related_thread_group(p); + if (!grp || list_empty(&grp->tasks)) { + rcu_read_unlock(); + return; + } + + if (grp->rtg_class && grp->rtg_class->sched_update_rtg_tick) + grp->rtg_class->sched_update_rtg_tick(grp); + + rcu_read_unlock(); +} + #ifdef CONFIG_SCHED_RTG_DEBUG #define seq_printf_rtg(m, x...) 
\ do { \ diff --git a/kernel/sched/rtg/rtg.h b/kernel/sched/rtg/rtg.h index 5970d28cadef..e32c67aebb96 100644 --- a/kernel/sched/rtg/rtg.h +++ b/kernel/sched/rtg/rtg.h @@ -21,6 +21,7 @@ int sched_set_group_window_size(unsigned int grp_id, unsigned int window_size); int sched_set_group_window_rollover(unsigned int grp_id); struct group_cpu_time *group_update_cpu_time(struct rq *rq, struct related_thread_group *grp); +void sched_update_rtg_tick(struct task_struct *p); #else static inline int alloc_related_thread_groups(void) { return 0; } #endif /* CONFIG_SCHED_RTG */ -- Gitee From c18d29d98ab6d78f6ba5a16b4ba980963368624c Mon Sep 17 00:00:00 2001 From: Li Ming Date: Mon, 14 Feb 2022 14:51:40 +0800 Subject: [PATCH 06/11] sched: Introduce perferred cluster to optimize cpu selection for related threads codeaurora inclusion category: feature issue: #I4SULH CVE: NA Signed-off-by: Li Ming ------------------------------------------- Set the preferred cluster of group according to the group load and prioritize cpu selection for related threads from preferred cluster Signed-off-by: Vikram Mulukutla Signed-off-by: Satya Durga Srinivasu Prabhala Signed-off-by: Srinath Sridharan --- include/linux/sched/rtg.h | 1 + kernel/sched/fair.c | 76 ++++++++++++++-- kernel/sched/rtg/rtg.c | 186 ++++++++++++++++++++++++++++++++++++++ kernel/sched/rtg/rtg.h | 12 +++ kernel/sched/sched.h | 13 ++- kernel/sched/walt.h | 6 ++ 6 files changed, 286 insertions(+), 8 deletions(-) diff --git a/include/linux/sched/rtg.h b/include/linux/sched/rtg.h index b5cc92fcece9..eae7f83808ff 100644 --- a/include/linux/sched/rtg.h +++ b/include/linux/sched/rtg.h @@ -40,6 +40,7 @@ struct related_thread_group { /* rtg window information for WALT */ unsigned int window_size; const struct rtg_class *rtg_class; + struct sched_cluster *preferred_cluster; }; struct rtg_class { diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 42d51caa611c..3b8d6c1dfc30 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -22,6 +22,7 @@ */ #include "sched.h" #include "walt.h" +#include "rtg/rtg.h" #ifdef CONFIG_SCHED_WALT static void walt_fixup_sched_stats_fair(struct rq *rq, struct task_struct *p, @@ -773,7 +774,6 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu); static unsigned long task_h_load(struct task_struct *p); -static unsigned long capacity_of(int cpu); /* Give new sched_entity start runnable values to heavy its load in infant time */ void init_entity_runnable_average(struct sched_entity *se) @@ -4104,8 +4104,27 @@ static inline int task_fits_capacity(struct task_struct *p, long capacity) return fits_capacity(uclamp_task_util(p), capacity); } +#ifdef CONFIG_SCHED_RTG +bool task_fits_max(struct task_struct *p, int cpu) +{ + unsigned long capacity = capacity_orig_of(cpu); + unsigned long max_capacity = cpu_rq(cpu)->rd->max_cpu_capacity; + + if (capacity == max_capacity) + return true; + + return task_fits_capacity(p, capacity); +} +#endif + static inline void update_misfit_status(struct task_struct *p, struct rq *rq) { + bool task_fits = false; +#ifdef CONFIG_SCHED_RTG + int cpu = cpu_of(rq); + struct cpumask *rtg_target = NULL; +#endif + if (!static_branch_unlikely(&sched_asym_cpucapacity)) return; @@ -4114,7 +4133,17 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) return; } - if (task_fits_capacity(p, capacity_of(cpu_of(rq)))) { +#ifdef CONFIG_SCHED_RTG + rtg_target = find_rtg_target(p); + if 
(rtg_target) + task_fits = capacity_orig_of(cpu) >= + capacity_orig_of(cpumask_first(rtg_target)); + else + task_fits = task_fits_capacity(p, capacity_of(cpu_of(rq))); +#else + task_fits = task_fits_capacity(p, capacity_of(cpu_of(rq))); +#endif + if (task_fits) { rq->misfit_task_load = 0; return; } @@ -5805,11 +5834,6 @@ static unsigned long cpu_runnable_without(struct rq *rq, struct task_struct *p) return runnable; } -static unsigned long capacity_of(int cpu) -{ - return cpu_rq(cpu)->cpu_capacity; -} - static void record_wakee(struct task_struct *p) { /* @@ -6574,6 +6598,12 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p) return min_t(unsigned long, util, capacity_orig_of(cpu)); } +#ifdef CONFIG_SCHED_RTG +unsigned long capacity_spare_without(int cpu, struct task_struct *p) +{ + return max_t(long, capacity_of(cpu) - cpu_util_without(cpu, p), 0); +} +#endif /* * Predicts what cpu_util(@cpu) would return if @p was migrated (and enqueued) * to @dst_cpu. @@ -6840,6 +6870,12 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f int new_cpu = prev_cpu; int want_affine = 0; int sync = (wake_flags & WF_SYNC) && !(current->flags & PF_EXITING); +#ifdef CONFIG_SCHED_RTG + int target_cpu = -1; + target_cpu = find_rtg_cpu(p); + if (target_cpu >= 0) + return target_cpu; +#endif if (sd_flag & SD_BALANCE_WAKE) { record_wakee(p); @@ -7524,6 +7560,7 @@ enum migration_type { #define LBF_SOME_PINNED 0x08 #define LBF_NOHZ_STATS 0x10 #define LBF_NOHZ_AGAIN 0x20 +#define LBF_IGNORE_PREFERRED_CLUSTER_TASKS 0x200 struct lb_env { struct sched_domain *sd; @@ -7706,6 +7743,13 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) /* Record that we found atleast one task that could run on dst_cpu */ env->flags &= ~LBF_ALL_PINNED; + +#ifdef CONFIG_SCHED_RTG + if (env->flags & LBF_IGNORE_PREFERRED_CLUSTER_TASKS && + !preferred_cluster(cpu_rq(env->dst_cpu)->cluster, p)) + return 0; +#endif + if (task_running(env->src_rq, p)) { schedstat_inc(p->se.statistics.nr_failed_migrations_running); return 0; @@ -7798,12 +7842,21 @@ static int detach_tasks(struct lb_env *env) unsigned long util, load; struct task_struct *p; int detached = 0; +#ifdef CONFIG_SCHED_RTG + int orig_loop = env->loop; +#endif lockdep_assert_held(&env->src_rq->lock); if (env->imbalance <= 0) return 0; +#ifdef CONFIG_SCHED_RTG + if (!same_cluster(env->dst_cpu, env->src_cpu)) + env->flags |= LBF_IGNORE_PREFERRED_CLUSTER_TASKS; + +redo: +#endif while (!list_empty(tasks)) { /* * We don't want to steal all, otherwise we may be treated likewise, @@ -7905,6 +7958,15 @@ static int detach_tasks(struct lb_env *env) list_move(&p->se.group_node, tasks); } +#ifdef CONFIG_SCHED_RTG + if (env->flags & LBF_IGNORE_PREFERRED_CLUSTER_TASKS && !detached) { + tasks = &env->src_rq->cfs_tasks; + env->flags &= ~LBF_IGNORE_PREFERRED_CLUSTER_TASKS; + env->loop = orig_loop; + goto redo; + } +#endif + /* * Right now, this is one of only two places we collect this stat * so we can safely collect detach_one_task() stats here rather diff --git a/kernel/sched/rtg/rtg.c b/kernel/sched/rtg/rtg.c index 78fbcd1b9cd3..200895617a71 100644 --- a/kernel/sched/rtg/rtg.c +++ b/kernel/sched/rtg/rtg.c @@ -4,6 +4,7 @@ * */ #include +#include #include #include "../sched.h" @@ -182,6 +183,8 @@ static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp, trace_sched_migration_update_sum(p, migrate_type, rq); } +static void _set_preferred_cluster(struct related_thread_group *grp, + int sched_cluster_id); static void 
remove_task_from_group(struct task_struct *p) { struct related_thread_group *grp = p->grp; @@ -207,6 +210,8 @@ static void remove_task_from_group(struct task_struct *p) if (!list_empty(&grp->tasks)) empty_group = false; + else + _set_preferred_cluster(grp, -1); raw_spin_unlock_irqrestore(&grp->lock, irqflag); __task_rq_unlock(rq, &flag); @@ -519,6 +524,185 @@ void sched_update_rtg_tick(struct task_struct *p) rcu_read_unlock(); } +int preferred_cluster(struct sched_cluster *cluster, struct task_struct *p) +{ + struct related_thread_group *grp = NULL; + int rc = 1; + + rcu_read_lock(); + + grp = task_related_thread_group(p); + if (grp != NULL) + rc = (grp->preferred_cluster == cluster); + + rcu_read_unlock(); + return rc; +} + +unsigned int get_cluster_grp_running(int cluster_id) +{ + struct related_thread_group *grp = NULL; + unsigned int total_grp_running = 0; + unsigned long flag, rtg_flag; + unsigned int i; + + read_lock_irqsave(&related_thread_group_lock, rtg_flag); + + /* grp_id 0 is used for exited tasks */ + for (i = 1; i < MAX_NUM_CGROUP_COLOC_ID; i++) { + grp = lookup_related_thread_group(i); + if (!grp) + continue; + + raw_spin_lock_irqsave(&grp->lock, flag); + if (grp->preferred_cluster != NULL && + grp->preferred_cluster->id == cluster_id) + total_grp_running += grp->nr_running; + raw_spin_unlock_irqrestore(&grp->lock, flag); + } + read_unlock_irqrestore(&related_thread_group_lock, rtg_flag); + + return total_grp_running; +} + +static void _set_preferred_cluster(struct related_thread_group *grp, + int sched_cluster_id) +{ + struct sched_cluster *cluster = NULL; + struct sched_cluster *cluster_found = NULL; + + if (sched_cluster_id == -1) { + grp->preferred_cluster = NULL; + return; + } + + for_each_sched_cluster_reverse(cluster) { + if (cluster->id == sched_cluster_id) { + cluster_found = cluster; + break; + } + } + + if (cluster_found != NULL) + grp->preferred_cluster = cluster_found; + else + pr_err("cannot found sched_cluster_id=%d\n", sched_cluster_id); +} + +/* + * sched_cluster_id == -1: grp will set to NULL + */ +static void set_preferred_cluster(struct related_thread_group *grp, + int sched_cluster_id) +{ + unsigned long flag; + + raw_spin_lock_irqsave(&grp->lock, flag); + _set_preferred_cluster(grp, sched_cluster_id); + raw_spin_unlock_irqrestore(&grp->lock, flag); +} + +int sched_set_group_preferred_cluster(unsigned int grp_id, int sched_cluster_id) +{ + struct related_thread_group *grp = NULL; + + /* DEFAULT_CGROUP_COLOC_ID is a reserved id */ + if (grp_id == DEFAULT_CGROUP_COLOC_ID || + grp_id >= MAX_NUM_CGROUP_COLOC_ID) + return -EINVAL; + + grp = lookup_related_thread_group(grp_id); + if (!grp) { + pr_err("set preferred cluster for group %d fail\n", grp_id); + return -ENODEV; + } + set_preferred_cluster(grp, sched_cluster_id); + + return 0; +} + +struct cpumask *find_rtg_target(struct task_struct *p) +{ + struct related_thread_group *grp = NULL; + struct sched_cluster *preferred_cluster = NULL; + struct cpumask *rtg_target = NULL; + + rcu_read_lock(); + grp = task_related_thread_group(p); + rcu_read_unlock(); + + if (!grp) + return NULL; + + preferred_cluster = grp->preferred_cluster; + if (!preferred_cluster) + return NULL; + + rtg_target = &preferred_cluster->cpus; + if (!task_fits_max(p, cpumask_first(rtg_target))) + return NULL; + + return rtg_target; +} + +int find_rtg_cpu(struct task_struct *p) +{ + int i; + cpumask_t search_cpus = CPU_MASK_NONE; + int max_spare_cap_cpu = -1; + unsigned long max_spare_cap = 0; + int idle_backup_cpu = -1; + struct cpumask 
*preferred_cpus = find_rtg_target(p); + + if (!preferred_cpus) + return -1; + + cpumask_and(&search_cpus, p->cpus_ptr, cpu_online_mask); +#ifdef CONFIG_CPU_ISOLATION_OPT + cpumask_andnot(&search_cpus, &search_cpus, cpu_isolated_mask); +#endif + + /* search the perferred idle cpu */ + for_each_cpu_and(i, &search_cpus, preferred_cpus) { + if (is_reserved(i)) + continue; + + if (idle_cpu(i) || (i == task_cpu(p) && p->state == TASK_RUNNING)) + return i; + } + + for_each_cpu(i, &search_cpus) { + unsigned long spare_cap; + + if (sched_cpu_high_irqload(i)) + continue; + + if (is_reserved(i)) + continue; + + /* take the Active LB CPU as idle_backup_cpu */ + if (idle_cpu(i) || (i == task_cpu(p) && p->state == TASK_RUNNING)) { + /* find the idle_backup_cpu with max capacity */ + if (idle_backup_cpu == -1 || + capacity_orig_of(i) > capacity_orig_of(idle_backup_cpu)) + idle_backup_cpu = i; + + continue; + } + + spare_cap = capacity_spare_without(i, p); + if (spare_cap > max_spare_cap) { + max_spare_cap = spare_cap; + max_spare_cap_cpu = i; + } + } + + if (idle_backup_cpu != -1) + return idle_backup_cpu; + + return max_spare_cap_cpu; +} + #ifdef CONFIG_SCHED_RTG_DEBUG #define seq_printf_rtg(m, x...) \ do { \ @@ -532,6 +716,8 @@ static void print_rtg_info(struct seq_file *file, const struct related_thread_group *grp) { seq_printf_rtg(file, "RTG_ID : %d\n", grp->id); + seq_printf_rtg(file, "RTG_CLUSTER : %d\n", + grp->preferred_cluster ? grp->preferred_cluster->id : -1); } static char rtg_task_state_to_char(const struct task_struct *tsk) diff --git a/kernel/sched/rtg/rtg.h b/kernel/sched/rtg/rtg.h index e32c67aebb96..a158ab74f292 100644 --- a/kernel/sched/rtg/rtg.h +++ b/kernel/sched/rtg/rtg.h @@ -8,6 +8,9 @@ #include #include +#define for_each_sched_cluster_reverse(cluster) \ + list_for_each_entry_reverse(cluster, &cluster_head, list) + #ifdef CONFIG_SCHED_RTG void init_task_rtg(struct task_struct *p); int alloc_related_thread_groups(void); @@ -22,7 +25,16 @@ int sched_set_group_window_rollover(unsigned int grp_id); struct group_cpu_time *group_update_cpu_time(struct rq *rq, struct related_thread_group *grp); void sched_update_rtg_tick(struct task_struct *p); +int preferred_cluster(struct sched_cluster *cluster, struct task_struct *p); +int sched_set_group_preferred_cluster(unsigned int grp_id, int sched_cluster_id); +struct cpumask *find_rtg_target(struct task_struct *p); +int find_rtg_cpu(struct task_struct *p); #else static inline int alloc_related_thread_groups(void) { return 0; } +static inline int sched_set_group_preferred_cluster(unsigned int grp_id, + int sched_cluster_id) +{ + return 0; +} #endif /* CONFIG_SCHED_RTG */ #endif diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index fdb69a9ad1f9..9630e3c00558 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -477,7 +477,6 @@ struct task_group { /* Effective clamp values used for a task group */ struct uclamp_se uclamp[UCLAMP_CNT]; #endif - }; #ifdef CONFIG_FAIR_GROUP_SCHED @@ -2594,6 +2593,11 @@ static inline bool uclamp_is_used(void) #endif #ifdef CONFIG_SMP +static inline unsigned long capacity_of(int cpu) +{ + return cpu_rq(cpu)->cpu_capacity; +} + static inline unsigned long capacity_orig_of(int cpu) { return cpu_rq(cpu)->cpu_capacity_orig; @@ -2748,6 +2752,13 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) void swake_up_all_locked(struct swait_queue_head *q); void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); +#ifdef CONFIG_SCHED_RTG +extern bool task_fits_max(struct 
task_struct *p, int cpu); +extern unsigned long capacity_spare_without(int cpu, struct task_struct *p); +extern int update_preferred_cluster(struct related_thread_group *grp, + struct task_struct *p, u32 old_load, bool from_tick); +#endif + #ifdef CONFIG_SCHED_WALT static inline int cluster_first_cpu(struct sched_cluster *cluster) { diff --git a/kernel/sched/walt.h b/kernel/sched/walt.h index 84da97ccce20..a1fba5b65640 100644 --- a/kernel/sched/walt.h +++ b/kernel/sched/walt.h @@ -45,6 +45,11 @@ static inline struct sched_cluster *cpu_cluster(int cpu) return cpu_rq(cpu)->cluster; } +static inline int same_cluster(int src_cpu, int dst_cpu) +{ + return cpu_rq(src_cpu)->cluster == cpu_rq(dst_cpu)->cluster; +} + static inline u64 scale_exec_time(u64 delta, struct rq *rq) { unsigned long capcurr = capacity_curr_of(cpu_of(rq)); @@ -243,6 +248,7 @@ static inline int sched_cpu_high_irqload(int cpu) { return 0; } +static inline int same_cluster(int src_cpu, int dst_cpu) { return 1; } #endif /* CONFIG_SCHED_WALT */ #endif /* __WALT_H */ -- Gitee From ffa3ddb88dd6f59899a000ac6a98e19a37092371 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Mon, 14 Feb 2022 20:44:20 +0800 Subject: [PATCH 07/11] sched: Add interfaces for normalized utilization of related thread group codeaurora inclusion category: feature issue: #I4SULH CVE: NA Signed-off-by: Li Ming ------------------------------------------- Use normalized util as RTG util and support the RTG util invalid interval adjustable. Signed-off-by: Vikram Mulukutla Signed-off-by: Satya Durga Srinivasu Prabhala Signed-off-by: Srinath Sridharan --- include/linux/sched/rtg.h | 4 + kernel/sched/rtg/rtg.c | 214 +++++++++++++++++++++++++++++++++++++- kernel/sched/rtg/rtg.h | 11 +- kernel/sched/walt.c | 2 +- 4 files changed, 226 insertions(+), 5 deletions(-) diff --git a/include/linux/sched/rtg.h b/include/linux/sched/rtg.h index eae7f83808ff..d27e1507e334 100644 --- a/include/linux/sched/rtg.h +++ b/include/linux/sched/rtg.h @@ -41,6 +41,10 @@ struct related_thread_group { unsigned int window_size; const struct rtg_class *rtg_class; struct sched_cluster *preferred_cluster; + int max_boost; + unsigned long util_invalid_interval; /* in nanoseconds */ + unsigned long util_update_timeout; /* in nanoseconds */ + u64 last_util_update_time; }; struct rtg_class { diff --git a/kernel/sched/rtg/rtg.c b/kernel/sched/rtg/rtg.c index 200895617a71..016b2143ea8c 100644 --- a/kernel/sched/rtg/rtg.c +++ b/kernel/sched/rtg/rtg.c @@ -15,6 +15,8 @@ #define REM_TASK 1 #define DEFAULT_GROUP_RATE 60 /* 60FPS */ +#define DEFAULT_UTIL_INVALID_INTERVAL (~0U) /* ns */ +#define DEFAULT_UTIL_UPDATE_TIMEOUT 20000000 /* ns */ struct related_thread_group *related_thread_groups[MAX_NUM_CGROUP_COLOC_ID]; static DEFINE_RWLOCK(related_thread_group_lock); @@ -57,6 +59,9 @@ int alloc_related_thread_groups(void) INIT_LIST_HEAD(&grp->tasks); INIT_LIST_HEAD(&grp->list); grp->window_size = NSEC_PER_SEC / DEFAULT_GROUP_RATE; + grp->util_invalid_interval = DEFAULT_UTIL_INVALID_INTERVAL; + grp->util_update_timeout = DEFAULT_UTIL_UPDATE_TIMEOUT; + grp->max_boost = 0; raw_spin_lock_init(&grp->lock); related_thread_groups[i] = grp; @@ -208,10 +213,15 @@ static void remove_task_from_group(struct task_struct *p) grp->nr_running = 0; } - if (!list_empty(&grp->tasks)) + if (!list_empty(&grp->tasks)) { empty_group = false; - else + } else { +#ifdef CONFIG_UCLAMP_TASK + grp->max_boost = 0; +#endif _set_preferred_cluster(grp, -1); + grp->ravg.normalized_util = 0; + } raw_spin_unlock_irqrestore(&grp->lock, irqflag); 
__task_rq_unlock(rq, &flag); @@ -234,6 +244,9 @@ add_task_to_group(struct task_struct *p, struct related_thread_group *grp) struct rq *rq = NULL; struct rq_flags flag; unsigned long irqflag; +#ifdef CONFIG_UCLAMP_TASK + int boost; +#endif /* * Change p->grp under rq->lock. Will prevent races with read-side @@ -252,6 +265,11 @@ add_task_to_group(struct task_struct *p, struct related_thread_group *grp) sched_ktime_clock()); } +#ifdef CONFIG_UCLAMP_TASK + boost = (int)uclamp_eff_value(p, UCLAMP_MIN); + if (boost > grp->max_boost) + grp->max_boost = boost; +#endif raw_spin_unlock_irqrestore(&grp->lock, irqflag); __task_rq_unlock(rq, &flag); @@ -328,9 +346,10 @@ unsigned int sched_get_group_id(struct task_struct *p) return group_id; } -void update_group_nr_running(struct task_struct *p, int event) +void update_group_nr_running(struct task_struct *p, int event, u64 wallclock) { struct related_thread_group *grp; + bool need_update = false; rcu_read_lock(); grp = task_related_thread_group(p); @@ -351,9 +370,17 @@ void update_group_nr_running(struct task_struct *p, int event) grp->nr_running = 0; } + /* update preferred cluster if no update long */ + if (wallclock - grp->last_util_update_time > grp->util_update_timeout) + need_update = true; + raw_spin_unlock(&grp->lock); rcu_read_unlock(); + + if (need_update && grp->rtg_class && grp->rtg_class->sched_update_rtg_tick && + grp->id != DEFAULT_CGROUP_COLOC_ID) + grp->rtg_class->sched_update_rtg_tick(grp); } int sched_set_group_window_size(unsigned int grp_id, unsigned int window_size) @@ -390,6 +417,10 @@ int sched_set_group_window_rollover(unsigned int grp_id) struct related_thread_group *grp = NULL; u64 wallclock; unsigned long flag; +#ifdef CONFIG_UCLAMP_TASK + struct task_struct *p = NULL; + int boost; +#endif grp = lookup_related_thread_group(grp_id); if (!grp) { @@ -402,6 +433,15 @@ int sched_set_group_window_rollover(unsigned int grp_id) wallclock = sched_ktime_clock(); grp->prev_window_time = wallclock - grp->window_start; grp->window_start = wallclock; + grp->max_boost = 0; + +#ifdef CONFIG_UCLAMP_TASK + list_for_each_entry(p, &grp->tasks, grp_list) { + boost = (int)uclamp_eff_value(p, UCLAMP_MIN); + if (boost > 0) + grp->max_boost = boost; + } +#endif group_time_rollover(&grp->ravg); raw_spin_unlock_irqrestore(&grp->lock, flag); @@ -703,6 +743,172 @@ int find_rtg_cpu(struct task_struct *p) return max_spare_cap_cpu; } +int sched_set_group_util_invalid_interval(unsigned int grp_id, + unsigned int interval) +{ + struct related_thread_group *grp = NULL; + unsigned long flag; + + if (interval == 0) + return -EINVAL; + + /* DEFAULT_CGROUP_COLOC_ID is a reserved id */ + if (grp_id == DEFAULT_CGROUP_COLOC_ID || + grp_id >= MAX_NUM_CGROUP_COLOC_ID) + return -EINVAL; + + grp = lookup_related_thread_group(grp_id); + if (!grp) { + pr_err("set invalid interval for group %d fail\n", grp_id); + return -ENODEV; + } + + raw_spin_lock_irqsave(&grp->lock, flag); + if ((signed int)interval < 0) + grp->util_invalid_interval = DEFAULT_UTIL_INVALID_INTERVAL; + else + grp->util_invalid_interval = interval * NSEC_PER_MSEC; + + raw_spin_unlock_irqrestore(&grp->lock, flag); + + return 0; +} + +static inline bool +group_should_invalid_util(struct related_thread_group *grp, u64 now) +{ + if (grp->util_invalid_interval == DEFAULT_UTIL_INVALID_INTERVAL) + return false; + + return true; +} + +static inline bool valid_normalized_util(struct related_thread_group *grp) +{ + struct task_struct *p = NULL; + cpumask_t rtg_cpus = CPU_MASK_NONE; + bool valid = false; + + if 
(grp->nr_running != 0) { + list_for_each_entry(p, &grp->tasks, grp_list) { + get_task_struct(p); + if (p->state == TASK_RUNNING) + cpumask_set_cpu(task_cpu(p), &rtg_cpus); + put_task_struct(p); + } + + valid = cpumask_intersects(&rtg_cpus, + &grp->preferred_cluster->cpus); + } + + return valid; +} + +void sched_get_max_group_util(const struct cpumask *query_cpus, + unsigned long *util, unsigned int *freq) +{ + struct related_thread_group *grp = NULL; + unsigned long max_grp_util = 0; + unsigned int max_grp_freq = 0; + u64 now = ktime_get_ns(); + unsigned long rtg_flag; + unsigned long flag; + + /* + * sum the prev_runnable_sum for each rtg, + * return the max rtg->load + */ + read_lock_irqsave(&related_thread_group_lock, rtg_flag); + if (list_empty(&active_related_thread_groups)) + goto unlock; + + for_each_related_thread_group(grp) { + raw_spin_lock_irqsave(&grp->lock, flag); + if (!list_empty(&grp->tasks) && + grp->preferred_cluster != NULL && + cpumask_intersects(query_cpus, + &grp->preferred_cluster->cpus) && + !group_should_invalid_util(grp, now)) { + + if (grp->ravg.normalized_util > max_grp_util && + valid_normalized_util(grp)) + max_grp_util = grp->ravg.normalized_util; + } + raw_spin_unlock_irqrestore(&grp->lock, flag); + } + +unlock: + read_unlock_irqrestore(&related_thread_group_lock, rtg_flag); + + *freq = max_grp_freq; + *util = max_grp_util; +} + +static struct sched_cluster *best_cluster(struct related_thread_group *grp) +{ + struct sched_cluster *cluster = NULL; + struct sched_cluster *max_cluster = NULL; + int cpu; + unsigned long util = grp->ravg.normalized_util; + unsigned long boosted_grp_util = util + grp->max_boost; + unsigned long max_cap = 0; + unsigned long cap = 0; + + /* find new cluster */ + for_each_sched_cluster(cluster) { + cpu = cpumask_first(&cluster->cpus); + cap = capacity_orig_of(cpu); + if (cap > max_cap) { + max_cap = cap; + max_cluster = cluster; + } + + if (boosted_grp_util <= cap) + return cluster; + } + + return max_cluster; +} + +int sched_set_group_normalized_util(unsigned int grp_id, unsigned long util, + unsigned int flag) +{ + struct related_thread_group *grp = NULL; + u64 now; + unsigned long flags; + struct sched_cluster *preferred_cluster = NULL; + + grp = lookup_related_thread_group(grp_id); + if (!grp) { + pr_err("set normalized util for group %d fail\n", grp_id); + return -ENODEV; + } + + raw_spin_lock_irqsave(&grp->lock, flags); + + if (list_empty(&grp->tasks)) { + raw_spin_unlock_irqrestore(&grp->lock, flags); + return 0; + } + + grp->ravg.normalized_util = util; + + preferred_cluster = best_cluster(grp); + + /* update prev_cluster force when preferred_cluster changed */ + if (!grp->preferred_cluster) + grp->preferred_cluster = preferred_cluster; + else if (grp->preferred_cluster != preferred_cluster) + grp->preferred_cluster = preferred_cluster; + + now = ktime_get_ns(); + grp->last_util_update_time = now; + + raw_spin_unlock_irqrestore(&grp->lock, flags); + + return 0; +} + #ifdef CONFIG_SCHED_RTG_DEBUG #define seq_printf_rtg(m, x...) \ do { \ @@ -716,6 +922,8 @@ static void print_rtg_info(struct seq_file *file, const struct related_thread_group *grp) { seq_printf_rtg(file, "RTG_ID : %d\n", grp->id); + seq_printf_rtg(file, "RTG_INTERVAL : INVALID:%lums\n", + grp->util_invalid_interval / NSEC_PER_MSEC); seq_printf_rtg(file, "RTG_CLUSTER : %d\n", grp->preferred_cluster ? 
grp->preferred_cluster->id : -1); } diff --git a/kernel/sched/rtg/rtg.h b/kernel/sched/rtg/rtg.h index a158ab74f292..abd70d449ddb 100644 --- a/kernel/sched/rtg/rtg.h +++ b/kernel/sched/rtg/rtg.h @@ -16,7 +16,7 @@ void init_task_rtg(struct task_struct *p); int alloc_related_thread_groups(void); struct related_thread_group *lookup_related_thread_group(unsigned int group_id); struct related_thread_group *task_related_thread_group(struct task_struct *p); -void update_group_nr_running(struct task_struct *p, int event); +void update_group_nr_running(struct task_struct *p, int event, u64 wallclock); struct rq; void update_group_demand(struct task_struct *p, struct rq *rq, int event, u64 wallclock); @@ -29,6 +29,10 @@ int preferred_cluster(struct sched_cluster *cluster, struct task_struct *p); int sched_set_group_preferred_cluster(unsigned int grp_id, int sched_cluster_id); struct cpumask *find_rtg_target(struct task_struct *p); int find_rtg_cpu(struct task_struct *p); +int sched_set_group_util_invalid_interval(unsigned int grp_id, + unsigned int interval); +int sched_set_group_normalized_util(unsigned int grp_id, unsigned long util, + unsigned int flag); #else static inline int alloc_related_thread_groups(void) { return 0; } static inline int sched_set_group_preferred_cluster(unsigned int grp_id, @@ -36,5 +40,10 @@ static inline int sched_set_group_preferred_cluster(unsigned int grp_id, { return 0; } +static inline int sched_set_group_normalized_util(unsigned int grp_id, unsigned long util, + unsigned int flag) +{ + return 0; +} #endif /* CONFIG_SCHED_RTG */ #endif diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c index a2824cc9bc2e..40515b1bbdb7 100644 --- a/kernel/sched/walt.c +++ b/kernel/sched/walt.c @@ -1180,7 +1180,7 @@ void update_task_ravg(struct task_struct *p, struct rq *rq, int event, old_window_start = update_window_start(rq, wallclock, event); #ifdef CONFIG_SCHED_RTG - update_group_nr_running(p, event); + update_group_nr_running(p, event, wallclock); #endif if (!p->ravg.mark_start) goto done; -- Gitee From 9559e7e57b517576f3ed5890a6ceeebbdc9b012d Mon Sep 17 00:00:00 2001 From: Li Ming Date: Mon, 14 Feb 2022 14:55:36 +0800 Subject: [PATCH 08/11] sched: Add debugfs for sched cluster codeaurora inclusion category: feature issue: #I4SULH CVE: NA Signed-off-by: Li Ming ------------------------------------------- Show the information of sched cluster in /proc/sched_cluster. 
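For illustration, the output format follows the seq_printf() calls added below; on a hypothetical 4+4 two-cluster system (the cluster IDs and CPU ranges here are assumptions, not output captured from real hardware) the node would read roughly:

  min_id:0, max_id:1
  id:0, cpumask:0(0-3)
  id:1, cpumask:4(4-7)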
Signed-off-by: Vikram Mulukutla Signed-off-by: Satya Durga Srinivasu Prabhala Signed-off-by: Srinath Sridharan --- kernel/sched/walt.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c index 40515b1bbdb7..8d4c79028d8a 100644 --- a/kernel/sched/walt.c +++ b/kernel/sched/walt.c @@ -1813,3 +1813,49 @@ void walt_sched_init_rq(struct rq *rq) for (j = 0; j < NUM_TRACKED_WINDOWS; j++) memset(&rq->load_subs[j], 0, sizeof(struct load_subtractions)); } + +#define min_cap_cluster() \ + list_first_entry(&cluster_head, struct sched_cluster, list) +#define max_cap_cluster() \ + list_last_entry(&cluster_head, struct sched_cluster, list) +static int sched_cluster_debug_show(struct seq_file *file, void *param) +{ + struct sched_cluster *cluster = NULL; + + seq_printf(file, "min_id:%d, max_id:%d\n", + min_cap_cluster()->id, + max_cap_cluster()->id); + + for_each_sched_cluster(cluster) { + seq_printf(file, "id:%d, cpumask:%d(%*pbl)\n", + cluster->id, + cpumask_first(&cluster->cpus), + cpumask_pr_args(&cluster->cpus)); + } + + return 0; +} + +static int sched_cluster_debug_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, sched_cluster_debug_show, NULL); +} + +static const struct proc_ops sched_cluster_fops = { + .proc_open = sched_cluster_debug_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = seq_release, +}; + +static int __init init_sched_cluster_debug_procfs(void) +{ + struct proc_dir_entry *pe = NULL; + + pe = proc_create("sched_cluster", + 0444, NULL, &sched_cluster_fops); + if (!pe) + return -ENOMEM; + return 0; +} +late_initcall(init_sched_cluster_debug_procfs); -- Gitee From b7f2b5a8b7a4f1d479b0960a49480912e2783ef8 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Mon, 14 Feb 2022 18:01:31 +0800 Subject: [PATCH 09/11] sched: Support forced adjustment of CPU frequency according to the group util codeaurora inclusion category: feature issue: #I4SULH CVE: NA Signed-off-by: Li Ming ------------------------------------------- Add SCHED_CPUFREQ_FORCE_UPDATE flag to allow skipping the CPU frequency scaling interval check (rate_limit_us, 20ms by default).
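A minimal sketch of how a caller requests an immediate frequency re-evaluation (it mirrors the cpufreq_update_util() calls this series adds to rtg.c; the bare cpu variable is a placeholder):

	/*
	 * Force schedutil to recompute the frequency for this CPU now,
	 * bypassing the rate_limit_us check in sugov_should_update_freq().
	 */
	cpufreq_update_util(cpu_rq(cpu), SCHED_CPUFREQ_FORCE_UPDATE | SCHED_CPUFREQ_WALT);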
Signed-off-by: Vikram Mulukutla Signed-off-by: Satya Durga Srinivasu Prabhala Signed-off-by: Srinath Sridharan --- include/linux/sched/cpufreq.h | 1 + include/linux/sched/rtg.h | 7 +++ kernel/sched/cpufreq_schedutil.c | 26 +++++++++-- kernel/sched/rtg/rtg.c | 79 ++++++++++++++++++++++++++++++-- kernel/sched/rtg/rtg.h | 8 ++++ 5 files changed, 114 insertions(+), 7 deletions(-) diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h index c7cf63236f5b..94e7f84de227 100644 --- a/include/linux/sched/cpufreq.h +++ b/include/linux/sched/cpufreq.h @@ -11,6 +11,7 @@ #define SCHED_CPUFREQ_IOWAIT (1U << 0) #define SCHED_CPUFREQ_WALT (1U << 1) #define SCHED_CPUFREQ_CONTINUE (1U << 2) +#define SCHED_CPUFREQ_FORCE_UPDATE (1U << 3) #ifdef CONFIG_CPU_FREQ struct cpufreq_policy; diff --git a/include/linux/sched/rtg.h b/include/linux/sched/rtg.h index d27e1507e334..735b8ccae745 100644 --- a/include/linux/sched/rtg.h +++ b/include/linux/sched/rtg.h @@ -44,13 +44,20 @@ struct related_thread_group { int max_boost; unsigned long util_invalid_interval; /* in nanoseconds */ unsigned long util_update_timeout; /* in nanoseconds */ + unsigned long freq_update_interval; /* in nanoseconds */ u64 last_util_update_time; + u64 last_freq_update_time; }; struct rtg_class { void (*sched_update_rtg_tick)(struct related_thread_group *grp); }; +enum rtg_freq_update_flags { + RTG_FREQ_FORCE_UPDATE = (1 << 0), + RTG_FREQ_NORMAL_UPDATE = (1 << 1), +}; + int sched_set_group_id(struct task_struct *p, unsigned int group_id); unsigned int sched_get_group_id(struct task_struct *p); #endif /* CONFIG_SCHED_RTG */ diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index cb72dc5c2002..742ed2fe50de 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -9,6 +9,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "sched.h" +#include "rtg/rtg.h" #include #include @@ -38,6 +39,10 @@ struct sugov_policy { struct mutex work_lock; struct kthread_worker worker; struct task_struct *thread; +#ifdef CONFIG_SCHED_RTG + unsigned long rtg_util; + unsigned int rtg_freq; +#endif bool work_in_progress; bool limits_changed; @@ -448,13 +453,18 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, unsigned long util, max; unsigned int next_f; unsigned int cached_freq = sg_policy->cached_raw_freq; + bool force_update = false; + +#ifdef CONFIG_SCHED_RTG + force_update = flags & SCHED_CPUFREQ_FORCE_UPDATE; +#endif sugov_iowait_boost(sg_cpu, time, flags); sg_cpu->last_update = time; ignore_dl_rate_limit(sg_cpu, sg_policy); - if (!sugov_should_update_freq(sg_policy, time)) + if (!force_update && !sugov_should_update_freq(sg_policy, time)) return; util = sugov_get_util(sg_cpu); @@ -507,6 +517,11 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) } } +#ifdef CONFIG_SCHED_RTG + sched_get_max_group_util(policy->cpus, &sg_policy->rtg_util, &sg_policy->rtg_freq); + util = max(sg_policy->rtg_util, util); +#endif + return get_next_freq(sg_policy, util, max); } @@ -516,7 +531,11 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags) struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); struct sugov_policy *sg_policy = sg_cpu->sg_policy; unsigned int next_f; + bool force_update = false; +#ifdef CONFIG_SCHED_RTG + force_update = flags & SCHED_CPUFREQ_FORCE_UPDATE; +#endif raw_spin_lock(&sg_policy->update_lock); sugov_iowait_boost(sg_cpu, time, flags); @@ -525,9 +544,10 @@ 
sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags) ignore_dl_rate_limit(sg_cpu, sg_policy); #ifdef CONFIG_SCHED_WALT - if (sugov_should_update_freq(sg_policy, time) && !(flags & SCHED_CPUFREQ_CONTINUE)) { + if ((force_update || sugov_should_update_freq(sg_policy, time)) + && !(flags & SCHED_CPUFREQ_CONTINUE)) { #else - if (sugov_should_update_freq(sg_policy, time)) { + if (force_update || sugov_should_update_freq(sg_policy, time)) { #endif next_f = sugov_next_freq_shared(sg_cpu, time); diff --git a/kernel/sched/rtg/rtg.c b/kernel/sched/rtg/rtg.c index 016b2143ea8c..51b9c3fad7da 100644 --- a/kernel/sched/rtg/rtg.c +++ b/kernel/sched/rtg/rtg.c @@ -17,6 +17,7 @@ #define DEFAULT_GROUP_RATE 60 /* 60FPS */ #define DEFAULT_UTIL_INVALID_INTERVAL (~0U) /* ns */ #define DEFAULT_UTIL_UPDATE_TIMEOUT 20000000 /* ns */ +#define DEFAULT_FREQ_UPDATE_INTERVAL 8000000 /* ns */ struct related_thread_group *related_thread_groups[MAX_NUM_CGROUP_COLOC_ID]; static DEFINE_RWLOCK(related_thread_group_lock); @@ -62,6 +63,7 @@ int alloc_related_thread_groups(void) grp->util_invalid_interval = DEFAULT_UTIL_INVALID_INTERVAL; grp->util_update_timeout = DEFAULT_UTIL_UPDATE_TIMEOUT; grp->max_boost = 0; + grp->freq_update_interval = DEFAULT_FREQ_UPDATE_INTERVAL; raw_spin_lock_init(&grp->lock); related_thread_groups[i] = grp; @@ -780,7 +782,7 @@ group_should_invalid_util(struct related_thread_group *grp, u64 now) if (grp->util_invalid_interval == DEFAULT_UTIL_INVALID_INTERVAL) return false; - return true; + return (now - grp->last_freq_update_time >= grp->util_invalid_interval); } static inline bool valid_normalized_util(struct related_thread_group *grp) @@ -870,13 +872,34 @@ static struct sched_cluster *best_cluster(struct related_thread_group *grp) return max_cluster; } +static bool group_should_update_freq(struct related_thread_group *grp, + int cpu, unsigned int flags, u64 now) +{ + if (!grp) + return true; + + if (flags & RTG_FREQ_FORCE_UPDATE) { + return true; + } else if (flags & RTG_FREQ_NORMAL_UPDATE) { + if (now - grp->last_freq_update_time >= + grp->freq_update_interval) + return true; + } + + return false; +} + int sched_set_group_normalized_util(unsigned int grp_id, unsigned long util, unsigned int flag) { struct related_thread_group *grp = NULL; + bool need_update_prev_freq = false; + bool need_update_next_freq = false; u64 now; unsigned long flags; struct sched_cluster *preferred_cluster = NULL; + int prev_cpu; + int next_cpu; grp = lookup_related_thread_group(grp_id); if (!grp) { @@ -896,16 +919,63 @@ int sched_set_group_normalized_util(unsigned int grp_id, unsigned long util, preferred_cluster = best_cluster(grp); /* update prev_cluster force when preferred_cluster changed */ - if (!grp->preferred_cluster) + if (!grp->preferred_cluster) { grp->preferred_cluster = preferred_cluster; - else if (grp->preferred_cluster != preferred_cluster) + } else if (grp->preferred_cluster != preferred_cluster) { + prev_cpu = cpumask_first(&grp->preferred_cluster->cpus); grp->preferred_cluster = preferred_cluster; + need_update_prev_freq = true; + } + + if (grp->preferred_cluster != NULL) + next_cpu = cpumask_first(&grp->preferred_cluster->cpus); + else + next_cpu = 0; + now = ktime_get_ns(); grp->last_util_update_time = now; + need_update_next_freq = + group_should_update_freq(grp, next_cpu, flag, now); + if (need_update_next_freq) + grp->last_freq_update_time = now; raw_spin_unlock_irqrestore(&grp->lock, flags); + if (need_update_prev_freq) + cpufreq_update_util(cpu_rq(prev_cpu), + 
SCHED_CPUFREQ_FORCE_UPDATE | SCHED_CPUFREQ_WALT); + + if (need_update_next_freq) + cpufreq_update_util(cpu_rq(next_cpu), + SCHED_CPUFREQ_FORCE_UPDATE | SCHED_CPUFREQ_WALT); + + return 0; +} + +int sched_set_group_freq_update_interval(unsigned int grp_id, unsigned int interval) +{ + struct related_thread_group *grp = NULL; + unsigned long flag; + + if ((signed int)interval <= 0) + return -EINVAL; + + /* DEFAULT_CGROUP_COLOC_ID is a reserved id */ + if (grp_id == DEFAULT_CGROUP_COLOC_ID || + grp_id >= MAX_NUM_CGROUP_COLOC_ID) + return -EINVAL; + + grp = lookup_related_thread_group(grp_id); + if (!grp) { + pr_err("set update interval for group %d fail\n", grp_id); + return -ENODEV; + } + + raw_spin_lock_irqsave(&grp->lock, flag); + grp->freq_update_interval = interval * NSEC_PER_MSEC; + raw_spin_unlock_irqrestore(&grp->lock, flag); + return 0; } @@ -922,7 +992,8 @@ static void print_rtg_info(struct seq_file *file, const struct related_thread_group *grp) { seq_printf_rtg(file, "RTG_ID : %d\n", grp->id); - seq_printf_rtg(file, "RTG_INTERVAL : INVALID:%lums\n", + seq_printf_rtg(file, "RTG_INTERVAL : UPDATE:%lums#INVALID:%lums\n", + grp->freq_update_interval / NSEC_PER_MSEC, grp->util_invalid_interval / NSEC_PER_MSEC); seq_printf_rtg(file, "RTG_CLUSTER : %d\n", grp->preferred_cluster ? grp->preferred_cluster->id : -1); diff --git a/kernel/sched/rtg/rtg.h b/kernel/sched/rtg/rtg.h index abd70d449ddb..23536c62859a 100644 --- a/kernel/sched/rtg/rtg.h +++ b/kernel/sched/rtg/rtg.h @@ -33,6 +33,10 @@ int sched_set_group_util_invalid_interval(unsigned int grp_id, unsigned int interval); int sched_set_group_normalized_util(unsigned int grp_id, unsigned long util, unsigned int flag); +void sched_get_max_group_util(const struct cpumask *query_cpus, + unsigned long *util, unsigned int *freq); +int sched_set_group_freq_update_interval(unsigned int grp_id, + unsigned int interval); #else static inline int alloc_related_thread_groups(void) { return 0; } static inline int sched_set_group_preferred_cluster(unsigned int grp_id, @@ -45,5 +49,9 @@ static inline int sched_set_group_normalized_util(unsigned int grp_id, unsigned { return 0; } +static inline void sched_get_max_group_util(const struct cpumask *query_cpus, + unsigned long *util, unsigned int *freq) +{ +} #endif /* CONFIG_SCHED_RTG */ #endif -- Gitee From e2a541d6c6b538ab88da0a273e3faba31662a9cd Mon Sep 17 00:00:00 2001 From: Li Ming Date: Mon, 14 Feb 2022 18:38:18 +0800 Subject: [PATCH 10/11] sched: Support adding new tasks to the default group via cgroup attach codeaurora inclusion category: feature issue: #I4SULH CVE: NA Signed-off-by: Li Ming ------------------------------------------- If uclamp.colocate of cpu cgroup is set, the new task which belongs to cgroup will be added to the default group (group_id = DEFAULT_CGROUP_COLOC_ID). 
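On a legacy (v1) hierarchy the knob appears as cpu.uclamp.colocate in each non-root cpu cgroup; for example, assuming the controller is mounted at /sys/fs/cgroup/cpu and a hypothetical "background" cgroup exists:

  echo 1 > /sys/fs/cgroup/cpu/background/cpu.uclamp.colocate

Note that sched_colocate_write() sets colocate_update_disabled on the first write, so the flag can be changed only once; later writes return -EPERM.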
Signed-off-by: Vikram Mulukutla Signed-off-by: Satya Durga Srinivasu Prabhala Signed-off-by: Srinath Sridharan --- kernel/sched/core.c | 60 +++++++++++++++++++ kernel/sched/rtg/Kconfig | 8 +++ kernel/sched/rtg/rtg.c | 125 +++++++++++++++++++++++++++++++++++++-- kernel/sched/rtg/rtg.h | 7 +++ kernel/sched/sched.h | 11 ++++ 5 files changed, 207 insertions(+), 4 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 20dd5009e315..8e506f6efc73 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3452,6 +3452,8 @@ void wake_up_new_task(struct task_struct *p) struct rq *rq; raw_spin_lock_irqsave(&p->pi_lock, rf.flags); + add_new_task_to_grp(p); + p->state = TASK_RUNNING; #ifdef CONFIG_SMP /* @@ -8060,6 +8062,11 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) if (IS_ERR(tg)) return ERR_PTR(-ENOMEM); +#ifdef CONFIG_SCHED_RTG_CGROUP + tg->colocate = false; + tg->colocate_update_disabled = false; +#endif + return &tg->css; } @@ -8149,6 +8156,25 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) return ret; } +#if defined(CONFIG_UCLAMP_TASK_GROUP) && defined(CONFIG_SCHED_RTG_CGROUP) +static void schedgp_attach(struct cgroup_taskset *tset) +{ + struct task_struct *task; + struct cgroup_subsys_state *css; + bool colocate; + struct task_group *tg; + + cgroup_taskset_first(tset, &css); + tg = css_tg(css); + + colocate = tg->colocate; + + cgroup_taskset_for_each(task, css, tset) + sync_cgroup_colocation(task, colocate); +} +#else +static void schedgp_attach(struct cgroup_taskset *tset) { } +#endif static void cpu_cgroup_attach(struct cgroup_taskset *tset) { struct task_struct *task; @@ -8156,6 +8182,8 @@ static void cpu_cgroup_attach(struct cgroup_taskset *tset) cgroup_taskset_for_each(task, css, tset) sched_move_task(task); + + schedgp_attach(tset); } #ifdef CONFIG_UCLAMP_TASK_GROUP @@ -8333,6 +8361,30 @@ static int cpu_uclamp_max_show(struct seq_file *sf, void *v) cpu_uclamp_print(sf, UCLAMP_MAX); return 0; } + +#ifdef CONFIG_SCHED_RTG_CGROUP +static u64 sched_colocate_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct task_group *tg = css_tg(css); + + return (u64) tg->colocate; +} + +static int sched_colocate_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 colocate) +{ + struct task_group *tg = css_tg(css); + + if (tg->colocate_update_disabled) + return -EPERM; + + tg->colocate = !!colocate; + tg->colocate_update_disabled = true; + + return 0; +} +#endif /* CONFIG_SCHED_RTG_CGROUP */ #endif /* CONFIG_UCLAMP_TASK_GROUP */ #ifdef CONFIG_FAIR_GROUP_SCHED @@ -8701,6 +8753,14 @@ static struct cftype cpu_legacy_files[] = { .seq_show = cpu_uclamp_max_show, .write = cpu_uclamp_max_write, }, +#ifdef CONFIG_SCHED_RTG_CGROUP + { + .name = "uclamp.colocate", + .flags = CFTYPE_NOT_ON_ROOT, + .read_u64 = sched_colocate_read, + .write_u64 = sched_colocate_write, + }, +#endif #endif { } /* Terminate */ }; diff --git a/kernel/sched/rtg/Kconfig b/kernel/sched/rtg/Kconfig index a96073631d16..3e5acad17ac5 100644 --- a/kernel/sched/rtg/Kconfig +++ b/kernel/sched/rtg/Kconfig @@ -14,4 +14,12 @@ config SCHED_RTG_DEBUG help If set, debug node will show rtg threads +config SCHED_RTG_CGROUP + bool "enable DEFAULT_CGROUP_COLOC RTG" + depends on SCHED_RTG + default n + help + If set, support for adding the tasks which belong to + co-located cgroup to DEFAULT_CGROUP_COLOC RTG. 
+ endmenu diff --git a/kernel/sched/rtg/rtg.c b/kernel/sched/rtg/rtg.c index 51b9c3fad7da..e2cf2cdab65c 100644 --- a/kernel/sched/rtg/rtg.c +++ b/kernel/sched/rtg/rtg.c @@ -301,10 +301,18 @@ static int __sched_set_group_id(struct task_struct *p, unsigned int group_id) * In other cases, Switching from one group to another directly is not permitted. */ if (old_grp && group_id) { - pr_err("%s[%d] switching group from %d to %d failed.\n", - p->comm, p->pid, old_grp->id, group_id); - rc = -EINVAL; - goto done; +#ifdef CONFIG_SCHED_RTG_CGROUP + if (old_grp->id == DEFAULT_CGROUP_COLOC_ID) { + remove_task_from_group(p); + } else { +#endif + pr_err("%s[%d] switching group from %d to %d failed.\n", + p->comm, p->pid, old_grp->id, group_id); + rc = -EINVAL; + goto done; +#ifdef CONFIG_SCHED_RTG_CGROUP + } +#endif } if (!group_id) { @@ -979,6 +987,115 @@ int sched_set_group_freq_update_interval(unsigned int grp_id, unsigned int inter return 0; } +#ifdef CONFIG_SCHED_RTG_CGROUP +#ifdef CONFIG_UCLAMP_TASK_GROUP +static inline bool uclamp_task_colocated(struct task_struct *p) +{ + struct cgroup_subsys_state *css; + struct task_group *tg; + bool colocate; + + rcu_read_lock(); + css = task_css(p, cpu_cgrp_id); + if (!css) { + rcu_read_unlock(); + return false; + } + tg = container_of(css, struct task_group, css); + colocate = tg->colocate; + rcu_read_unlock(); + + return colocate; +} +#else +static inline bool uclamp_task_colocated(struct task_struct *p) +{ + return false; +} +#endif /* CONFIG_UCLAMP_TASK_GROUP */ + +void add_new_task_to_grp(struct task_struct *new) +{ + struct related_thread_group *grp = NULL; + unsigned long flag; + + /* + * If the task does not belong to colocated schedtune + * cgroup, nothing to do. We are checking this without + * lock. Even if there is a race, it will be added + * to the co-located cgroup via cgroup attach. + */ + if (!uclamp_task_colocated(new)) + return; + + grp = lookup_related_thread_group(DEFAULT_CGROUP_COLOC_ID); + write_lock_irqsave(&related_thread_group_lock, flag); + + /* + * It's possible that someone already added the new task to the + * group, or it might have been taken out of the colocated schedtune + * cgroup. Check these conditions under lock. + */ + if (!uclamp_task_colocated(new) || new->grp) { + write_unlock_irqrestore(&related_thread_group_lock, flag); + return; + } + + raw_spin_lock(&grp->lock); + + rcu_assign_pointer(new->grp, grp); + list_add(&new->grp_list, &grp->tasks); + + raw_spin_unlock(&grp->lock); + write_unlock_irqrestore(&related_thread_group_lock, flag); +} + + +/* + * We create a default colocation group at boot. There is no need to + * synchronize tasks between cgroups at creation time because the + * correct cgroup hierarchy is not available at boot. Therefore cgroup + * colocation is turned off by default even though the colocation group + * itself has been allocated. Furthermore this colocation group cannot + * be destroyed once it has been created. All of this has been done as part + * of runtime optimizations. + * + * The job of synchronizing tasks to the colocation group is done when + * the colocation flag in the cgroup is turned on.
+ */ +static int __init create_default_coloc_group(void) +{ + struct related_thread_group *grp = NULL; + unsigned long flags; + + grp = lookup_related_thread_group(DEFAULT_CGROUP_COLOC_ID); + write_lock_irqsave(&related_thread_group_lock, flags); + list_add(&grp->list, &active_related_thread_groups); + write_unlock_irqrestore(&related_thread_group_lock, flags); + + return 0; +} +late_initcall(create_default_coloc_group); + +int sync_cgroup_colocation(struct task_struct *p, bool insert) +{ + unsigned int grp_id = insert ? DEFAULT_CGROUP_COLOC_ID : 0; + unsigned int old_grp_id; + + if (p) { + old_grp_id = sched_get_group_id(p); + /* + * If the task is already in a group which is not DEFAULT_CGROUP_COLOC_ID, + * we should not change the group id during switch to background. + */ + if ((old_grp_id != DEFAULT_CGROUP_COLOC_ID) && (grp_id == 0)) + return 0; + } + + return __sched_set_group_id(p, grp_id); +} +#endif /* CONFIG_SCHED_RTG_CGROUP */ + #ifdef CONFIG_SCHED_RTG_DEBUG #define seq_printf_rtg(m, x...) \ do { \ diff --git a/kernel/sched/rtg/rtg.h b/kernel/sched/rtg/rtg.h index 23536c62859a..4f0cedc332f0 100644 --- a/kernel/sched/rtg/rtg.h +++ b/kernel/sched/rtg/rtg.h @@ -37,6 +37,12 @@ void sched_get_max_group_util(const struct cpumask *query_cpus, unsigned long *util, unsigned int *freq); int sched_set_group_freq_update_interval(unsigned int grp_id, unsigned int interval); +#ifdef CONFIG_SCHED_RTG_CGROUP +int sync_cgroup_colocation(struct task_struct *p, bool insert); +void add_new_task_to_grp(struct task_struct *new); +#else +static inline void add_new_task_to_grp(struct task_struct *new) {} +#endif /* CONFIG_SCHED_RTG_CGROUP */ #else static inline int alloc_related_thread_groups(void) { return 0; } static inline int sched_set_group_preferred_cluster(unsigned int grp_id, @@ -53,5 +59,6 @@ static inline void sched_get_max_group_util(const struct cpumask *query_cpus, unsigned long *util, unsigned int *freq) { } +static inline void add_new_task_to_grp(struct task_struct *new) {} #endif /* CONFIG_SCHED_RTG */ #endif diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 9630e3c00558..d79744dcc048 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -477,6 +477,17 @@ struct task_group { /* Effective clamp values used for a task group */ struct uclamp_se uclamp[UCLAMP_CNT]; #endif + +#ifdef CONFIG_SCHED_RTG_CGROUP + /* + * Controls whether tasks of this cgroup should be colocated with each + * other and tasks of other cgroups that have the same flag turned on. + */ + bool colocate; + + /* Controls whether further updates are allowed to the colocate flag */ + bool colocate_update_disabled; +#endif }; #ifdef CONFIG_FAIR_GROUP_SCHED -- Gitee From 1f0d48afb6ced6e61855f6a97a8dafb022676294 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Mon, 14 Feb 2022 18:46:40 +0800 Subject: [PATCH 11/11] sched: Add trace points for related thread group scheduling codeaurora inclusion category: feature issue: #I4SULH CVE: NA Signed-off-by: Li Ming ------------------------------------------- Add find_rtg_cpu/sched_rtg_task_each/sched_rtg_valid_normalized_util trace points for cpu selection. 
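Assuming the usual tracefs mount point, the new events can be enabled with:

  echo 1 > /sys/kernel/tracing/events/rtg/enable

find_rtg_cpu reports one of three reason strings per invocation ("prefer_idle", "idle_backup" or "max_spare") together with the preferred cpumask and the chosen CPU, which shows why a given target CPU was selected.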
Signed-off-by: Vikram Mulukutla Signed-off-by: Satya Durga Srinivasu Prabhala Signed-off-by: Srinath Sridharan --- include/trace/events/rtg.h | 117 +++++++++++++++++++++++++++++++++++++ kernel/sched/rtg/rtg.c | 15 ++++- 2 files changed, 130 insertions(+), 2 deletions(-) create mode 100644 include/trace/events/rtg.h diff --git a/include/trace/events/rtg.h b/include/trace/events/rtg.h new file mode 100644 index 000000000000..12422d2c3ee2 --- /dev/null +++ b/include/trace/events/rtg.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rtg + +#if !defined(_TRACE_RTG_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_RTG_H + +#include +#include + +struct rq; + +TRACE_EVENT(find_rtg_cpu, + + TP_PROTO(struct task_struct *p, const struct cpumask *perferred_cpumask, + char *msg, int cpu), + + TP_ARGS(p, perferred_cpumask, msg, cpu), + + TP_STRUCT__entry( + __array(char, comm, TASK_COMM_LEN) + __field(pid_t, pid) + __bitmask(cpus, num_possible_cpus()) + __array(char, msg, TASK_COMM_LEN) + __field(int, cpu) + ), + + TP_fast_assign( + __entry->pid = p->pid; + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __assign_bitmask(cpus, cpumask_bits(perferred_cpumask), num_possible_cpus()); + memcpy(__entry->msg, msg, min((size_t)TASK_COMM_LEN, strlen(msg)+1)); + __entry->cpu = cpu; + ), + + TP_printk("comm=%s pid=%d perferred_cpus=%s reason=%s target_cpu=%d", + __entry->comm, __entry->pid, __get_bitmask(cpus), __entry->msg, __entry->cpu) +); + +TRACE_EVENT(sched_rtg_task_each, + + TP_PROTO(unsigned int id, unsigned int nr_running, struct task_struct *task), + + TP_ARGS(id, nr_running, task), + + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, nr_running) + __array(char, comm, TASK_COMM_LEN) + __field(pid_t, pid) + __field(int, prio) + __bitmask(allowed, num_possible_cpus()) + __field(int, cpu) + __field(int, state) + __field(bool, on_rq) + __field(int, on_cpu) + ), + + TP_fast_assign( + __entry->id = id; + __entry->nr_running = nr_running; + memcpy(__entry->comm, task->comm, TASK_COMM_LEN); + __entry->pid = task->pid; + __entry->prio = task->prio; + __assign_bitmask(allowed, cpumask_bits(&task->cpus_mask), num_possible_cpus()); + __entry->cpu = task_cpu(task); + __entry->state = task->state; + __entry->on_rq = task->on_rq; + __entry->on_cpu = task->on_cpu; + ), + + TP_printk("comm=%s pid=%d prio=%d allowed=%s cpu=%d state=%s%s on_rq=%d on_cpu=%d", + __entry->comm, __entry->pid, __entry->prio, __get_bitmask(allowed), __entry->cpu, + __entry->state & (TASK_REPORT_MAX) ? + __print_flags(__entry->state & (TASK_REPORT_MAX), "|", + { TASK_INTERRUPTIBLE, "S" }, + { TASK_UNINTERRUPTIBLE, "D" }, + { __TASK_STOPPED, "T" }, + { __TASK_TRACED, "t" }, + { EXIT_DEAD, "X" }, + { EXIT_ZOMBIE, "Z" }, + { TASK_DEAD, "x" }, + { TASK_WAKEKILL, "K"}, + { TASK_WAKING, "W"}) : "R", + __entry->state & TASK_STATE_MAX ? 
"+" : "", + __entry->on_rq, __entry->on_cpu) +); + +TRACE_EVENT(sched_rtg_valid_normalized_util, + + TP_PROTO(unsigned int id, unsigned int nr_running, + const struct cpumask *rtg_cpus, unsigned int valid), + + TP_ARGS(id, nr_running, rtg_cpus, valid), + + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, nr_running) + __bitmask(cpus, num_possible_cpus()) + __field(unsigned int, valid) + ), + + TP_fast_assign( + __entry->id = id; + __entry->nr_running = nr_running; + __assign_bitmask(cpus, cpumask_bits(rtg_cpus), num_possible_cpus()); + __entry->valid = valid; + ), + + TP_printk("id=%d nr_running=%d cpus=%s valid=%d", + __entry->id, __entry->nr_running, + __get_bitmask(cpus), __entry->valid) +); +#endif /* _TRACE_RTG_H */ + +/* This part must be outside protection */ +#include diff --git a/kernel/sched/rtg/rtg.c b/kernel/sched/rtg/rtg.c index e2cf2cdab65c..dabadd54e59c 100644 --- a/kernel/sched/rtg/rtg.c +++ b/kernel/sched/rtg/rtg.c @@ -6,6 +6,9 @@ #include #include #include +#define CREATE_TRACE_POINTS +#include +#undef CREATE_TRACE_POINTS #include "../sched.h" #include "rtg.h" @@ -717,8 +720,10 @@ int find_rtg_cpu(struct task_struct *p) if (is_reserved(i)) continue; - if (idle_cpu(i) || (i == task_cpu(p) && p->state == TASK_RUNNING)) + if (idle_cpu(i) || (i == task_cpu(p) && p->state == TASK_RUNNING)) { + trace_find_rtg_cpu(p, preferred_cpus, "prefer_idle", i); return i; + } } for_each_cpu(i, &search_cpus) { @@ -747,8 +752,12 @@ int find_rtg_cpu(struct task_struct *p) } } - if (idle_backup_cpu != -1) + if (idle_backup_cpu != -1) { + trace_find_rtg_cpu(p, preferred_cpus, "idle_backup", idle_backup_cpu); return idle_backup_cpu; + } + + trace_find_rtg_cpu(p, preferred_cpus, "max_spare", max_spare_cap_cpu); return max_spare_cap_cpu; } @@ -804,12 +813,14 @@ static inline bool valid_normalized_util(struct related_thread_group *grp) get_task_struct(p); if (p->state == TASK_RUNNING) cpumask_set_cpu(task_cpu(p), &rtg_cpus); + trace_sched_rtg_task_each(grp->id, grp->nr_running, p); put_task_struct(p); } valid = cpumask_intersects(&rtg_cpus, &grp->preferred_cluster->cpus); } + trace_sched_rtg_valid_normalized_util(grp->id, grp->nr_running, &rtg_cpus, valid); return valid; } -- Gitee