From 18f30ae68a853a0c093cd391bf7ad53d49c069b9 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Sun, 13 Feb 2022 22:32:45 +0800 Subject: [PATCH 01/11] sched: Introduce related thread group scheduling codeaurora inclusion category: feature issue: #I4SULH CVE: NA Signed-off-by: Li Ming ------------------------------------------- The original patch is from Code Aurora's latest msm-4.14. Based on the original patch, we add definitions for related thread group, and a subsequent changeset will provide improved schedule for related thread group. Signed-off-by: Vikram Mulukutla Signed-off-by: Satya Durga Srinivasu Prabhala Signed-off-by: Srinath Sridharan --- include/linux/sched.h | 7 +++++++ include/linux/sched/rtg.h | 15 +++++++++++++++ init/Kconfig | 2 ++ kernel/sched/Makefile | 1 + kernel/sched/core.c | 11 +++++++++++ kernel/sched/rtg/Kconfig | 10 ++++++++++ kernel/sched/rtg/Makefile | 2 ++ kernel/sched/rtg/rtg.c | 13 +++++++++++++ kernel/sched/rtg/rtg.h | 14 ++++++++++++++ kernel/sched/walt.c | 5 +++++ 10 files changed, 80 insertions(+) create mode 100644 include/linux/sched/rtg.h create mode 100644 kernel/sched/rtg/Kconfig create mode 100644 kernel/sched/rtg/Makefile create mode 100644 kernel/sched/rtg/rtg.c create mode 100644 kernel/sched/rtg/rtg.h diff --git a/include/linux/sched.h b/include/linux/sched.h index 44d5d8ed532a..c5e0c99cb3cd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -34,6 +34,7 @@ #include #include #include +#include /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; @@ -801,6 +802,12 @@ struct task_struct { u64 last_sleep_ts; #endif +#ifdef CONFIG_SCHED_RTG + int rtg_depth; + struct related_thread_group *grp; + struct list_head grp_list; +#endif + #ifdef CONFIG_CGROUP_SCHED struct task_group *sched_task_group; #endif diff --git a/include/linux/sched/rtg.h b/include/linux/sched/rtg.h new file mode 100644 index 000000000000..c17636439964 --- /dev/null +++ b/include/linux/sched/rtg.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __SCHED_RTG_H +#define __SCHED_RTG_H + +#ifdef CONFIG_SCHED_RTG +struct related_thread_group { + int id; + raw_spinlock_t lock; + struct list_head tasks; + struct list_head list; + + unsigned int nr_running; +}; +#endif /* CONFIG_SCHED_RTG */ +#endif diff --git a/init/Kconfig b/init/Kconfig index 1512479e7782..1d248e9c5a89 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -858,6 +858,8 @@ config UCLAMP_BUCKETS_COUNT If in doubt, use the default value. 
+source "kernel/sched/rtg/Kconfig" + endmenu # diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 0e3173ee99fb..1b4834073ae7 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -28,6 +28,7 @@ obj-y += wait.o wait_bit.o swait.o completion.o obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o obj-$(CONFIG_SCHED_WALT) += walt.o +obj-$(CONFIG_SCHED_RTG) += rtg/ obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o obj-$(CONFIG_SCHEDSTATS) += stats.o obj-$(CONFIG_SCHED_DEBUG) += debug.o diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 33e19cbd4eee..3a86b124f41c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -29,6 +29,7 @@ #include "pelt.h" #include "smp.h" #include "walt.h" +#include "rtg/rtg.h" /* * Export tracepoints that act as a bare tracehook (ie: have no trace event @@ -3207,6 +3208,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) #ifdef CONFIG_SMP p->wake_entry.u_flags = CSD_TYPE_TTWU; #endif +#ifdef CONFIG_SCHED_RTG + p->rtg_depth = 0; +#endif } DEFINE_STATIC_KEY_FALSE(sched_numa_balancing); @@ -3350,7 +3354,14 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) if (unlikely(p->sched_reset_on_fork)) { if (task_has_dl_policy(p) || task_has_rt_policy(p)) { p->policy = SCHED_NORMAL; +#ifdef CONFIG_SCHED_RTG + if (current->rtg_depth != 0) + p->static_prio = current->static_prio; + else + p->static_prio = NICE_TO_PRIO(0); +#else p->static_prio = NICE_TO_PRIO(0); +#endif p->rt_priority = 0; } else if (PRIO_TO_NICE(p->static_prio) < 0) p->static_prio = NICE_TO_PRIO(0); diff --git a/kernel/sched/rtg/Kconfig b/kernel/sched/rtg/Kconfig new file mode 100644 index 000000000000..11a0343d935b --- /dev/null +++ b/kernel/sched/rtg/Kconfig @@ -0,0 +1,10 @@ +menu "Related Thread Group" + +config SCHED_RTG + bool "Related Thread Group" + depends on SCHED_WALT + default n + help + Set related threads into a group. 
+ +endmenu diff --git a/kernel/sched/rtg/Makefile b/kernel/sched/rtg/Makefile new file mode 100644 index 000000000000..a911575b0734 --- /dev/null +++ b/kernel/sched/rtg/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_SCHED_RTG) += rtg.o diff --git a/kernel/sched/rtg/rtg.c b/kernel/sched/rtg/rtg.c new file mode 100644 index 000000000000..f48905afbbf4 --- /dev/null +++ b/kernel/sched/rtg/rtg.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * related thread group sched + * + */ +#include +#include "rtg.h" + +void init_task_rtg(struct task_struct *p) +{ + rcu_assign_pointer(p->grp, NULL); + INIT_LIST_HEAD(&p->grp_list); +} diff --git a/kernel/sched/rtg/rtg.h b/kernel/sched/rtg/rtg.h new file mode 100644 index 000000000000..39046758a6b7 --- /dev/null +++ b/kernel/sched/rtg/rtg.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * related thread group sched header + */ +#ifndef __RTG_H +#define __RTG_H + +#include +#include + +#ifdef CONFIG_SCHED_RTG +void init_task_rtg(struct task_struct *p); +#endif /* CONFIG_SCHED_RTG */ +#endif diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c index 30db3d617914..38699a333540 100644 --- a/kernel/sched/walt.c +++ b/kernel/sched/walt.c @@ -24,6 +24,7 @@ #include "sched.h" #include "walt.h" #include "core_ctl.h" +#include "rtg/rtg.h" #define CREATE_TRACE_POINTS #include #undef CREATE_TRACE_POINTS @@ -1160,6 +1161,10 @@ void init_new_task_load(struct task_struct *p) u32 init_load_windows_scaled = sched_init_task_load_windows_scaled; u32 init_load_pct = current->init_load_pct; +#ifdef CONFIG_SCHED_RTG + init_task_rtg(p); +#endif + p->last_sleep_ts = 0; p->init_load_pct = 0; memset(&p->ravg, 0, sizeof(struct ravg)); -- Gitee From cf327988be3e038a3630c66f0965b4bb61fc78bd Mon Sep 17 00:00:00 2001 From: Li Ming Date: Sun, 13 Feb 2022 23:36:45 +0800 Subject: [PATCH 02/11] sched: Minimally initialize the related thread group codeaurora inclusion category: feature issue: #I4SULH CVE: NA Signed-off-by: Li Ming ------------------------------------------- 21 groups (MAX_NUM_CGROUP_COLOC_ID) are created by default, of which DEFAULT_CGROUP_COLOC_ID is a reserved id. 
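For illustration only (not introduced by this patch), a kernel-side caller could
attach a task to one of the pre-allocated groups and later detach it through the
interface added here; the group id 2 and the wrapper function below are
assumptions made for the example, not part of the series:

    /* Sketch: attach @p to related thread group 2, then detach it. */
    static int rtg_attach_example(struct task_struct *p)
    {
            int ret;

            /* ids 2..20 are usable; DEFAULT_CGROUP_COLOC_ID (1) is reserved */
            ret = sched_set_group_id(p, 2);
            if (ret)
                    return ret;

            /* ... run the related workload ... */

            /* group id 0 removes the task from its current group */
            return sched_set_group_id(p, 0);
    }

A later patch in this series (PATCH 03/11) exposes the same operation to
userspace through /proc/$PID/sched_group_id.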
Signed-off-by: Vikram Mulukutla Signed-off-by: Satya Durga Srinivasu Prabhala Signed-off-by: Srinath Sridharan --- include/linux/sched/rtg.h | 7 ++ kernel/sched/core.c | 5 + kernel/sched/rtg/rtg.c | 204 ++++++++++++++++++++++++++++++++++++++ kernel/sched/rtg/rtg.h | 6 ++ kernel/sched/walt.c | 3 + 5 files changed, 225 insertions(+) diff --git a/include/linux/sched/rtg.h b/include/linux/sched/rtg.h index c17636439964..85bd334fa9cc 100644 --- a/include/linux/sched/rtg.h +++ b/include/linux/sched/rtg.h @@ -3,6 +3,11 @@ #define __SCHED_RTG_H #ifdef CONFIG_SCHED_RTG + +#define DEFAULT_RTG_GRP_ID 0 +#define DEFAULT_CGROUP_COLOC_ID 1 +#define MAX_NUM_CGROUP_COLOC_ID 21 + struct related_thread_group { int id; raw_spinlock_t lock; @@ -11,5 +16,7 @@ struct related_thread_group { unsigned int nr_running; }; + +int sched_set_group_id(struct task_struct *p, unsigned int group_id); #endif /* CONFIG_SCHED_RTG */ #endif diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3a86b124f41c..574c155b9e3a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7656,6 +7656,7 @@ void __init sched_init(void) atomic_set(&rq->nr_iowait, 0); } + BUG_ON(alloc_related_thread_groups()); set_load_weight(&init_task, false); /* @@ -8970,6 +8971,10 @@ void sched_exit(struct task_struct *p) struct rq *rq; u64 wallclock; +#ifdef CONFIG_SCHED_RTG + sched_set_group_id(p, 0); +#endif + rq = task_rq_lock(p, &rf); /* rq->curr == p */ diff --git a/kernel/sched/rtg/rtg.c b/kernel/sched/rtg/rtg.c index f48905afbbf4..6d54c48ba36f 100644 --- a/kernel/sched/rtg/rtg.c +++ b/kernel/sched/rtg/rtg.c @@ -4,10 +4,214 @@ * */ #include + +#include "../sched.h" #include "rtg.h" +struct related_thread_group *related_thread_groups[MAX_NUM_CGROUP_COLOC_ID]; +static DEFINE_RWLOCK(related_thread_group_lock); +static LIST_HEAD(active_related_thread_groups); + void init_task_rtg(struct task_struct *p) { rcu_assign_pointer(p->grp, NULL); INIT_LIST_HEAD(&p->grp_list); } + +struct related_thread_group *task_related_thread_group(struct task_struct *p) +{ + return rcu_dereference(p->grp); +} + +struct related_thread_group * +lookup_related_thread_group(unsigned int group_id) +{ + return related_thread_groups[group_id]; +} + +int alloc_related_thread_groups(void) +{ + int i, ret; + struct related_thread_group *grp = NULL; + + /* groupd_id = 0 is invalid as it's special id to remove group. 
*/ + for (i = 1; i < MAX_NUM_CGROUP_COLOC_ID; i++) { + grp = kzalloc(sizeof(*grp), GFP_NOWAIT); + if (!grp) { + ret = -ENOMEM; + goto err; + } + + grp->id = i; + INIT_LIST_HEAD(&grp->tasks); + INIT_LIST_HEAD(&grp->list); + raw_spin_lock_init(&grp->lock); + + related_thread_groups[i] = grp; + } + + return 0; + +err: + for (i = 1; i < MAX_NUM_CGROUP_COLOC_ID; i++) { + grp = lookup_related_thread_group(i); + if (grp) { + kfree(grp); + related_thread_groups[i] = NULL; + } else { + break; + } + } + + return ret; +} + +static void remove_task_from_group(struct task_struct *p) +{ + struct related_thread_group *grp = p->grp; + struct rq *rq = NULL; + bool empty_group = true; + struct rq_flags flag; + unsigned long irqflag; + + rq = __task_rq_lock(p, &flag); + + raw_spin_lock_irqsave(&grp->lock, irqflag); + list_del_init(&p->grp_list); + rcu_assign_pointer(p->grp, NULL); + + if (p->on_cpu) + grp->nr_running--; + + if ((int)grp->nr_running < 0) { + WARN_ON(1); + grp->nr_running = 0; + } + + if (!list_empty(&grp->tasks)) + empty_group = false; + + raw_spin_unlock_irqrestore(&grp->lock, irqflag); + __task_rq_unlock(rq, &flag); + + /* Reserved groups cannot be destroyed */ + if (empty_group && grp->id != DEFAULT_CGROUP_COLOC_ID) { + /* + * We test whether grp->list is attached with list_empty() + * hence re-init the list after deletion. + */ + write_lock(&related_thread_group_lock); + list_del_init(&grp->list); + write_unlock(&related_thread_group_lock); + } +} + +static int +add_task_to_group(struct task_struct *p, struct related_thread_group *grp) +{ + struct rq *rq = NULL; + struct rq_flags flag; + unsigned long irqflag; + + /* + * Change p->grp under rq->lock. Will prevent races with read-side + * reference of p->grp in various hot-paths + */ + rq = __task_rq_lock(p, &flag); + + raw_spin_lock_irqsave(&grp->lock, irqflag); + list_add(&p->grp_list, &grp->tasks); + rcu_assign_pointer(p->grp, grp); + if (p->on_cpu) + grp->nr_running++; + + raw_spin_unlock_irqrestore(&grp->lock, irqflag); + __task_rq_unlock(rq, &flag); + + return 0; +} + +static int __sched_set_group_id(struct task_struct *p, unsigned int group_id) +{ + int rc = 0; + unsigned long flags; + struct related_thread_group *grp = NULL; + struct related_thread_group *old_grp = NULL; + + if (group_id >= MAX_NUM_CGROUP_COLOC_ID) + return -EINVAL; + + raw_spin_lock_irqsave(&p->pi_lock, flags); + old_grp = p->grp; + if ((current != p && (p->flags & PF_EXITING)) || + (!old_grp && !group_id)) + goto done; + + /* + * If the system has CONFIG_SCHED_RTG_CGROUP, only tasks in DEFAULT group + * can be directly switched to other groups. + * + * In other cases, Switching from one group to another directly is not permitted. 
+ */ + if (old_grp && group_id) { + pr_err("%s[%d] switching group from %d to %d failed.\n", + p->comm, p->pid, old_grp->id, group_id); + rc = -EINVAL; + goto done; + } + + if (!group_id) { + remove_task_from_group(p); + goto done; + } + + grp = lookup_related_thread_group(group_id); + write_lock(&related_thread_group_lock); + if (list_empty(&grp->list)) + list_add(&grp->list, &active_related_thread_groups); + write_unlock(&related_thread_group_lock); + + rc = add_task_to_group(p, grp); +done: + raw_spin_unlock_irqrestore(&p->pi_lock, flags); + + return rc; +} + +/* group_id == 0: remove task from rtg */ +int sched_set_group_id(struct task_struct *p, unsigned int group_id) +{ + /* DEFAULT_CGROUP_COLOC_ID is a reserved id */ + if (group_id == DEFAULT_CGROUP_COLOC_ID) + return -EINVAL; + + return __sched_set_group_id(p, group_id); +} + +void update_group_nr_running(struct task_struct *p, int event) +{ + struct related_thread_group *grp; + + rcu_read_lock(); + grp = task_related_thread_group(p); + if (!grp) { + rcu_read_unlock(); + return; + } + + raw_spin_lock(&grp->lock); + + if (event == PICK_NEXT_TASK) + grp->nr_running++; + else if (event == PUT_PREV_TASK) + grp->nr_running--; + + if ((int)grp->nr_running < 0) { + WARN_ON(1); + grp->nr_running = 0; + } + + raw_spin_unlock(&grp->lock); + + rcu_read_unlock(); +} diff --git a/kernel/sched/rtg/rtg.h b/kernel/sched/rtg/rtg.h index 39046758a6b7..80661f8b2d32 100644 --- a/kernel/sched/rtg/rtg.h +++ b/kernel/sched/rtg/rtg.h @@ -10,5 +10,11 @@ #ifdef CONFIG_SCHED_RTG void init_task_rtg(struct task_struct *p); +int alloc_related_thread_groups(void); +struct related_thread_group *lookup_related_thread_group(unsigned int group_id); +struct related_thread_group *task_related_thread_group(struct task_struct *p); +void update_group_nr_running(struct task_struct *p, int event); +#else +static inline int alloc_related_thread_groups(void) { return 0; } #endif /* CONFIG_SCHED_RTG */ #endif diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c index 38699a333540..f560321b8691 100644 --- a/kernel/sched/walt.c +++ b/kernel/sched/walt.c @@ -1109,6 +1109,9 @@ void update_task_ravg(struct task_struct *p, struct rq *rq, int event, old_window_start = update_window_start(rq, wallclock, event); +#ifdef CONFIG_SCHED_RTG + update_group_nr_running(p, event); +#endif if (!p->ravg.mark_start) goto done; -- Gitee From f1ca14c3ed9637a0f123ef938624d8029d265d84 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Mon, 14 Feb 2022 09:46:44 +0800 Subject: [PATCH 03/11] sched: Add debugfs for related thread group codeaurora inclusion category: feature issue: #I4SULH CVE: NA Signed-off-by: Li Ming ------------------------------------------- /proc/$PID/sched_group_id: 1. write $GROUP_ID to the sched_group_id file to add task (pid = $PID) to related thread group (group_id = $GROUP_ID). 2. 
read the group id which the process is located from the sched_group_id file Signed-off-by: Vikram Mulukutla Signed-off-by: Satya Durga Srinivasu Prabhala Signed-off-by: Srinath Sridharan --- fs/proc/base.c | 70 ++++++++++++++++++ include/linux/sched/rtg.h | 1 + kernel/sched/rtg/Kconfig | 7 ++ kernel/sched/rtg/rtg.c | 151 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 229 insertions(+) diff --git a/fs/proc/base.c b/fs/proc/base.c index 96d4ab81619e..0d40f7a2cc4d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1499,6 +1499,70 @@ static const struct file_operations proc_pid_sched_operations = { #endif +#ifdef CONFIG_SCHED_RTG_DEBUG +static int sched_group_id_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct task_struct *p; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + seq_printf(m, "%d\n", sched_get_group_id(p)); + + put_task_struct(p); + + return 0; +} + +static ssize_t +sched_group_id_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file_inode(file); + struct task_struct *p; + char buffer[PROC_NUMBUF]; + int group_id, err; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + err = -EFAULT; + goto out; + } + + err = kstrtoint(strstrip(buffer), 0, &group_id); + if (err) + goto out; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + err = sched_set_group_id(p, group_id); + + put_task_struct(p); + +out: + return err < 0 ? err : count; +} + +static int sched_group_id_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, sched_group_id_show, inode); +} + +static const struct file_operations proc_pid_sched_group_id_operations = { + .open = sched_group_id_open, + .read = seq_read, + .write = sched_group_id_write, + .llseek = seq_lseek, + .release = single_release, +}; +#endif /* CONFIG_SCHED_RTG_DEBUG */ + #ifdef CONFIG_SCHED_AUTOGROUP /* * Print out autogroup related information: @@ -3372,6 +3436,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_ACCESS_TOKENID ONE("tokenid", S_IRUSR, proc_token_operations), #endif +#ifdef CONFIG_SCHED_RTG_DEBUG + REG("sched_group_id", S_IRUGO|S_IWUGO, proc_pid_sched_group_id_operations), +#endif }; static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) @@ -3704,6 +3771,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_ACCESS_TOKENID ONE("tokenid", S_IRUSR, proc_token_operations), #endif +#ifdef CONFIG_SCHED_RTG_DEBUG + REG("sched_group_id", S_IRUGO|S_IWUGO, proc_pid_sched_group_id_operations), +#endif }; static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) diff --git a/include/linux/sched/rtg.h b/include/linux/sched/rtg.h index 85bd334fa9cc..5da7ef60d8ee 100644 --- a/include/linux/sched/rtg.h +++ b/include/linux/sched/rtg.h @@ -18,5 +18,6 @@ struct related_thread_group { }; int sched_set_group_id(struct task_struct *p, unsigned int group_id); +unsigned int sched_get_group_id(struct task_struct *p); #endif /* CONFIG_SCHED_RTG */ #endif diff --git a/kernel/sched/rtg/Kconfig b/kernel/sched/rtg/Kconfig index 11a0343d935b..a96073631d16 100644 --- a/kernel/sched/rtg/Kconfig +++ b/kernel/sched/rtg/Kconfig @@ -7,4 +7,11 @@ config SCHED_RTG help Set related threads into a group. 
+config SCHED_RTG_DEBUG + bool "Related Thread Group DebugFS" + depends on SCHED_RTG + default n + help + If set, debug node will show rtg threads + endmenu diff --git a/kernel/sched/rtg/rtg.c b/kernel/sched/rtg/rtg.c index 6d54c48ba36f..a3fb4481bd78 100644 --- a/kernel/sched/rtg/rtg.c +++ b/kernel/sched/rtg/rtg.c @@ -12,6 +12,9 @@ struct related_thread_group *related_thread_groups[MAX_NUM_CGROUP_COLOC_ID]; static DEFINE_RWLOCK(related_thread_group_lock); static LIST_HEAD(active_related_thread_groups); +#define for_each_related_thread_group(grp) \ + list_for_each_entry(grp, &active_related_thread_groups, list) + void init_task_rtg(struct task_struct *p) { rcu_assign_pointer(p->grp, NULL); @@ -188,6 +191,19 @@ int sched_set_group_id(struct task_struct *p, unsigned int group_id) return __sched_set_group_id(p, group_id); } +unsigned int sched_get_group_id(struct task_struct *p) +{ + unsigned int group_id; + struct related_thread_group *grp = NULL; + + rcu_read_lock(); + grp = task_related_thread_group(p); + group_id = grp ? grp->id : 0; + rcu_read_unlock(); + + return group_id; +} + void update_group_nr_running(struct task_struct *p, int event) { struct related_thread_group *grp; @@ -215,3 +231,138 @@ void update_group_nr_running(struct task_struct *p, int event) rcu_read_unlock(); } + +#ifdef CONFIG_SCHED_RTG_DEBUG +#define seq_printf_rtg(m, x...) \ +do { \ + if (m) \ + seq_printf(m, x); \ + else \ + printk(x); \ +} while (0) + +static void print_rtg_info(struct seq_file *file, + const struct related_thread_group *grp) +{ + seq_printf_rtg(file, "RTG_ID : %d\n", grp->id); +} + +static char rtg_task_state_to_char(const struct task_struct *tsk) +{ + static const char state_char[] = "RSDTtXZPI"; + unsigned int tsk_state = READ_ONCE(tsk->state); + unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT; + + BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX); + BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != sizeof(state_char) - 1); + + if (tsk_state == TASK_IDLE) + state = TASK_REPORT_IDLE; + return state_char[fls(state)]; +} + +static inline void print_rtg_task_header(struct seq_file *file, + const char *header, int run, int nr) +{ + seq_printf_rtg(file, + "%s : %d/%d\n" + "STATE COMM PID PRIO CPU\n" + "---------------------------------------------------------\n", + header, run, nr); +} + +static inline void print_rtg_task(struct seq_file *file, + const struct task_struct *tsk) +{ + seq_printf_rtg(file, "%5c %15s %5d %5d %5d(%*pbl)\n", + rtg_task_state_to_char(tsk), tsk->comm, tsk->pid, + tsk->prio, task_cpu(tsk), cpumask_pr_args(tsk->cpus_ptr)); +} + +static void print_rtg_threads(struct seq_file *file, + const struct related_thread_group *grp) +{ + struct task_struct *tsk = NULL; + int nr_thread = 0; + + list_for_each_entry(tsk, &grp->tasks, grp_list) + nr_thread++; + + if (!nr_thread) + return; + + print_rtg_task_header(file, "RTG_THREADS", + grp->nr_running, nr_thread); + list_for_each_entry(tsk, &grp->tasks, grp_list) { + if (unlikely(!tsk)) + continue; + get_task_struct(tsk); + print_rtg_task(file, tsk); + put_task_struct(tsk); + } + seq_printf_rtg(file, "---------------------------------------------------------\n"); +} + +static int sched_rtg_debug_show(struct seq_file *file, void *param) +{ + struct related_thread_group *grp = NULL; + unsigned long flags; + bool have_task = false; + + for_each_related_thread_group(grp) { + if (unlikely(!grp)) { + seq_printf_rtg(file, "RTG none\n"); + return 0; + } + + raw_spin_lock_irqsave(&grp->lock, flags); + if (list_empty(&grp->tasks)) { + 
raw_spin_unlock_irqrestore(&grp->lock, flags); + continue; + } + + if (!have_task) + have_task = true; + + seq_printf_rtg(file, "\n\n"); + print_rtg_info(file, grp); + print_rtg_threads(file, grp); + raw_spin_unlock_irqrestore(&grp->lock, flags); + } + + if (!have_task) + seq_printf_rtg(file, "RTG tasklist empty\n"); + + return 0; +} + +static int sched_rtg_debug_release(struct inode *inode, struct file *file) +{ + seq_release(inode, file); + return 0; +} + +static int sched_rtg_debug_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, sched_rtg_debug_show, NULL); +} + +static const struct proc_ops sched_rtg_debug_fops = { + .proc_open = sched_rtg_debug_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = sched_rtg_debug_release, +}; + +static int __init init_sched_rtg_debug_procfs(void) +{ + struct proc_dir_entry *pe = NULL; + + pe = proc_create("sched_rtg_debug", + 0400, NULL, &sched_rtg_debug_fops); + if (unlikely(!pe)) + return -ENOMEM; + return 0; +} +late_initcall(init_sched_rtg_debug_procfs); +#endif -- Gitee From 4adbc9fbbc760de140d78d84eb75869adee30c40 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Mon, 14 Feb 2022 11:25:07 +0800 Subject: [PATCH 04/11] sched: Provide independent load tracking for each group codeaurora inclusion category: feature issue: #I4SULH CVE: NA Signed-off-by: Li Ming ------------------------------------------- 1. record group load in grp->ravg. 2. task's cpu usage is accounted in grp->cpu_time[cpu]->curr/prev_runnable_sum when its ->grp is not NULL, otherwise rq->curr/prev_runnable_sum. Signed-off-by: Vikram Mulukutla Signed-off-by: Satya Durga Srinivasu Prabhala Signed-off-by: Srinath Sridharan --- include/linux/sched.h | 6 + include/linux/sched/rtg.h | 22 +++ include/trace/events/walt.h | 88 ++++++++++++ kernel/sched/rtg/rtg.c | 272 +++++++++++++++++++++++++++++++++++- kernel/sched/rtg/rtg.h | 7 + kernel/sched/sched.h | 3 + kernel/sched/walt.c | 92 ++++++++++-- kernel/sched/walt.h | 11 ++ 8 files changed, 489 insertions(+), 12 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index c5e0c99cb3cd..393cdfdfa6d9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -223,6 +223,12 @@ enum task_event { IRQ_UPDATE = 5, }; +/* Note: this need to be in sync with migrate_type_names array */ +enum migrate_types { + GROUP_TO_RQ, + RQ_TO_GROUP, +}; + #ifdef CONFIG_CPU_ISOLATION_OPT extern int sched_isolate_count(const cpumask_t *mask, bool include_offline); extern int sched_isolate_cpu(int cpu); diff --git a/include/linux/sched/rtg.h b/include/linux/sched/rtg.h index 5da7ef60d8ee..a35114766acb 100644 --- a/include/linux/sched/rtg.h +++ b/include/linux/sched/rtg.h @@ -8,6 +8,22 @@ #define DEFAULT_CGROUP_COLOC_ID 1 #define MAX_NUM_CGROUP_COLOC_ID 21 +struct group_cpu_time { + u64 window_start; + u64 curr_runnable_sum; + u64 prev_runnable_sum; + u64 nt_curr_runnable_sum; + u64 nt_prev_runnable_sum; +}; + +struct group_ravg { + unsigned long curr_window_load; + unsigned long curr_window_exec; + unsigned long prev_window_load; + unsigned long prev_window_exec; + unsigned long normalized_util; +}; + struct related_thread_group { int id; raw_spinlock_t lock; @@ -15,6 +31,12 @@ struct related_thread_group { struct list_head list; unsigned int nr_running; + struct group_ravg ravg; + u64 window_start; + u64 mark_start; + u64 prev_window_time; + /* rtg window information for WALT */ + unsigned int window_size; }; int sched_set_group_id(struct task_struct *p, unsigned int group_id); diff --git 
a/include/trace/events/walt.h b/include/trace/events/walt.h index e5328b75a8bd..9af92c8689b9 100644 --- a/include/trace/events/walt.h +++ b/include/trace/events/walt.h @@ -47,6 +47,43 @@ static inline s64 __rq_update_sum(struct rq *rq, bool curr, bool new) else return rq->prev_runnable_sum; } + +#ifdef CONFIG_SCHED_RTG +static inline s64 __grp_update_sum(struct rq *rq, bool curr, bool new) +{ + if (curr) + if (new) + return rq->grp_time.nt_curr_runnable_sum; + else + return rq->grp_time.curr_runnable_sum; + else + if (new) + return rq->grp_time.nt_prev_runnable_sum; + else + return rq->grp_time.prev_runnable_sum; +} + +static inline s64 +__get_update_sum(struct rq *rq, enum migrate_types migrate_type, + bool src, bool new, bool curr) +{ + switch (migrate_type) { + case RQ_TO_GROUP: + if (src) + return __rq_update_sum(rq, curr, new); + else + return __grp_update_sum(rq, curr, new); + case GROUP_TO_RQ: + if (src) + return __grp_update_sum(rq, curr, new); + else + return __rq_update_sum(rq, curr, new); + default: + WARN_ON_ONCE(1); + return -1; + } +} +#endif #endif TRACE_EVENT(sched_update_history, @@ -162,6 +199,57 @@ TRACE_EVENT(sched_update_task_ravg, __entry->active_windows) ); +extern const char *migrate_type_names[]; + +#ifdef CONFIG_SCHED_RTG +TRACE_EVENT(sched_migration_update_sum, + + TP_PROTO(struct task_struct *p, enum migrate_types migrate_type, struct rq *rq), + + TP_ARGS(p, migrate_type, rq), + + TP_STRUCT__entry( + __field(int, tcpu) + __field(int, pid) + __field(enum migrate_types, migrate_type) + __field(s64, src_cs) + __field(s64, src_ps) + __field(s64, dst_cs) + __field(s64, dst_ps) + __field(s64, src_nt_cs) + __field(s64, src_nt_ps) + __field(s64, dst_nt_cs) + __field(s64, dst_nt_ps) + ), + + TP_fast_assign( + __entry->tcpu = task_cpu(p); + __entry->pid = p->pid; + __entry->migrate_type = migrate_type; + __entry->src_cs = __get_update_sum(rq, migrate_type, + true, false, true); + __entry->src_ps = __get_update_sum(rq, migrate_type, + true, false, false); + __entry->dst_cs = __get_update_sum(rq, migrate_type, + false, false, true); + __entry->dst_ps = __get_update_sum(rq, migrate_type, + false, false, false); + __entry->src_nt_cs = __get_update_sum(rq, migrate_type, + true, true, true); + __entry->src_nt_ps = __get_update_sum(rq, migrate_type, + true, true, false); + __entry->dst_nt_cs = __get_update_sum(rq, migrate_type, + false, true, true); + __entry->dst_nt_ps = __get_update_sum(rq, migrate_type, + false, true, false); + ), + + TP_printk("pid %d task_cpu %d migrate_type %s src_cs %llu src_ps %llu dst_cs %lld dst_ps %lld src_nt_cs %llu src_nt_ps %llu dst_nt_cs %lld dst_nt_ps %lld", + __entry->pid, __entry->tcpu, migrate_type_names[__entry->migrate_type], + __entry->src_cs, __entry->src_ps, __entry->dst_cs, __entry->dst_ps, + __entry->src_nt_cs, __entry->src_nt_ps, __entry->dst_nt_cs, __entry->dst_nt_ps) +); +#endif #endif /* _TRACE_WALT_H */ /* This part must be outside protection */ diff --git a/kernel/sched/rtg/rtg.c b/kernel/sched/rtg/rtg.c index a3fb4481bd78..76d8f366fff5 100644 --- a/kernel/sched/rtg/rtg.c +++ b/kernel/sched/rtg/rtg.c @@ -4,9 +4,16 @@ * */ #include +#include #include "../sched.h" #include "rtg.h" +#include "../walt.h" + +#define ADD_TASK 0 +#define REM_TASK 1 + +#define DEFAULT_GROUP_RATE 60 /* 60FPS */ struct related_thread_group *related_thread_groups[MAX_NUM_CGROUP_COLOC_ID]; static DEFINE_RWLOCK(related_thread_group_lock); @@ -48,6 +55,7 @@ int alloc_related_thread_groups(void) grp->id = i; INIT_LIST_HEAD(&grp->tasks); 
INIT_LIST_HEAD(&grp->list); + grp->window_size = NSEC_PER_SEC / DEFAULT_GROUP_RATE; raw_spin_lock_init(&grp->lock); related_thread_groups[i] = grp; @@ -69,6 +77,111 @@ int alloc_related_thread_groups(void) return ret; } +/* + * Task's cpu usage is accounted in: + * rq->curr/prev_runnable_sum, when its ->grp is NULL + * grp->cpu_time[cpu]->curr/prev_runnable_sum, when its ->grp is !NULL + * + * Transfer task's cpu usage between those counters when transitioning between + * groups + */ +static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp, + struct task_struct *p, int event) +{ + u64 wallclock; + struct group_cpu_time *cpu_time; + u64 *src_curr_runnable_sum, *dst_curr_runnable_sum; + u64 *src_prev_runnable_sum, *dst_prev_runnable_sum; + u64 *src_nt_curr_runnable_sum, *dst_nt_curr_runnable_sum; + u64 *src_nt_prev_runnable_sum, *dst_nt_prev_runnable_sum; + int migrate_type; + int cpu = cpu_of(rq); + bool new_task; + int i; + + wallclock = sched_ktime_clock(); + + update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0); + update_task_ravg(p, rq, TASK_UPDATE, wallclock, 0); + new_task = is_new_task(p); + + cpu_time = &rq->grp_time; + if (event == ADD_TASK) { + migrate_type = RQ_TO_GROUP; + + src_curr_runnable_sum = &rq->curr_runnable_sum; + dst_curr_runnable_sum = &cpu_time->curr_runnable_sum; + src_prev_runnable_sum = &rq->prev_runnable_sum; + dst_prev_runnable_sum = &cpu_time->prev_runnable_sum; + + src_nt_curr_runnable_sum = &rq->nt_curr_runnable_sum; + dst_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum; + src_nt_prev_runnable_sum = &rq->nt_prev_runnable_sum; + dst_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum; + + *src_curr_runnable_sum -= p->ravg.curr_window_cpu[cpu]; + *src_prev_runnable_sum -= p->ravg.prev_window_cpu[cpu]; + if (new_task) { + *src_nt_curr_runnable_sum -= + p->ravg.curr_window_cpu[cpu]; + *src_nt_prev_runnable_sum -= + p->ravg.prev_window_cpu[cpu]; + } + + update_cluster_load_subtractions(p, cpu, + rq->window_start, new_task); + + } else { + migrate_type = GROUP_TO_RQ; + + src_curr_runnable_sum = &cpu_time->curr_runnable_sum; + dst_curr_runnable_sum = &rq->curr_runnable_sum; + src_prev_runnable_sum = &cpu_time->prev_runnable_sum; + dst_prev_runnable_sum = &rq->prev_runnable_sum; + + src_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum; + dst_nt_curr_runnable_sum = &rq->nt_curr_runnable_sum; + src_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum; + dst_nt_prev_runnable_sum = &rq->nt_prev_runnable_sum; + + *src_curr_runnable_sum -= p->ravg.curr_window; + *src_prev_runnable_sum -= p->ravg.prev_window; + if (new_task) { + *src_nt_curr_runnable_sum -= p->ravg.curr_window; + *src_nt_prev_runnable_sum -= p->ravg.prev_window; + } + + /* + * Need to reset curr/prev windows for all CPUs, not just the + * ones in the same cluster. Since inter cluster migrations + * did not result in the appropriate book keeping, the values + * per CPU would be inaccurate. + */ + for_each_possible_cpu(i) { + p->ravg.curr_window_cpu[i] = 0; + p->ravg.prev_window_cpu[i] = 0; + } + } + + *dst_curr_runnable_sum += p->ravg.curr_window; + *dst_prev_runnable_sum += p->ravg.prev_window; + if (new_task) { + *dst_nt_curr_runnable_sum += p->ravg.curr_window; + *dst_nt_prev_runnable_sum += p->ravg.prev_window; + } + + /* + * When a task enter or exits a group, it's curr and prev windows are + * moved to a single CPU. 
This behavior might be sub-optimal in the + * exit case, however, it saves us the overhead of handling inter + * cluster migration fixups while the task is part of a related group. + */ + p->ravg.curr_window_cpu[cpu] = p->ravg.curr_window; + p->ravg.prev_window_cpu[cpu] = p->ravg.prev_window; + + trace_sched_migration_update_sum(p, migrate_type, rq); +} + static void remove_task_from_group(struct task_struct *p) { struct related_thread_group *grp = p->grp; @@ -78,6 +191,7 @@ static void remove_task_from_group(struct task_struct *p) unsigned long irqflag; rq = __task_rq_lock(p, &flag); + transfer_busy_time(rq, p->grp, p, REM_TASK); raw_spin_lock_irqsave(&grp->lock, irqflag); list_del_init(&p->grp_list); @@ -121,12 +235,17 @@ add_task_to_group(struct task_struct *p, struct related_thread_group *grp) * reference of p->grp in various hot-paths */ rq = __task_rq_lock(p, &flag); + transfer_busy_time(rq, grp, p, ADD_TASK); raw_spin_lock_irqsave(&grp->lock, irqflag); list_add(&p->grp_list, &grp->tasks); rcu_assign_pointer(p->grp, grp); - if (p->on_cpu) + if (p->on_cpu) { grp->nr_running++; + if (grp->nr_running == 1) + grp->mark_start = max(grp->mark_start, + sched_ktime_clock()); + } raw_spin_unlock_irqrestore(&grp->lock, irqflag); __task_rq_unlock(rq, &flag); @@ -232,6 +351,157 @@ void update_group_nr_running(struct task_struct *p, int event) rcu_read_unlock(); } +int sched_set_group_window_size(unsigned int grp_id, unsigned int window_size) +{ + struct related_thread_group *grp = NULL; + unsigned long flag; + + if (!window_size) + return -EINVAL; + + grp = lookup_related_thread_group(grp_id); + if (!grp) { + pr_err("set window size for group %d fail\n", grp_id); + return -ENODEV; + } + + raw_spin_lock_irqsave(&grp->lock, flag); + grp->window_size = window_size; + raw_spin_unlock_irqrestore(&grp->lock, flag); + + return 0; +} + +void group_time_rollover(struct group_ravg *ravg) +{ + ravg->prev_window_load = ravg->curr_window_load; + ravg->curr_window_load = 0; + ravg->prev_window_exec = ravg->curr_window_exec; + ravg->curr_window_exec = 0; +} + +int sched_set_group_window_rollover(unsigned int grp_id) +{ + struct related_thread_group *grp = NULL; + u64 wallclock; + unsigned long flag; + + grp = lookup_related_thread_group(grp_id); + if (!grp) { + pr_err("set window start for group %d fail\n", grp_id); + return -ENODEV; + } + + raw_spin_lock_irqsave(&grp->lock, flag); + + wallclock = sched_ktime_clock(); + grp->prev_window_time = wallclock - grp->window_start; + grp->window_start = wallclock; + + group_time_rollover(&grp->ravg); + raw_spin_unlock_irqrestore(&grp->lock, flag); + + return 0; +} + +static void add_to_group_time(struct related_thread_group *grp, struct rq *rq, u64 wallclock) +{ + u64 delta_exec, delta_load; + u64 mark_start = grp->mark_start; + u64 window_start = grp->window_start; + + if (unlikely(wallclock <= mark_start)) + return; + + /* per group load tracking in RTG */ + if (likely(mark_start >= window_start)) { + /* + * ws ms wc + * | | | + * V V V + * |---------------| + */ + delta_exec = wallclock - mark_start; + grp->ravg.curr_window_exec += delta_exec; + + delta_load = scale_exec_time(delta_exec, rq); + grp->ravg.curr_window_load += delta_load; + } else { + /* + * ms ws wc + * | | | + * V V V + * -----|---------- + */ + /* prev window statistic */ + delta_exec = window_start - mark_start; + grp->ravg.prev_window_exec += delta_exec; + + delta_load = scale_exec_time(delta_exec, rq); + grp->ravg.prev_window_load += delta_load; + + /* curr window statistic */ + delta_exec = 
wallclock - window_start; + grp->ravg.curr_window_exec += delta_exec; + + delta_load = scale_exec_time(delta_exec, rq); + grp->ravg.curr_window_load += delta_load; + } +} + +static inline void add_to_group_demand(struct related_thread_group *grp, + struct rq *rq, u64 wallclock) +{ + if (unlikely(wallclock <= grp->window_start)) + return; + + add_to_group_time(grp, rq, wallclock); +} + +static int account_busy_for_group_demand(struct task_struct *p, int event) +{ + /* + *No need to bother updating task demand for exiting tasks + * or the idle task. + */ + if (exiting_task(p) || is_idle_task(p)) + return 0; + + if (event == TASK_WAKE || event == TASK_MIGRATE) + return 0; + + return 1; +} + +void update_group_demand(struct task_struct *p, struct rq *rq, + int event, u64 wallclock) +{ + struct related_thread_group *grp; + + if (!account_busy_for_group_demand(p, event)) + return; + + rcu_read_lock(); + grp = task_related_thread_group(p); + if (!grp) { + rcu_read_unlock(); + return; + } + + raw_spin_lock(&grp->lock); + + if (grp->nr_running == 1) + grp->mark_start = max(grp->mark_start, p->ravg.mark_start); + + add_to_group_demand(grp, rq, wallclock); + + grp->mark_start = wallclock; + + raw_spin_unlock(&grp->lock); + + rcu_read_unlock(); +} + #ifdef CONFIG_SCHED_RTG_DEBUG #define seq_printf_rtg(m, x...) \ do { \ diff --git a/kernel/sched/rtg/rtg.h b/kernel/sched/rtg/rtg.h index 80661f8b2d32..5970d28cadef 100644 --- a/kernel/sched/rtg/rtg.h +++ b/kernel/sched/rtg/rtg.h @@ -14,6 +14,13 @@ int alloc_related_thread_groups(void); struct related_thread_group *lookup_related_thread_group(unsigned int group_id); struct related_thread_group *task_related_thread_group(struct task_struct *p); void update_group_nr_running(struct task_struct *p, int event); +struct rq; +void update_group_demand(struct task_struct *p, struct rq *rq, + int event, u64 wallclock); +int sched_set_group_window_size(unsigned int grp_id, unsigned int window_size); +int sched_set_group_window_rollover(unsigned int grp_id); +struct group_cpu_time *group_update_cpu_time(struct rq *rq, + struct related_thread_group *grp); #else static inline int alloc_related_thread_groups(void) { return 0; } #endif /* CONFIG_SCHED_RTG */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 22ff400d5b08..fdb69a9ad1f9 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1077,7 +1077,10 @@ struct rq { u64 nt_prev_runnable_sum; u64 cum_window_demand_scaled; struct load_subtractions load_subs[NUM_TRACKED_WINDOWS]; +#ifdef CONFIG_SCHED_RTG + struct group_cpu_time grp_time; #endif +#endif /* CONFIG_SCHED_WALT */ #ifdef CONFIG_IRQ_TIME_ACCOUNTING u64 prev_irq_time; diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c index f560321b8691..a2824cc9bc2e 100644 --- a/kernel/sched/walt.c +++ b/kernel/sched/walt.c @@ -32,6 +32,8 @@ const char *task_event_names[] = {"PUT_PREV_TASK", "PICK_NEXT_TASK", "TASK_WAKE", "TASK_MIGRATE", "TASK_UPDATE", "IRQ_UPDATE"}; +const char *migrate_type_names[] = {"GROUP_TO_RQ", "RQ_TO_GROUP", + "RQ_TO_RQ", "GROUP_TO_GROUP"}; #define SCHED_FREQ_ACCOUNT_WAIT_TIME 0 #define SCHED_ACCOUNT_WAIT_TIME 1 @@ -476,6 +478,13 @@ void fixup_busy_time(struct task_struct *p, int new_cpu) struct rq *dest_rq = cpu_rq(new_cpu); u64 wallclock; bool new_task; +#ifdef CONFIG_SCHED_RTG + u64 *src_curr_runnable_sum, *dst_curr_runnable_sum; + u64 *src_prev_runnable_sum, *dst_prev_runnable_sum; + u64 *src_nt_curr_runnable_sum, *dst_nt_curr_runnable_sum; + u64 *src_nt_prev_runnable_sum, *dst_nt_prev_runnable_sum; + struct 
related_thread_group *grp; +#endif if (!p->on_rq && p->state != TASK_WAKING) return; @@ -513,9 +522,58 @@ void fixup_busy_time(struct task_struct *p, int new_cpu) } new_task = is_new_task(p); +#ifdef CONFIG_SCHED_RTG + /* Protected by rq_lock */ + grp = task_related_thread_group(p); + + /* + * For frequency aggregation, we continue to do migration fixups + * even for intra cluster migrations. This is because, the aggregated + * load has to reported on a single CPU regardless. + */ + if (grp) { + struct group_cpu_time *cpu_time; + + cpu_time = &src_rq->grp_time; + src_curr_runnable_sum = &cpu_time->curr_runnable_sum; + src_prev_runnable_sum = &cpu_time->prev_runnable_sum; + src_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum; + src_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum; + + cpu_time = &dest_rq->grp_time; + dst_curr_runnable_sum = &cpu_time->curr_runnable_sum; + dst_prev_runnable_sum = &cpu_time->prev_runnable_sum; + dst_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum; + dst_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum; + + if (p->ravg.curr_window) { + *src_curr_runnable_sum -= p->ravg.curr_window; + *dst_curr_runnable_sum += p->ravg.curr_window; + if (new_task) { + *src_nt_curr_runnable_sum -= + p->ravg.curr_window; + *dst_nt_curr_runnable_sum += + p->ravg.curr_window; + } + } - inter_cluster_migration_fixup(p, new_cpu, - task_cpu(p), new_task); + if (p->ravg.prev_window) { + *src_prev_runnable_sum -= p->ravg.prev_window; + *dst_prev_runnable_sum += p->ravg.prev_window; + if (new_task) { + *src_nt_prev_runnable_sum -= + p->ravg.prev_window; + *dst_nt_prev_runnable_sum += + p->ravg.prev_window; + } + } + } else { +#endif + inter_cluster_migration_fixup(p, new_cpu, + task_cpu(p), new_task); +#ifdef CONFIG_SCHED_RTG + } +#endif if (!same_freq_domain(new_cpu, task_cpu(p))) irq_work_queue(&walt_migration_irq_work); @@ -634,15 +692,6 @@ static void update_history(struct rq *rq, struct task_struct *p, #define DIV64_U64_ROUNDUP(X, Y) div64_u64((X) + (Y - 1), Y) -static inline u64 scale_exec_time(u64 delta, struct rq *rq) -{ - unsigned long capcurr = capacity_curr_of(cpu_of(rq)); - - delta = (delta * capcurr) >> SCHED_CAPACITY_SHIFT; - - return delta; -} - static u64 add_to_task_demand(struct rq *rq, struct task_struct *p, u64 delta) { delta = scale_exec_time(delta, rq); @@ -712,6 +761,10 @@ static u64 update_task_demand(struct task_struct *p, struct rq *rq, u32 window_size = sched_ravg_window; u64 runtime; +#ifdef CONFIG_SCHED_RTG + update_group_demand(p, rq, event, wallclock); +#endif + new_window = mark_start < window_start; if (!account_busy_for_task_demand(rq, p, event)) { if (new_window) @@ -870,6 +923,10 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, u64 *nt_prev_runnable_sum = &rq->nt_prev_runnable_sum; bool new_task; int cpu = rq->cpu; +#ifdef CONFIG_SCHED_RTG + struct group_cpu_time *cpu_time; + struct related_thread_group *grp; +#endif new_window = mark_start < window_start; if (new_window) { @@ -895,6 +952,19 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, if (!account_busy_for_cpu_time(rq, p, irqtime, event)) goto done; +#ifdef CONFIG_SCHED_RTG + grp = task_related_thread_group(p); + if (grp) { + cpu_time = &rq->grp_time; + + curr_runnable_sum = &cpu_time->curr_runnable_sum; + prev_runnable_sum = &cpu_time->prev_runnable_sum; + + nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum; + nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum; + } +#endif + if (!new_window) { /* * 
account_busy_for_cpu_time() = 1 so busy time needs diff --git a/kernel/sched/walt.h b/kernel/sched/walt.h index fcb1555d53f8..84da97ccce20 100644 --- a/kernel/sched/walt.h +++ b/kernel/sched/walt.h @@ -45,6 +45,15 @@ static inline struct sched_cluster *cpu_cluster(int cpu) return cpu_rq(cpu)->cluster; } +static inline u64 scale_exec_time(u64 delta, struct rq *rq) +{ + unsigned long capcurr = capacity_curr_of(cpu_of(rq)); + + delta = (delta * capcurr) >> SCHED_CAPACITY_SHIFT; + + return delta; +} + static inline bool is_new_task(struct task_struct *p) { return p->ravg.active_windows < SCHED_NEW_TASK_WINDOWS; @@ -192,6 +201,8 @@ static inline void assign_cluster_ids(struct list_head *head) } } +extern void update_cluster_load_subtractions(struct task_struct *p, + int cpu, u64 ws, bool new_task); #else /* CONFIG_SCHED_WALT */ static inline void walt_sched_init_rq(struct rq *rq) { } -- Gitee From 39ceb100c088bef899f96f2b733fe1f6b90b0564 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Mon, 14 Feb 2022 13:19:01 +0800 Subject: [PATCH 05/11] sched: scehd: Introduce sched_update_rtg_tick() ohos inclusion category: feature issue: #I4SULH CVE: NA ------------------------------------------- sched_update_rtg_tick() is called in tick. Signed-off-by: Li Ming --- include/linux/sched/rtg.h | 7 +++++++ kernel/sched/core.c | 3 +++ kernel/sched/rtg/rtg.c | 17 +++++++++++++++++ kernel/sched/rtg/rtg.h | 1 + 4 files changed, 28 insertions(+) diff --git a/include/linux/sched/rtg.h b/include/linux/sched/rtg.h index a35114766acb..b5cc92fcece9 100644 --- a/include/linux/sched/rtg.h +++ b/include/linux/sched/rtg.h @@ -24,6 +24,8 @@ struct group_ravg { unsigned long normalized_util; }; +struct rtg_class; + struct related_thread_group { int id; raw_spinlock_t lock; @@ -37,6 +39,11 @@ struct related_thread_group { u64 prev_window_time; /* rtg window information for WALT */ unsigned int window_size; + const struct rtg_class *rtg_class; +}; + +struct rtg_class { + void (*sched_update_rtg_tick)(struct related_thread_group *grp); }; int sched_set_group_id(struct task_struct *p, unsigned int group_id); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 574c155b9e3a..20dd5009e315 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4124,6 +4124,9 @@ void scheduler_tick(void) rq_unlock(rq, &rf); +#ifdef CONFIG_SCHED_RTG + sched_update_rtg_tick(curr); +#endif perf_event_task_tick(); #ifdef CONFIG_SMP diff --git a/kernel/sched/rtg/rtg.c b/kernel/sched/rtg/rtg.c index 76d8f366fff5..78fbcd1b9cd3 100644 --- a/kernel/sched/rtg/rtg.c +++ b/kernel/sched/rtg/rtg.c @@ -502,6 +502,23 @@ void update_group_demand(struct task_struct *p, struct rq *rq, rcu_read_unlock(); } +void sched_update_rtg_tick(struct task_struct *p) +{ + struct related_thread_group *grp = NULL; + + rcu_read_lock(); + grp = task_related_thread_group(p); + if (!grp || list_empty(&grp->tasks)) { + rcu_read_unlock(); + return; + } + + if (grp->rtg_class && grp->rtg_class->sched_update_rtg_tick) + grp->rtg_class->sched_update_rtg_tick(grp); + + rcu_read_unlock(); +} + #ifdef CONFIG_SCHED_RTG_DEBUG #define seq_printf_rtg(m, x...) 
\ do { \ diff --git a/kernel/sched/rtg/rtg.h b/kernel/sched/rtg/rtg.h index 5970d28cadef..e32c67aebb96 100644 --- a/kernel/sched/rtg/rtg.h +++ b/kernel/sched/rtg/rtg.h @@ -21,6 +21,7 @@ int sched_set_group_window_size(unsigned int grp_id, unsigned int window_size); int sched_set_group_window_rollover(unsigned int grp_id); struct group_cpu_time *group_update_cpu_time(struct rq *rq, struct related_thread_group *grp); +void sched_update_rtg_tick(struct task_struct *p); #else static inline int alloc_related_thread_groups(void) { return 0; } #endif /* CONFIG_SCHED_RTG */ -- Gitee From c18d29d98ab6d78f6ba5a16b4ba980963368624c Mon Sep 17 00:00:00 2001 From: Li Ming Date: Mon, 14 Feb 2022 14:51:40 +0800 Subject: [PATCH 06/11] sched: Introduce perferred cluster to optimize cpu selection for related threads codeaurora inclusion category: feature issue: #I4SULH CVE: NA Signed-off-by: Li Ming ------------------------------------------- Set the preferred cluster of group according to the group load and prioritize cpu selection for related threads from preferred cluster Signed-off-by: Vikram Mulukutla Signed-off-by: Satya Durga Srinivasu Prabhala Signed-off-by: Srinath Sridharan --- include/linux/sched/rtg.h | 1 + kernel/sched/fair.c | 76 ++++++++++++++-- kernel/sched/rtg/rtg.c | 186 ++++++++++++++++++++++++++++++++++++++ kernel/sched/rtg/rtg.h | 12 +++ kernel/sched/sched.h | 13 ++- kernel/sched/walt.h | 6 ++ 6 files changed, 286 insertions(+), 8 deletions(-) diff --git a/include/linux/sched/rtg.h b/include/linux/sched/rtg.h index b5cc92fcece9..eae7f83808ff 100644 --- a/include/linux/sched/rtg.h +++ b/include/linux/sched/rtg.h @@ -40,6 +40,7 @@ struct related_thread_group { /* rtg window information for WALT */ unsigned int window_size; const struct rtg_class *rtg_class; + struct sched_cluster *preferred_cluster; }; struct rtg_class { diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 42d51caa611c..3b8d6c1dfc30 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -22,6 +22,7 @@ */ #include "sched.h" #include "walt.h" +#include "rtg/rtg.h" #ifdef CONFIG_SCHED_WALT static void walt_fixup_sched_stats_fair(struct rq *rq, struct task_struct *p, @@ -773,7 +774,6 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu); static unsigned long task_h_load(struct task_struct *p); -static unsigned long capacity_of(int cpu); /* Give new sched_entity start runnable values to heavy its load in infant time */ void init_entity_runnable_average(struct sched_entity *se) @@ -4104,8 +4104,27 @@ static inline int task_fits_capacity(struct task_struct *p, long capacity) return fits_capacity(uclamp_task_util(p), capacity); } +#ifdef CONFIG_SCHED_RTG +bool task_fits_max(struct task_struct *p, int cpu) +{ + unsigned long capacity = capacity_orig_of(cpu); + unsigned long max_capacity = cpu_rq(cpu)->rd->max_cpu_capacity; + + if (capacity == max_capacity) + return true; + + return task_fits_capacity(p, capacity); +} +#endif + static inline void update_misfit_status(struct task_struct *p, struct rq *rq) { + bool task_fits = false; +#ifdef CONFIG_SCHED_RTG + int cpu = cpu_of(rq); + struct cpumask *rtg_target = NULL; +#endif + if (!static_branch_unlikely(&sched_asym_cpucapacity)) return; @@ -4114,7 +4133,17 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) return; } - if (task_fits_capacity(p, capacity_of(cpu_of(rq)))) { +#ifdef CONFIG_SCHED_RTG + rtg_target = find_rtg_target(p); + if 
(rtg_target) + task_fits = capacity_orig_of(cpu) >= + capacity_orig_of(cpumask_first(rtg_target)); + else + task_fits = task_fits_capacity(p, capacity_of(cpu_of(rq))); +#else + task_fits = task_fits_capacity(p, capacity_of(cpu_of(rq))); +#endif + if (task_fits) { rq->misfit_task_load = 0; return; } @@ -5805,11 +5834,6 @@ static unsigned long cpu_runnable_without(struct rq *rq, struct task_struct *p) return runnable; } -static unsigned long capacity_of(int cpu) -{ - return cpu_rq(cpu)->cpu_capacity; -} - static void record_wakee(struct task_struct *p) { /* @@ -6574,6 +6598,12 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p) return min_t(unsigned long, util, capacity_orig_of(cpu)); } +#ifdef CONFIG_SCHED_RTG +unsigned long capacity_spare_without(int cpu, struct task_struct *p) +{ + return max_t(long, capacity_of(cpu) - cpu_util_without(cpu, p), 0); +} +#endif /* * Predicts what cpu_util(@cpu) would return if @p was migrated (and enqueued) * to @dst_cpu. @@ -6840,6 +6870,12 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f int new_cpu = prev_cpu; int want_affine = 0; int sync = (wake_flags & WF_SYNC) && !(current->flags & PF_EXITING); +#ifdef CONFIG_SCHED_RTG + int target_cpu = -1; + target_cpu = find_rtg_cpu(p); + if (target_cpu >= 0) + return target_cpu; +#endif if (sd_flag & SD_BALANCE_WAKE) { record_wakee(p); @@ -7524,6 +7560,7 @@ enum migration_type { #define LBF_SOME_PINNED 0x08 #define LBF_NOHZ_STATS 0x10 #define LBF_NOHZ_AGAIN 0x20 +#define LBF_IGNORE_PREFERRED_CLUSTER_TASKS 0x200 struct lb_env { struct sched_domain *sd; @@ -7706,6 +7743,13 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) /* Record that we found atleast one task that could run on dst_cpu */ env->flags &= ~LBF_ALL_PINNED; + +#ifdef CONFIG_SCHED_RTG + if (env->flags & LBF_IGNORE_PREFERRED_CLUSTER_TASKS && + !preferred_cluster(cpu_rq(env->dst_cpu)->cluster, p)) + return 0; +#endif + if (task_running(env->src_rq, p)) { schedstat_inc(p->se.statistics.nr_failed_migrations_running); return 0; @@ -7798,12 +7842,21 @@ static int detach_tasks(struct lb_env *env) unsigned long util, load; struct task_struct *p; int detached = 0; +#ifdef CONFIG_SCHED_RTG + int orig_loop = env->loop; +#endif lockdep_assert_held(&env->src_rq->lock); if (env->imbalance <= 0) return 0; +#ifdef CONFIG_SCHED_RTG + if (!same_cluster(env->dst_cpu, env->src_cpu)) + env->flags |= LBF_IGNORE_PREFERRED_CLUSTER_TASKS; + +redo: +#endif while (!list_empty(tasks)) { /* * We don't want to steal all, otherwise we may be treated likewise, @@ -7905,6 +7958,15 @@ static int detach_tasks(struct lb_env *env) list_move(&p->se.group_node, tasks); } +#ifdef CONFIG_SCHED_RTG + if (env->flags & LBF_IGNORE_PREFERRED_CLUSTER_TASKS && !detached) { + tasks = &env->src_rq->cfs_tasks; + env->flags &= ~LBF_IGNORE_PREFERRED_CLUSTER_TASKS; + env->loop = orig_loop; + goto redo; + } +#endif + /* * Right now, this is one of only two places we collect this stat * so we can safely collect detach_one_task() stats here rather diff --git a/kernel/sched/rtg/rtg.c b/kernel/sched/rtg/rtg.c index 78fbcd1b9cd3..200895617a71 100644 --- a/kernel/sched/rtg/rtg.c +++ b/kernel/sched/rtg/rtg.c @@ -4,6 +4,7 @@ * */ #include +#include #include #include "../sched.h" @@ -182,6 +183,8 @@ static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp, trace_sched_migration_update_sum(p, migrate_type, rq); } +static void _set_preferred_cluster(struct related_thread_group *grp, + int sched_cluster_id); static void 
remove_task_from_group(struct task_struct *p) { struct related_thread_group *grp = p->grp; @@ -207,6 +210,8 @@ static void remove_task_from_group(struct task_struct *p) if (!list_empty(&grp->tasks)) empty_group = false; + else + _set_preferred_cluster(grp, -1); raw_spin_unlock_irqrestore(&grp->lock, irqflag); __task_rq_unlock(rq, &flag); @@ -519,6 +524,185 @@ void sched_update_rtg_tick(struct task_struct *p) rcu_read_unlock(); } +int preferred_cluster(struct sched_cluster *cluster, struct task_struct *p) +{ + struct related_thread_group *grp = NULL; + int rc = 1; + + rcu_read_lock(); + + grp = task_related_thread_group(p); + if (grp != NULL) + rc = (grp->preferred_cluster == cluster); + + rcu_read_unlock(); + return rc; +} + +unsigned int get_cluster_grp_running(int cluster_id) +{ + struct related_thread_group *grp = NULL; + unsigned int total_grp_running = 0; + unsigned long flag, rtg_flag; + unsigned int i; + + read_lock_irqsave(&related_thread_group_lock, rtg_flag); + + /* grp_id 0 is used for exited tasks */ + for (i = 1; i < MAX_NUM_CGROUP_COLOC_ID; i++) { + grp = lookup_related_thread_group(i); + if (!grp) + continue; + + raw_spin_lock_irqsave(&grp->lock, flag); + if (grp->preferred_cluster != NULL && + grp->preferred_cluster->id == cluster_id) + total_grp_running += grp->nr_running; + raw_spin_unlock_irqrestore(&grp->lock, flag); + } + read_unlock_irqrestore(&related_thread_group_lock, rtg_flag); + + return total_grp_running; +} + +static void _set_preferred_cluster(struct related_thread_group *grp, + int sched_cluster_id) +{ + struct sched_cluster *cluster = NULL; + struct sched_cluster *cluster_found = NULL; + + if (sched_cluster_id == -1) { + grp->preferred_cluster = NULL; + return; + } + + for_each_sched_cluster_reverse(cluster) { + if (cluster->id == sched_cluster_id) { + cluster_found = cluster; + break; + } + } + + if (cluster_found != NULL) + grp->preferred_cluster = cluster_found; + else + pr_err("cannot found sched_cluster_id=%d\n", sched_cluster_id); +} + +/* + * sched_cluster_id == -1: grp will set to NULL + */ +static void set_preferred_cluster(struct related_thread_group *grp, + int sched_cluster_id) +{ + unsigned long flag; + + raw_spin_lock_irqsave(&grp->lock, flag); + _set_preferred_cluster(grp, sched_cluster_id); + raw_spin_unlock_irqrestore(&grp->lock, flag); +} + +int sched_set_group_preferred_cluster(unsigned int grp_id, int sched_cluster_id) +{ + struct related_thread_group *grp = NULL; + + /* DEFAULT_CGROUP_COLOC_ID is a reserved id */ + if (grp_id == DEFAULT_CGROUP_COLOC_ID || + grp_id >= MAX_NUM_CGROUP_COLOC_ID) + return -EINVAL; + + grp = lookup_related_thread_group(grp_id); + if (!grp) { + pr_err("set preferred cluster for group %d fail\n", grp_id); + return -ENODEV; + } + set_preferred_cluster(grp, sched_cluster_id); + + return 0; +} + +struct cpumask *find_rtg_target(struct task_struct *p) +{ + struct related_thread_group *grp = NULL; + struct sched_cluster *preferred_cluster = NULL; + struct cpumask *rtg_target = NULL; + + rcu_read_lock(); + grp = task_related_thread_group(p); + rcu_read_unlock(); + + if (!grp) + return NULL; + + preferred_cluster = grp->preferred_cluster; + if (!preferred_cluster) + return NULL; + + rtg_target = &preferred_cluster->cpus; + if (!task_fits_max(p, cpumask_first(rtg_target))) + return NULL; + + return rtg_target; +} + +int find_rtg_cpu(struct task_struct *p) +{ + int i; + cpumask_t search_cpus = CPU_MASK_NONE; + int max_spare_cap_cpu = -1; + unsigned long max_spare_cap = 0; + int idle_backup_cpu = -1; + struct cpumask 
*preferred_cpus = find_rtg_target(p); + + if (!preferred_cpus) + return -1; + + cpumask_and(&search_cpus, p->cpus_ptr, cpu_online_mask); +#ifdef CONFIG_CPU_ISOLATION_OPT + cpumask_andnot(&search_cpus, &search_cpus, cpu_isolated_mask); +#endif + + /* search the perferred idle cpu */ + for_each_cpu_and(i, &search_cpus, preferred_cpus) { + if (is_reserved(i)) + continue; + + if (idle_cpu(i) || (i == task_cpu(p) && p->state == TASK_RUNNING)) + return i; + } + + for_each_cpu(i, &search_cpus) { + unsigned long spare_cap; + + if (sched_cpu_high_irqload(i)) + continue; + + if (is_reserved(i)) + continue; + + /* take the Active LB CPU as idle_backup_cpu */ + if (idle_cpu(i) || (i == task_cpu(p) && p->state == TASK_RUNNING)) { + /* find the idle_backup_cpu with max capacity */ + if (idle_backup_cpu == -1 || + capacity_orig_of(i) > capacity_orig_of(idle_backup_cpu)) + idle_backup_cpu = i; + + continue; + } + + spare_cap = capacity_spare_without(i, p); + if (spare_cap > max_spare_cap) { + max_spare_cap = spare_cap; + max_spare_cap_cpu = i; + } + } + + if (idle_backup_cpu != -1) + return idle_backup_cpu; + + return max_spare_cap_cpu; +} + #ifdef CONFIG_SCHED_RTG_DEBUG #define seq_printf_rtg(m, x...) \ do { \ @@ -532,6 +716,8 @@ static void print_rtg_info(struct seq_file *file, const struct related_thread_group *grp) { seq_printf_rtg(file, "RTG_ID : %d\n", grp->id); + seq_printf_rtg(file, "RTG_CLUSTER : %d\n", + grp->preferred_cluster ? grp->preferred_cluster->id : -1); } static char rtg_task_state_to_char(const struct task_struct *tsk) diff --git a/kernel/sched/rtg/rtg.h b/kernel/sched/rtg/rtg.h index e32c67aebb96..a158ab74f292 100644 --- a/kernel/sched/rtg/rtg.h +++ b/kernel/sched/rtg/rtg.h @@ -8,6 +8,9 @@ #include #include +#define for_each_sched_cluster_reverse(cluster) \ + list_for_each_entry_reverse(cluster, &cluster_head, list) + #ifdef CONFIG_SCHED_RTG void init_task_rtg(struct task_struct *p); int alloc_related_thread_groups(void); @@ -22,7 +25,16 @@ int sched_set_group_window_rollover(unsigned int grp_id); struct group_cpu_time *group_update_cpu_time(struct rq *rq, struct related_thread_group *grp); void sched_update_rtg_tick(struct task_struct *p); +int preferred_cluster(struct sched_cluster *cluster, struct task_struct *p); +int sched_set_group_preferred_cluster(unsigned int grp_id, int sched_cluster_id); +struct cpumask *find_rtg_target(struct task_struct *p); +int find_rtg_cpu(struct task_struct *p); #else static inline int alloc_related_thread_groups(void) { return 0; } +static inline int sched_set_group_preferred_cluster(unsigned int grp_id, + int sched_cluster_id) +{ + return 0; +} #endif /* CONFIG_SCHED_RTG */ #endif diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index fdb69a9ad1f9..9630e3c00558 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -477,7 +477,6 @@ struct task_group { /* Effective clamp values used for a task group */ struct uclamp_se uclamp[UCLAMP_CNT]; #endif - }; #ifdef CONFIG_FAIR_GROUP_SCHED @@ -2594,6 +2593,11 @@ static inline bool uclamp_is_used(void) #endif #ifdef CONFIG_SMP +static inline unsigned long capacity_of(int cpu) +{ + return cpu_rq(cpu)->cpu_capacity; +} + static inline unsigned long capacity_orig_of(int cpu) { return cpu_rq(cpu)->cpu_capacity_orig; @@ -2748,6 +2752,13 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) void swake_up_all_locked(struct swait_queue_head *q); void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); +#ifdef CONFIG_SCHED_RTG +extern bool task_fits_max(struct 
task_struct *p, int cpu); +extern unsigned long capacity_spare_without(int cpu, struct task_struct *p); +extern int update_preferred_cluster(struct related_thread_group *grp, + struct task_struct *p, u32 old_load, bool from_tick); +#endif + #ifdef CONFIG_SCHED_WALT static inline int cluster_first_cpu(struct sched_cluster *cluster) { diff --git a/kernel/sched/walt.h b/kernel/sched/walt.h index 84da97ccce20..a1fba5b65640 100644 --- a/kernel/sched/walt.h +++ b/kernel/sched/walt.h @@ -45,6 +45,11 @@ static inline struct sched_cluster *cpu_cluster(int cpu) return cpu_rq(cpu)->cluster; } +static inline int same_cluster(int src_cpu, int dst_cpu) +{ + return cpu_rq(src_cpu)->cluster == cpu_rq(dst_cpu)->cluster; +} + static inline u64 scale_exec_time(u64 delta, struct rq *rq) { unsigned long capcurr = capacity_curr_of(cpu_of(rq)); @@ -243,6 +248,7 @@ static inline int sched_cpu_high_irqload(int cpu) { return 0; } +static inline int same_cluster(int src_cpu, int dst_cpu) { return 1; } #endif /* CONFIG_SCHED_WALT */ #endif /* __WALT_H */ -- Gitee From ffa3ddb88dd6f59899a000ac6a98e19a37092371 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Mon, 14 Feb 2022 20:44:20 +0800 Subject: [PATCH 07/11] sched: Add interfaces for normalized utilization of related thread group codeaurora inclusion category: feature issue: #I4SULH CVE: NA Signed-off-by: Li Ming ------------------------------------------- Use normalized util as RTG util and support the RTG util invalid interval adjustable. Signed-off-by: Vikram Mulukutla Signed-off-by: Satya Durga Srinivasu Prabhala Signed-off-by: Srinath Sridharan --- include/linux/sched/rtg.h | 4 + kernel/sched/rtg/rtg.c | 214 +++++++++++++++++++++++++++++++++++++- kernel/sched/rtg/rtg.h | 11 +- kernel/sched/walt.c | 2 +- 4 files changed, 226 insertions(+), 5 deletions(-) diff --git a/include/linux/sched/rtg.h b/include/linux/sched/rtg.h index eae7f83808ff..d27e1507e334 100644 --- a/include/linux/sched/rtg.h +++ b/include/linux/sched/rtg.h @@ -41,6 +41,10 @@ struct related_thread_group { unsigned int window_size; const struct rtg_class *rtg_class; struct sched_cluster *preferred_cluster; + int max_boost; + unsigned long util_invalid_interval; /* in nanoseconds */ + unsigned long util_update_timeout; /* in nanoseconds */ + u64 last_util_update_time; }; struct rtg_class { diff --git a/kernel/sched/rtg/rtg.c b/kernel/sched/rtg/rtg.c index 200895617a71..016b2143ea8c 100644 --- a/kernel/sched/rtg/rtg.c +++ b/kernel/sched/rtg/rtg.c @@ -15,6 +15,8 @@ #define REM_TASK 1 #define DEFAULT_GROUP_RATE 60 /* 60FPS */ +#define DEFAULT_UTIL_INVALID_INTERVAL (~0U) /* ns */ +#define DEFAULT_UTIL_UPDATE_TIMEOUT 20000000 /* ns */ struct related_thread_group *related_thread_groups[MAX_NUM_CGROUP_COLOC_ID]; static DEFINE_RWLOCK(related_thread_group_lock); @@ -57,6 +59,9 @@ int alloc_related_thread_groups(void) INIT_LIST_HEAD(&grp->tasks); INIT_LIST_HEAD(&grp->list); grp->window_size = NSEC_PER_SEC / DEFAULT_GROUP_RATE; + grp->util_invalid_interval = DEFAULT_UTIL_INVALID_INTERVAL; + grp->util_update_timeout = DEFAULT_UTIL_UPDATE_TIMEOUT; + grp->max_boost = 0; raw_spin_lock_init(&grp->lock); related_thread_groups[i] = grp; @@ -208,10 +213,15 @@ static void remove_task_from_group(struct task_struct *p) grp->nr_running = 0; } - if (!list_empty(&grp->tasks)) + if (!list_empty(&grp->tasks)) { empty_group = false; - else + } else { +#ifdef CONFIG_UCLAMP_TASK + grp->max_boost = 0; +#endif _set_preferred_cluster(grp, -1); + grp->ravg.normalized_util = 0; + } raw_spin_unlock_irqrestore(&grp->lock, irqflag); 
__task_rq_unlock(rq, &flag); @@ -234,6 +244,9 @@ add_task_to_group(struct task_struct *p, struct related_thread_group *grp) struct rq *rq = NULL; struct rq_flags flag; unsigned long irqflag; +#ifdef CONFIG_UCLAMP_TASK + int boost; +#endif /* * Change p->grp under rq->lock. Will prevent races with read-side @@ -252,6 +265,11 @@ add_task_to_group(struct task_struct *p, struct related_thread_group *grp) sched_ktime_clock()); } +#ifdef CONFIG_UCLAMP_TASK + boost = (int)uclamp_eff_value(p, UCLAMP_MIN); + if (boost > grp->max_boost) + grp->max_boost = boost; +#endif raw_spin_unlock_irqrestore(&grp->lock, irqflag); __task_rq_unlock(rq, &flag); @@ -328,9 +346,10 @@ unsigned int sched_get_group_id(struct task_struct *p) return group_id; } -void update_group_nr_running(struct task_struct *p, int event) +void update_group_nr_running(struct task_struct *p, int event, u64 wallclock) { struct related_thread_group *grp; + bool need_update = false; rcu_read_lock(); grp = task_related_thread_group(p); @@ -351,9 +370,17 @@ void update_group_nr_running(struct task_struct *p, int event) grp->nr_running = 0; } + /* update preferred cluster if no update long */ + if (wallclock - grp->last_util_update_time > grp->util_update_timeout) + need_update = true; + raw_spin_unlock(&grp->lock); rcu_read_unlock(); + + if (need_update && grp->rtg_class && grp->rtg_class->sched_update_rtg_tick && + grp->id != DEFAULT_CGROUP_COLOC_ID) + grp->rtg_class->sched_update_rtg_tick(grp); } int sched_set_group_window_size(unsigned int grp_id, unsigned int window_size) @@ -390,6 +417,10 @@ int sched_set_group_window_rollover(unsigned int grp_id) struct related_thread_group *grp = NULL; u64 wallclock; unsigned long flag; +#ifdef CONFIG_UCLAMP_TASK + struct task_struct *p = NULL; + int boost; +#endif grp = lookup_related_thread_group(grp_id); if (!grp) { @@ -402,6 +433,15 @@ int sched_set_group_window_rollover(unsigned int grp_id) wallclock = sched_ktime_clock(); grp->prev_window_time = wallclock - grp->window_start; grp->window_start = wallclock; + grp->max_boost = 0; + +#ifdef CONFIG_UCLAMP_TASK + list_for_each_entry(p, &grp->tasks, grp_list) { + boost = (int)uclamp_eff_value(p, UCLAMP_MIN); + if (boost > 0) + grp->max_boost = boost; + } +#endif group_time_rollover(&grp->ravg); raw_spin_unlock_irqrestore(&grp->lock, flag); @@ -703,6 +743,172 @@ int find_rtg_cpu(struct task_struct *p) return max_spare_cap_cpu; } +int sched_set_group_util_invalid_interval(unsigned int grp_id, + unsigned int interval) +{ + struct related_thread_group *grp = NULL; + unsigned long flag; + + if (interval == 0) + return -EINVAL; + + /* DEFAULT_CGROUP_COLOC_ID is a reserved id */ + if (grp_id == DEFAULT_CGROUP_COLOC_ID || + grp_id >= MAX_NUM_CGROUP_COLOC_ID) + return -EINVAL; + + grp = lookup_related_thread_group(grp_id); + if (!grp) { + pr_err("set invalid interval for group %d fail\n", grp_id); + return -ENODEV; + } + + raw_spin_lock_irqsave(&grp->lock, flag); + if ((signed int)interval < 0) + grp->util_invalid_interval = DEFAULT_UTIL_INVALID_INTERVAL; + else + grp->util_invalid_interval = interval * NSEC_PER_MSEC; + + raw_spin_unlock_irqrestore(&grp->lock, flag); + + return 0; +} + +static inline bool +group_should_invalid_util(struct related_thread_group *grp, u64 now) +{ + if (grp->util_invalid_interval == DEFAULT_UTIL_INVALID_INTERVAL) + return false; + + return true; +} + +static inline bool valid_normalized_util(struct related_thread_group *grp) +{ + struct task_struct *p = NULL; + cpumask_t rtg_cpus = CPU_MASK_NONE; + bool valid = false; + + if 
(grp->nr_running != 0) { + list_for_each_entry(p, &grp->tasks, grp_list) { + get_task_struct(p); + if (p->state == TASK_RUNNING) + cpumask_set_cpu(task_cpu(p), &rtg_cpus); + put_task_struct(p); + } + + valid = cpumask_intersects(&rtg_cpus, + &grp->preferred_cluster->cpus); + } + + return valid; +} + +void sched_get_max_group_util(const struct cpumask *query_cpus, + unsigned long *util, unsigned int *freq) +{ + struct related_thread_group *grp = NULL; + unsigned long max_grp_util = 0; + unsigned int max_grp_freq = 0; + u64 now = ktime_get_ns(); + unsigned long rtg_flag; + unsigned long flag; + + /* + * sum the prev_runnable_sum for each rtg, + * return the max rtg->load + */ + read_lock_irqsave(&related_thread_group_lock, rtg_flag); + if (list_empty(&active_related_thread_groups)) + goto unlock; + + for_each_related_thread_group(grp) { + raw_spin_lock_irqsave(&grp->lock, flag); + if (!list_empty(&grp->tasks) && + grp->preferred_cluster != NULL && + cpumask_intersects(query_cpus, + &grp->preferred_cluster->cpus) && + !group_should_invalid_util(grp, now)) { + + if (grp->ravg.normalized_util > max_grp_util && + valid_normalized_util(grp)) + max_grp_util = grp->ravg.normalized_util; + } + raw_spin_unlock_irqrestore(&grp->lock, flag); + } + +unlock: + read_unlock_irqrestore(&related_thread_group_lock, rtg_flag); + + *freq = max_grp_freq; + *util = max_grp_util; +} + +static struct sched_cluster *best_cluster(struct related_thread_group *grp) +{ + struct sched_cluster *cluster = NULL; + struct sched_cluster *max_cluster = NULL; + int cpu; + unsigned long util = grp->ravg.normalized_util; + unsigned long boosted_grp_util = util + grp->max_boost; + unsigned long max_cap = 0; + unsigned long cap = 0; + + /* find new cluster */ + for_each_sched_cluster(cluster) { + cpu = cpumask_first(&cluster->cpus); + cap = capacity_orig_of(cpu); + if (cap > max_cap) { + max_cap = cap; + max_cluster = cluster; + } + + if (boosted_grp_util <= cap) + return cluster; + } + + return max_cluster; +} + +int sched_set_group_normalized_util(unsigned int grp_id, unsigned long util, + unsigned int flag) +{ + struct related_thread_group *grp = NULL; + u64 now; + unsigned long flags; + struct sched_cluster *preferred_cluster = NULL; + + grp = lookup_related_thread_group(grp_id); + if (!grp) { + pr_err("set normalized util for group %d fail\n", grp_id); + return -ENODEV; + } + + raw_spin_lock_irqsave(&grp->lock, flags); + + if (list_empty(&grp->tasks)) { + raw_spin_unlock_irqrestore(&grp->lock, flags); + return 0; + } + + grp->ravg.normalized_util = util; + + preferred_cluster = best_cluster(grp); + + /* update prev_cluster force when preferred_cluster changed */ + if (!grp->preferred_cluster) + grp->preferred_cluster = preferred_cluster; + else if (grp->preferred_cluster != preferred_cluster) + grp->preferred_cluster = preferred_cluster; + + now = ktime_get_ns(); + grp->last_util_update_time = now; + + raw_spin_unlock_irqrestore(&grp->lock, flags); + + return 0; +} + #ifdef CONFIG_SCHED_RTG_DEBUG #define seq_printf_rtg(m, x...) \ do { \ @@ -716,6 +922,8 @@ static void print_rtg_info(struct seq_file *file, const struct related_thread_group *grp) { seq_printf_rtg(file, "RTG_ID : %d\n", grp->id); + seq_printf_rtg(file, "RTG_INTERVAL : INVALID:%lums\n", + grp->util_invalid_interval / NSEC_PER_MSEC); seq_printf_rtg(file, "RTG_CLUSTER : %d\n", grp->preferred_cluster ? 
grp->preferred_cluster->id : -1); } diff --git a/kernel/sched/rtg/rtg.h b/kernel/sched/rtg/rtg.h index a158ab74f292..abd70d449ddb 100644 --- a/kernel/sched/rtg/rtg.h +++ b/kernel/sched/rtg/rtg.h @@ -16,7 +16,7 @@ void init_task_rtg(struct task_struct *p); int alloc_related_thread_groups(void); struct related_thread_group *lookup_related_thread_group(unsigned int group_id); struct related_thread_group *task_related_thread_group(struct task_struct *p); -void update_group_nr_running(struct task_struct *p, int event); +void update_group_nr_running(struct task_struct *p, int event, u64 wallclock); struct rq; void update_group_demand(struct task_struct *p, struct rq *rq, int event, u64 wallclock); @@ -29,6 +29,10 @@ int preferred_cluster(struct sched_cluster *cluster, struct task_struct *p); int sched_set_group_preferred_cluster(unsigned int grp_id, int sched_cluster_id); struct cpumask *find_rtg_target(struct task_struct *p); int find_rtg_cpu(struct task_struct *p); +int sched_set_group_util_invalid_interval(unsigned int grp_id, + unsigned int interval); +int sched_set_group_normalized_util(unsigned int grp_id, unsigned long util, + unsigned int flag); #else static inline int alloc_related_thread_groups(void) { return 0; } static inline int sched_set_group_preferred_cluster(unsigned int grp_id, @@ -36,5 +40,10 @@ static inline int sched_set_group_preferred_cluster(unsigned int grp_id, { return 0; } +static inline int sched_set_group_normalized_util(unsigned int grp_id, unsigned long util, + unsigned int flag) +{ + return 0; +} #endif /* CONFIG_SCHED_RTG */ #endif diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c index a2824cc9bc2e..40515b1bbdb7 100644 --- a/kernel/sched/walt.c +++ b/kernel/sched/walt.c @@ -1180,7 +1180,7 @@ void update_task_ravg(struct task_struct *p, struct rq *rq, int event, old_window_start = update_window_start(rq, wallclock, event); #ifdef CONFIG_SCHED_RTG - update_group_nr_running(p, event); + update_group_nr_running(p, event, wallclock); #endif if (!p->ravg.mark_start) goto done; -- Gitee From 9559e7e57b517576f3ed5890a6ceeebbdc9b012d Mon Sep 17 00:00:00 2001 From: Li Ming Date: Mon, 14 Feb 2022 14:55:36 +0800 Subject: [PATCH 08/11] sched: Add debugfs for sched cluster codeaurora inclusion category: feature issue: #I4SULH CVE: NA Signed-off-by: Li Ming ------------------------------------------- Show the information of sched cluster in /proc/sched_cluster. 
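For illustration, the output format follows the seq_printf() calls added below; on a hypothetical 4+4 two-cluster system (the cluster IDs and CPU ranges here are assumptions, not output captured from real hardware) the node would read roughly:

  min_id:0, max_id:1
  id:0, cpumask:0(0-3)
  id:1, cpumask:4(4-7)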
Signed-off-by: Vikram Mulukutla Signed-off-by: Satya Durga Srinivasu Prabhala Signed-off-by: Srinath Sridharan --- kernel/sched/walt.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c index 40515b1bbdb7..8d4c79028d8a 100644 --- a/kernel/sched/walt.c +++ b/kernel/sched/walt.c @@ -1813,3 +1813,49 @@ void walt_sched_init_rq(struct rq *rq) for (j = 0; j < NUM_TRACKED_WINDOWS; j++) memset(&rq->load_subs[j], 0, sizeof(struct load_subtractions)); } + +#define min_cap_cluster() \ + list_first_entry(&cluster_head, struct sched_cluster, list) +#define max_cap_cluster() \ + list_last_entry(&cluster_head, struct sched_cluster, list) +static int sched_cluster_debug_show(struct seq_file *file, void *param) +{ + struct sched_cluster *cluster = NULL; + + seq_printf(file, "min_id:%d, max_id:%d\n", + min_cap_cluster()->id, + max_cap_cluster()->id); + + for_each_sched_cluster(cluster) { + seq_printf(file, "id:%d, cpumask:%d(%*pbl)\n", + cluster->id, + cpumask_first(&cluster->cpus), + cpumask_pr_args(&cluster->cpus)); + } + + return 0; +} + +static int sched_cluster_debug_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, sched_cluster_debug_show, NULL); +} + +static const struct proc_ops sched_cluster_fops = { + .proc_open = sched_cluster_debug_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = seq_release, +}; + +static int __init init_sched_cluster_debug_procfs(void) +{ + struct proc_dir_entry *pe = NULL; + + pe = proc_create("sched_cluster", + 0444, NULL, &sched_cluster_fops); + if (!pe) + return -ENOMEM; + return 0; +} +late_initcall(init_sched_cluster_debug_procfs); -- Gitee From b7f2b5a8b7a4f1d479b0960a49480912e2783ef8 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Mon, 14 Feb 2022 18:01:31 +0800 Subject: [PATCH 09/11] sched: Support forced adjustment of CPU frequency according to the group util codeaurora inclusion category: feature issue: #I4SULH CVE: NA Signed-off-by: Li Ming ------------------------------------------- Add SCHED_CPUFREQ_FORCE_UPDATE flag to allow skipping the CPU frequency scaling interval check (rate_limit_us, 20ms by default).
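A minimal sketch of how a caller requests an immediate frequency re-evaluation (it mirrors the cpufreq_update_util() calls this series adds to rtg.c; the bare cpu variable is a placeholder):

	/*
	 * Force schedutil to recompute the frequency for this CPU now,
	 * bypassing the rate_limit_us check in sugov_should_update_freq().
	 */
	cpufreq_update_util(cpu_rq(cpu), SCHED_CPUFREQ_FORCE_UPDATE | SCHED_CPUFREQ_WALT);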
Signed-off-by: Vikram Mulukutla Signed-off-by: Satya Durga Srinivasu Prabhala Signed-off-by: Srinath Sridharan --- include/linux/sched/cpufreq.h | 1 + include/linux/sched/rtg.h | 7 +++ kernel/sched/cpufreq_schedutil.c | 26 +++++++++-- kernel/sched/rtg/rtg.c | 79 ++++++++++++++++++++++++++++++-- kernel/sched/rtg/rtg.h | 8 ++++ 5 files changed, 114 insertions(+), 7 deletions(-) diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h index c7cf63236f5b..94e7f84de227 100644 --- a/include/linux/sched/cpufreq.h +++ b/include/linux/sched/cpufreq.h @@ -11,6 +11,7 @@ #define SCHED_CPUFREQ_IOWAIT (1U << 0) #define SCHED_CPUFREQ_WALT (1U << 1) #define SCHED_CPUFREQ_CONTINUE (1U << 2) +#define SCHED_CPUFREQ_FORCE_UPDATE (1U << 3) #ifdef CONFIG_CPU_FREQ struct cpufreq_policy; diff --git a/include/linux/sched/rtg.h b/include/linux/sched/rtg.h index d27e1507e334..735b8ccae745 100644 --- a/include/linux/sched/rtg.h +++ b/include/linux/sched/rtg.h @@ -44,13 +44,20 @@ struct related_thread_group { int max_boost; unsigned long util_invalid_interval; /* in nanoseconds */ unsigned long util_update_timeout; /* in nanoseconds */ + unsigned long freq_update_interval; /* in nanoseconds */ u64 last_util_update_time; + u64 last_freq_update_time; }; struct rtg_class { void (*sched_update_rtg_tick)(struct related_thread_group *grp); }; +enum rtg_freq_update_flags { + RTG_FREQ_FORCE_UPDATE = (1 << 0), + RTG_FREQ_NORMAL_UPDATE = (1 << 1), +}; + int sched_set_group_id(struct task_struct *p, unsigned int group_id); unsigned int sched_get_group_id(struct task_struct *p); #endif /* CONFIG_SCHED_RTG */ diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index cb72dc5c2002..742ed2fe50de 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -9,6 +9,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "sched.h" +#include "rtg/rtg.h" #include #include @@ -38,6 +39,10 @@ struct sugov_policy { struct mutex work_lock; struct kthread_worker worker; struct task_struct *thread; +#ifdef CONFIG_SCHED_RTG + unsigned long rtg_util; + unsigned int rtg_freq; +#endif bool work_in_progress; bool limits_changed; @@ -448,13 +453,18 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, unsigned long util, max; unsigned int next_f; unsigned int cached_freq = sg_policy->cached_raw_freq; + bool force_update = false; + +#ifdef CONFIG_SCHED_RTG + force_update = flags & SCHED_CPUFREQ_FORCE_UPDATE; +#endif sugov_iowait_boost(sg_cpu, time, flags); sg_cpu->last_update = time; ignore_dl_rate_limit(sg_cpu, sg_policy); - if (!sugov_should_update_freq(sg_policy, time)) + if (!force_update && !sugov_should_update_freq(sg_policy, time)) return; util = sugov_get_util(sg_cpu); @@ -507,6 +517,11 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) } } +#ifdef CONFIG_SCHED_RTG + sched_get_max_group_util(policy->cpus, &sg_policy->rtg_util, &sg_policy->rtg_freq); + util = max(sg_policy->rtg_util, util); +#endif + return get_next_freq(sg_policy, util, max); } @@ -516,7 +531,11 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags) struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); struct sugov_policy *sg_policy = sg_cpu->sg_policy; unsigned int next_f; + bool force_update = false; +#ifdef CONFIG_SCHED_RTG + force_update = flags & SCHED_CPUFREQ_FORCE_UPDATE; +#endif raw_spin_lock(&sg_policy->update_lock); sugov_iowait_boost(sg_cpu, time, flags); @@ -525,9 +544,10 @@ 
sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags) ignore_dl_rate_limit(sg_cpu, sg_policy); #ifdef CONFIG_SCHED_WALT - if (sugov_should_update_freq(sg_policy, time) && !(flags & SCHED_CPUFREQ_CONTINUE)) { + if ((force_update || sugov_should_update_freq(sg_policy, time)) + && !(flags & SCHED_CPUFREQ_CONTINUE)) { #else - if (sugov_should_update_freq(sg_policy, time)) { + if (force_update || sugov_should_update_freq(sg_policy, time)) { #endif next_f = sugov_next_freq_shared(sg_cpu, time); diff --git a/kernel/sched/rtg/rtg.c b/kernel/sched/rtg/rtg.c index 016b2143ea8c..51b9c3fad7da 100644 --- a/kernel/sched/rtg/rtg.c +++ b/kernel/sched/rtg/rtg.c @@ -17,6 +17,7 @@ #define DEFAULT_GROUP_RATE 60 /* 60FPS */ #define DEFAULT_UTIL_INVALID_INTERVAL (~0U) /* ns */ #define DEFAULT_UTIL_UPDATE_TIMEOUT 20000000 /* ns */ +#define DEFAULT_FREQ_UPDATE_INTERVAL 8000000 /* ns */ struct related_thread_group *related_thread_groups[MAX_NUM_CGROUP_COLOC_ID]; static DEFINE_RWLOCK(related_thread_group_lock); @@ -62,6 +63,7 @@ int alloc_related_thread_groups(void) grp->util_invalid_interval = DEFAULT_UTIL_INVALID_INTERVAL; grp->util_update_timeout = DEFAULT_UTIL_UPDATE_TIMEOUT; grp->max_boost = 0; + grp->freq_update_interval = DEFAULT_FREQ_UPDATE_INTERVAL; raw_spin_lock_init(&grp->lock); related_thread_groups[i] = grp; @@ -780,7 +782,7 @@ group_should_invalid_util(struct related_thread_group *grp, u64 now) if (grp->util_invalid_interval == DEFAULT_UTIL_INVALID_INTERVAL) return false; - return true; + return (now - grp->last_freq_update_time >= grp->util_invalid_interval); } static inline bool valid_normalized_util(struct related_thread_group *grp) @@ -870,13 +872,34 @@ static struct sched_cluster *best_cluster(struct related_thread_group *grp) return max_cluster; } +static bool group_should_update_freq(struct related_thread_group *grp, + int cpu, unsigned int flags, u64 now) +{ + if (!grp) + return true; + + if (flags & RTG_FREQ_FORCE_UPDATE) { + return true; + } else if (flags & RTG_FREQ_NORMAL_UPDATE) { + if (now - grp->last_freq_update_time >= + grp->freq_update_interval) + return true; + } + + return false; +} + int sched_set_group_normalized_util(unsigned int grp_id, unsigned long util, unsigned int flag) { struct related_thread_group *grp = NULL; + bool need_update_prev_freq = false; + bool need_update_next_freq = false; u64 now; unsigned long flags; struct sched_cluster *preferred_cluster = NULL; + int prev_cpu; + int next_cpu; grp = lookup_related_thread_group(grp_id); if (!grp) { @@ -896,16 +919,63 @@ int sched_set_group_normalized_util(unsigned int grp_id, unsigned long util, preferred_cluster = best_cluster(grp); /* update prev_cluster force when preferred_cluster changed */ - if (!grp->preferred_cluster) + if (!grp->preferred_cluster) { grp->preferred_cluster = preferred_cluster; - else if (grp->preferred_cluster != preferred_cluster) + } else if (grp->preferred_cluster != preferred_cluster) { + prev_cpu = cpumask_first(&grp->preferred_cluster->cpus); grp->preferred_cluster = preferred_cluster; + need_update_prev_freq = true; + } + + if (grp->preferred_cluster != NULL) + next_cpu = cpumask_first(&grp->preferred_cluster->cpus); + else + next_cpu = 0; + now = ktime_get_ns(); grp->last_util_update_time = now; + need_update_next_freq = + group_should_update_freq(grp, next_cpu, flag, now); + if (need_update_next_freq) + grp->last_freq_update_time = now; raw_spin_unlock_irqrestore(&grp->lock, flags); + if (need_update_prev_freq) + cpufreq_update_util(cpu_rq(prev_cpu), + 
SCHED_CPUFREQ_FORCE_UPDATE | SCHED_CPUFREQ_WALT); + + if (need_update_next_freq) + cpufreq_update_util(cpu_rq(next_cpu), + SCHED_CPUFREQ_FORCE_UPDATE | SCHED_CPUFREQ_WALT); + + return 0; +} + +int sched_set_group_freq_update_interval(unsigned int grp_id, unsigned int interval) +{ + struct related_thread_group *grp = NULL; + unsigned long flag; + + if ((signed int)interval <= 0) + return -EINVAL; + + /* DEFAULT_CGROUP_COLOC_ID is a reserved id */ + if (grp_id == DEFAULT_CGROUP_COLOC_ID || + grp_id >= MAX_NUM_CGROUP_COLOC_ID) + return -EINVAL; + + grp = lookup_related_thread_group(grp_id); + if (!grp) { + pr_err("set update interval for group %d fail\n", grp_id); + return -ENODEV; + } + + raw_spin_lock_irqsave(&grp->lock, flag); + grp->freq_update_interval = interval * NSEC_PER_MSEC; + raw_spin_unlock_irqrestore(&grp->lock, flag); + return 0; } @@ -922,7 +992,8 @@ static void print_rtg_info(struct seq_file *file, const struct related_thread_group *grp) { seq_printf_rtg(file, "RTG_ID : %d\n", grp->id); - seq_printf_rtg(file, "RTG_INTERVAL : INVALID:%lums\n", + seq_printf_rtg(file, "RTG_INTERVAL : UPDATE:%lums#INVALID:%lums\n", + grp->freq_update_interval / NSEC_PER_MSEC, grp->util_invalid_interval / NSEC_PER_MSEC); seq_printf_rtg(file, "RTG_CLUSTER : %d\n", grp->preferred_cluster ? grp->preferred_cluster->id : -1); diff --git a/kernel/sched/rtg/rtg.h b/kernel/sched/rtg/rtg.h index abd70d449ddb..23536c62859a 100644 --- a/kernel/sched/rtg/rtg.h +++ b/kernel/sched/rtg/rtg.h @@ -33,6 +33,10 @@ int sched_set_group_util_invalid_interval(unsigned int grp_id, unsigned int interval); int sched_set_group_normalized_util(unsigned int grp_id, unsigned long util, unsigned int flag); +void sched_get_max_group_util(const struct cpumask *query_cpus, + unsigned long *util, unsigned int *freq); +int sched_set_group_freq_update_interval(unsigned int grp_id, + unsigned int interval); #else static inline int alloc_related_thread_groups(void) { return 0; } static inline int sched_set_group_preferred_cluster(unsigned int grp_id, @@ -45,5 +49,9 @@ static inline int sched_set_group_normalized_util(unsigned int grp_id, unsigned { return 0; } +static inline void sched_get_max_group_util(const struct cpumask *query_cpus, + unsigned long *util, unsigned int *freq) +{ +} #endif /* CONFIG_SCHED_RTG */ #endif -- Gitee From e2a541d6c6b538ab88da0a273e3faba31662a9cd Mon Sep 17 00:00:00 2001 From: Li Ming Date: Mon, 14 Feb 2022 18:38:18 +0800 Subject: [PATCH 10/11] sched: Support adding new tasks to the default group via cgroup attach codeaurora inclusion category: feature issue: #I4SULH CVE: NA Signed-off-by: Li Ming ------------------------------------------- If uclamp.colocate of cpu cgroup is set, the new task which belongs to cgroup will be added to the default group (group_id = DEFAULT_CGROUP_COLOC_ID). 
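On a legacy (v1) hierarchy the knob appears as cpu.uclamp.colocate in each non-root cpu cgroup; for example, assuming the controller is mounted at /sys/fs/cgroup/cpu and a hypothetical "background" cgroup exists:

  echo 1 > /sys/fs/cgroup/cpu/background/cpu.uclamp.colocate

Note that sched_colocate_write() sets colocate_update_disabled on the first write, so the flag can be changed only once; later writes return -EPERM.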
Signed-off-by: Vikram Mulukutla Signed-off-by: Satya Durga Srinivasu Prabhala Signed-off-by: Srinath Sridharan --- kernel/sched/core.c | 60 +++++++++++++++++++ kernel/sched/rtg/Kconfig | 8 +++ kernel/sched/rtg/rtg.c | 125 +++++++++++++++++++++++++++++++++++++-- kernel/sched/rtg/rtg.h | 7 +++ kernel/sched/sched.h | 11 ++++ 5 files changed, 207 insertions(+), 4 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 20dd5009e315..8e506f6efc73 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3452,6 +3452,8 @@ void wake_up_new_task(struct task_struct *p) struct rq *rq; raw_spin_lock_irqsave(&p->pi_lock, rf.flags); + add_new_task_to_grp(p); + p->state = TASK_RUNNING; #ifdef CONFIG_SMP /* @@ -8060,6 +8062,11 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) if (IS_ERR(tg)) return ERR_PTR(-ENOMEM); +#ifdef CONFIG_SCHED_RTG_CGROUP + tg->colocate = false; + tg->colocate_update_disabled = false; +#endif + return &tg->css; } @@ -8149,6 +8156,25 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) return ret; } +#if defined(CONFIG_UCLAMP_TASK_GROUP) && defined(CONFIG_SCHED_RTG_CGROUP) +static void schedgp_attach(struct cgroup_taskset *tset) +{ + struct task_struct *task; + struct cgroup_subsys_state *css; + bool colocate; + struct task_group *tg; + + cgroup_taskset_first(tset, &css); + tg = css_tg(css); + + colocate = tg->colocate; + + cgroup_taskset_for_each(task, css, tset) + sync_cgroup_colocation(task, colocate); +} +#else +static void schedgp_attach(struct cgroup_taskset *tset) { } +#endif static void cpu_cgroup_attach(struct cgroup_taskset *tset) { struct task_struct *task; @@ -8156,6 +8182,8 @@ static void cpu_cgroup_attach(struct cgroup_taskset *tset) cgroup_taskset_for_each(task, css, tset) sched_move_task(task); + + schedgp_attach(tset); } #ifdef CONFIG_UCLAMP_TASK_GROUP @@ -8333,6 +8361,30 @@ static int cpu_uclamp_max_show(struct seq_file *sf, void *v) cpu_uclamp_print(sf, UCLAMP_MAX); return 0; } + +#ifdef CONFIG_SCHED_RTG_CGROUP +static u64 sched_colocate_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct task_group *tg = css_tg(css); + + return (u64) tg->colocate; +} + +static int sched_colocate_write(struct cgroup_subsys_state *css, + struct cftype *cft, u64 colocate) +{ + struct task_group *tg = css_tg(css); + + if (tg->colocate_update_disabled) + return -EPERM; + + tg->colocate = !!colocate; + tg->colocate_update_disabled = true; + + return 0; +} +#endif /* CONFIG_SCHED_RTG_CGROUP */ #endif /* CONFIG_UCLAMP_TASK_GROUP */ #ifdef CONFIG_FAIR_GROUP_SCHED @@ -8701,6 +8753,14 @@ static struct cftype cpu_legacy_files[] = { .seq_show = cpu_uclamp_max_show, .write = cpu_uclamp_max_write, }, +#ifdef CONFIG_SCHED_RTG_CGROUP + { + .name = "uclamp.colocate", + .flags = CFTYPE_NOT_ON_ROOT, + .read_u64 = sched_colocate_read, + .write_u64 = sched_colocate_write, + }, +#endif #endif { } /* Terminate */ }; diff --git a/kernel/sched/rtg/Kconfig b/kernel/sched/rtg/Kconfig index a96073631d16..3e5acad17ac5 100644 --- a/kernel/sched/rtg/Kconfig +++ b/kernel/sched/rtg/Kconfig @@ -14,4 +14,12 @@ config SCHED_RTG_DEBUG help If set, debug node will show rtg threads +config SCHED_RTG_CGROUP + bool "enable DEFAULT_CGROUP_COLOC RTG" + depends on SCHED_RTG + default n + help + If set, support for adding the tasks which belong to + co-located cgroup to DEFAULT_CGROUP_COLOC RTG. 
+ endmenu diff --git a/kernel/sched/rtg/rtg.c b/kernel/sched/rtg/rtg.c index 51b9c3fad7da..e2cf2cdab65c 100644 --- a/kernel/sched/rtg/rtg.c +++ b/kernel/sched/rtg/rtg.c @@ -301,10 +301,18 @@ static int __sched_set_group_id(struct task_struct *p, unsigned int group_id) * In other cases, Switching from one group to another directly is not permitted. */ if (old_grp && group_id) { - pr_err("%s[%d] switching group from %d to %d failed.\n", - p->comm, p->pid, old_grp->id, group_id); - rc = -EINVAL; - goto done; +#ifdef CONFIG_SCHED_RTG_CGROUP + if (old_grp->id == DEFAULT_CGROUP_COLOC_ID) { + remove_task_from_group(p); + } else { +#endif + pr_err("%s[%d] switching group from %d to %d failed.\n", + p->comm, p->pid, old_grp->id, group_id); + rc = -EINVAL; + goto done; +#ifdef CONFIG_SCHED_RTG_CGROUP + } +#endif } if (!group_id) { @@ -979,6 +987,115 @@ int sched_set_group_freq_update_interval(unsigned int grp_id, unsigned int inter return 0; } +#ifdef CONFIG_SCHED_RTG_CGROUP +#ifdef CONFIG_UCLAMP_TASK_GROUP +static inline bool uclamp_task_colocated(struct task_struct *p) +{ + struct cgroup_subsys_state *css; + struct task_group *tg; + bool colocate; + + rcu_read_lock(); + css = task_css(p, cpu_cgrp_id); + if (!css) { + rcu_read_unlock(); + return false; + } + tg = container_of(css, struct task_group, css); + colocate = tg->colocate; + rcu_read_unlock(); + + return colocate; +} +#else +static inline bool uclamp_task_colocated(struct task_struct *p) +{ + return false; +} +#endif /* CONFIG_UCLAMP_TASK_GROUP */ + +void add_new_task_to_grp(struct task_struct *new) +{ + struct related_thread_group *grp = NULL; + unsigned long flag; + + /* + * If the task does not belong to colocated schedtune + * cgroup, nothing to do. We are checking this without + * lock. Even if there is a race, it will be added + * to the co-located cgroup via cgroup attach. + */ + if (!uclamp_task_colocated(new)) + return; + + grp = lookup_related_thread_group(DEFAULT_CGROUP_COLOC_ID); + write_lock_irqsave(&related_thread_group_lock, flag); + + /* + * It's possible that someone already added the new task to the + * group, or it might have been taken out of the colocated schedtune + * cgroup. Check these conditions under lock. + */ + if (!uclamp_task_colocated(new) || new->grp) { + write_unlock_irqrestore(&related_thread_group_lock, flag); + return; + } + + raw_spin_lock(&grp->lock); + + rcu_assign_pointer(new->grp, grp); + list_add(&new->grp_list, &grp->tasks); + + raw_spin_unlock(&grp->lock); + write_unlock_irqrestore(&related_thread_group_lock, flag); +} + + +/* + * We create a default colocation group at boot. There is no need to + * synchronize tasks between cgroups at creation time because the + * correct cgroup hierarchy is not available at boot. Therefore cgroup + * colocation is turned off by default even though the colocation group + * itself has been allocated. Furthermore this colocation group cannot + * be destroyed once it has been created. All of this has been done as part + * of runtime optimizations. + * + * The job of synchronizing tasks to the colocation group is done when + * the colocation flag in the cgroup is turned on.
+ */ +static int __init create_default_coloc_group(void) +{ + struct related_thread_group *grp = NULL; + unsigned long flags; + + grp = lookup_related_thread_group(DEFAULT_CGROUP_COLOC_ID); + write_lock_irqsave(&related_thread_group_lock, flags); + list_add(&grp->list, &active_related_thread_groups); + write_unlock_irqrestore(&related_thread_group_lock, flags); + + return 0; +} +late_initcall(create_default_coloc_group); + +int sync_cgroup_colocation(struct task_struct *p, bool insert) +{ + unsigned int grp_id = insert ? DEFAULT_CGROUP_COLOC_ID : 0; + unsigned int old_grp_id; + + if (p) { + old_grp_id = sched_get_group_id(p); + /* + * If the task is already in a group which is not DEFAULT_CGROUP_COLOC_ID, + * we should not change the group id during switch to background. + */ + if ((old_grp_id != DEFAULT_CGROUP_COLOC_ID) && (grp_id == 0)) + return 0; + } + + return __sched_set_group_id(p, grp_id); +} +#endif /* CONFIG_SCHED_RTG_CGROUP */ + #ifdef CONFIG_SCHED_RTG_DEBUG #define seq_printf_rtg(m, x...) \ do { \ diff --git a/kernel/sched/rtg/rtg.h b/kernel/sched/rtg/rtg.h index 23536c62859a..4f0cedc332f0 100644 --- a/kernel/sched/rtg/rtg.h +++ b/kernel/sched/rtg/rtg.h @@ -37,6 +37,12 @@ void sched_get_max_group_util(const struct cpumask *query_cpus, unsigned long *util, unsigned int *freq); int sched_set_group_freq_update_interval(unsigned int grp_id, unsigned int interval); +#ifdef CONFIG_SCHED_RTG_CGROUP +int sync_cgroup_colocation(struct task_struct *p, bool insert); +void add_new_task_to_grp(struct task_struct *new); +#else +static inline void add_new_task_to_grp(struct task_struct *new) {} +#endif /* CONFIG_SCHED_RTG_CGROUP */ #else static inline int alloc_related_thread_groups(void) { return 0; } static inline int sched_set_group_preferred_cluster(unsigned int grp_id, @@ -53,5 +59,6 @@ static inline void sched_get_max_group_util(const struct cpumask *query_cpus, unsigned long *util, unsigned int *freq) { } +static inline void add_new_task_to_grp(struct task_struct *new) {} #endif /* CONFIG_SCHED_RTG */ #endif diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 9630e3c00558..d79744dcc048 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -477,6 +477,17 @@ struct task_group { /* Effective clamp values used for a task group */ struct uclamp_se uclamp[UCLAMP_CNT]; #endif + +#ifdef CONFIG_SCHED_RTG_CGROUP + /* + * Controls whether tasks of this cgroup should be colocated with each + * other and tasks of other cgroups that have the same flag turned on. + */ + bool colocate; + + /* Controls whether further updates are allowed to the colocate flag */ + bool colocate_update_disabled; +#endif }; #ifdef CONFIG_FAIR_GROUP_SCHED -- Gitee From 1f0d48afb6ced6e61855f6a97a8dafb022676294 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Mon, 14 Feb 2022 18:46:40 +0800 Subject: [PATCH 11/11] sched: Add trace points for related thread group scheduling codeaurora inclusion category: feature issue: #I4SULH CVE: NA Signed-off-by: Li Ming ------------------------------------------- Add find_rtg_cpu/sched_rtg_task_each/sched_rtg_valid_normalized_util trace points for cpu selection. 
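Assuming the usual tracefs mount point, the new events can be enabled with:

  echo 1 > /sys/kernel/tracing/events/rtg/enable

find_rtg_cpu reports one of three reason strings per invocation ("prefer_idle", "idle_backup" or "max_spare") together with the preferred cpumask and the chosen CPU, which shows why a given target CPU was selected.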
Signed-off-by: Vikram Mulukutla Signed-off-by: Satya Durga Srinivasu Prabhala Signed-off-by: Srinath Sridharan --- include/trace/events/rtg.h | 117 +++++++++++++++++++++++++++++++++++++ kernel/sched/rtg/rtg.c | 15 ++++- 2 files changed, 130 insertions(+), 2 deletions(-) create mode 100644 include/trace/events/rtg.h diff --git a/include/trace/events/rtg.h b/include/trace/events/rtg.h new file mode 100644 index 000000000000..12422d2c3ee2 --- /dev/null +++ b/include/trace/events/rtg.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rtg + +#if !defined(_TRACE_RTG_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_RTG_H + +#include +#include + +struct rq; + +TRACE_EVENT(find_rtg_cpu, + + TP_PROTO(struct task_struct *p, const struct cpumask *perferred_cpumask, + char *msg, int cpu), + + TP_ARGS(p, perferred_cpumask, msg, cpu), + + TP_STRUCT__entry( + __array(char, comm, TASK_COMM_LEN) + __field(pid_t, pid) + __bitmask(cpus, num_possible_cpus()) + __array(char, msg, TASK_COMM_LEN) + __field(int, cpu) + ), + + TP_fast_assign( + __entry->pid = p->pid; + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __assign_bitmask(cpus, cpumask_bits(perferred_cpumask), num_possible_cpus()); + memcpy(__entry->msg, msg, min((size_t)TASK_COMM_LEN, strlen(msg)+1)); + __entry->cpu = cpu; + ), + + TP_printk("comm=%s pid=%d perferred_cpus=%s reason=%s target_cpu=%d", + __entry->comm, __entry->pid, __get_bitmask(cpus), __entry->msg, __entry->cpu) +); + +TRACE_EVENT(sched_rtg_task_each, + + TP_PROTO(unsigned int id, unsigned int nr_running, struct task_struct *task), + + TP_ARGS(id, nr_running, task), + + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, nr_running) + __array(char, comm, TASK_COMM_LEN) + __field(pid_t, pid) + __field(int, prio) + __bitmask(allowed, num_possible_cpus()) + __field(int, cpu) + __field(int, state) + __field(bool, on_rq) + __field(int, on_cpu) + ), + + TP_fast_assign( + __entry->id = id; + __entry->nr_running = nr_running; + memcpy(__entry->comm, task->comm, TASK_COMM_LEN); + __entry->pid = task->pid; + __entry->prio = task->prio; + __assign_bitmask(allowed, cpumask_bits(&task->cpus_mask), num_possible_cpus()); + __entry->cpu = task_cpu(task); + __entry->state = task->state; + __entry->on_rq = task->on_rq; + __entry->on_cpu = task->on_cpu; + ), + + TP_printk("comm=%s pid=%d prio=%d allowed=%s cpu=%d state=%s%s on_rq=%d on_cpu=%d", + __entry->comm, __entry->pid, __entry->prio, __get_bitmask(allowed), __entry->cpu, + __entry->state & (TASK_REPORT_MAX) ? + __print_flags(__entry->state & (TASK_REPORT_MAX), "|", + { TASK_INTERRUPTIBLE, "S" }, + { TASK_UNINTERRUPTIBLE, "D" }, + { __TASK_STOPPED, "T" }, + { __TASK_TRACED, "t" }, + { EXIT_DEAD, "X" }, + { EXIT_ZOMBIE, "Z" }, + { TASK_DEAD, "x" }, + { TASK_WAKEKILL, "K"}, + { TASK_WAKING, "W"}) : "R", + __entry->state & TASK_STATE_MAX ? 
"+" : "", + __entry->on_rq, __entry->on_cpu) +); + +TRACE_EVENT(sched_rtg_valid_normalized_util, + + TP_PROTO(unsigned int id, unsigned int nr_running, + const struct cpumask *rtg_cpus, unsigned int valid), + + TP_ARGS(id, nr_running, rtg_cpus, valid), + + TP_STRUCT__entry( + __field(unsigned int, id) + __field(unsigned int, nr_running) + __bitmask(cpus, num_possible_cpus()) + __field(unsigned int, valid) + ), + + TP_fast_assign( + __entry->id = id; + __entry->nr_running = nr_running; + __assign_bitmask(cpus, cpumask_bits(rtg_cpus), num_possible_cpus()); + __entry->valid = valid; + ), + + TP_printk("id=%d nr_running=%d cpus=%s valid=%d", + __entry->id, __entry->nr_running, + __get_bitmask(cpus), __entry->valid) +); +#endif /* _TRACE_RTG_H */ + +/* This part must be outside protection */ +#include diff --git a/kernel/sched/rtg/rtg.c b/kernel/sched/rtg/rtg.c index e2cf2cdab65c..dabadd54e59c 100644 --- a/kernel/sched/rtg/rtg.c +++ b/kernel/sched/rtg/rtg.c @@ -6,6 +6,9 @@ #include #include #include +#define CREATE_TRACE_POINTS +#include +#undef CREATE_TRACE_POINTS #include "../sched.h" #include "rtg.h" @@ -717,8 +720,10 @@ int find_rtg_cpu(struct task_struct *p) if (is_reserved(i)) continue; - if (idle_cpu(i) || (i == task_cpu(p) && p->state == TASK_RUNNING)) + if (idle_cpu(i) || (i == task_cpu(p) && p->state == TASK_RUNNING)) { + trace_find_rtg_cpu(p, preferred_cpus, "prefer_idle", i); return i; + } } for_each_cpu(i, &search_cpus) { @@ -747,8 +752,12 @@ int find_rtg_cpu(struct task_struct *p) } } - if (idle_backup_cpu != -1) + if (idle_backup_cpu != -1) { + trace_find_rtg_cpu(p, preferred_cpus, "idle_backup", idle_backup_cpu); return idle_backup_cpu; + } + + trace_find_rtg_cpu(p, preferred_cpus, "max_spare", max_spare_cap_cpu); return max_spare_cap_cpu; } @@ -804,12 +813,14 @@ static inline bool valid_normalized_util(struct related_thread_group *grp) get_task_struct(p); if (p->state == TASK_RUNNING) cpumask_set_cpu(task_cpu(p), &rtg_cpus); + trace_sched_rtg_task_each(grp->id, grp->nr_running, p); put_task_struct(p); } valid = cpumask_intersects(&rtg_cpus, &grp->preferred_cluster->cpus); } + trace_sched_rtg_valid_normalized_util(grp->id, grp->nr_running, &rtg_cpus, valid); return valid; } -- Gitee