diff --git a/0001-sched-enable-the-group-identity.patch b/0001-sched-enable-the-group-identity.patch new file mode 100644 index 0000000000000000000000000000000000000000..618d8398c9d4cbd8c8be538cf653728744d546dd --- /dev/null +++ b/0001-sched-enable-the-group-identity.patch @@ -0,0 +1,672 @@ +From bb436a218e495ddc7841a060cc80c362d9a7b430 Mon Sep 17 00:00:00 2001 +From: Cruz Zhao +Date: Mon, 5 Dec 2022 16:02:37 +0800 +Subject: [PATCH 1/4] sched: enable the group identity + +Signed-off-by: Erwei Deng +Signed-off-by: Cruz Zhao +--- + .config | 2 +- + .../include/linux/sched.h | 24 ++- + .../kernel/sched/mod/Makefile | 1 + + .../kernel/sched/mod/core.c | 57 +----- + .../kernel/sched/mod/debug.c | 4 +- + .../kernel/sched/mod/fair.c | 14 +- + .../kernel/sched/mod/main.c | 171 ++++++++++++++++++ + .../kernel/sched/mod/sched.h | 84 +++++---- + .../kernel/sched/mod/sched_rebuild.c | 13 +- + 9 files changed, 270 insertions(+), 100 deletions(-) + +diff --git a/.config b/.config +index fd7a2f502..fb61c96dd 100644 +--- a/.config ++++ b/.config +@@ -165,7 +165,7 @@ CONFIG_BLK_CGROUP=y + CONFIG_CGROUP_WRITEBACK=y + CONFIG_CGROUP_SCHED=y + CONFIG_FAIR_GROUP_SCHED=y +-# CONFIG_GROUP_IDENTITY is not set ++CONFIG_GROUP_IDENTITY=y + CONFIG_CFS_BANDWIDTH=y + CONFIG_RT_GROUP_SCHED=y + CONFIG_CGROUP_PIDS=y +diff --git a/include/linux/sched.h b/include/linux/sched.h +index 0780f5de3..24f9d3333 100644 +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -480,9 +480,6 @@ struct sched_statistics { + u64 nr_migrations_cold; + u64 nr_failed_migrations_affine; + u64 nr_failed_migrations_running; +-#ifdef CONFIG_GROUP_IDENTITY +- u64 nr_failed_migrations_id; +-#endif + u64 nr_failed_migrations_hot; + u64 nr_forced_migrations; + +@@ -496,7 +493,9 @@ struct sched_statistics { + u64 nr_wakeups_passive; + u64 nr_wakeups_idle; + +- CK_HOTFIX_RESERVE(1) ++#ifdef CONFIG_GROUP_IDENTITY ++ u64 nr_failed_migrations_id; ++#endif + CK_HOTFIX_RESERVE(2) + CK_HOTFIX_RESERVE(3) + CK_HOTFIX_RESERVE(4) +@@ -548,12 +547,6 @@ struct sched_entity { + unsigned long runnable_weight; + #endif + +-#ifdef CONFIG_GROUP_IDENTITY +- int id_flags; +-#ifdef CONFIG_SCHED_SMT +- struct list_head expel_node; +-#endif +-#endif + + #ifdef CONFIG_SMP + /* +@@ -565,9 +558,20 @@ struct sched_entity { + struct sched_avg avg; + #endif + ++#ifdef CONFIG_GROUP_IDENTITY ++ int id_flags; ++#ifdef CONFIG_SCHED_SMT ++ struct list_head expel_node; ++#else ++ CK_HOTFIX_RESERVE(2) ++ CK_HOTFIX_RESERVE(3) ++#endif ++#else + CK_HOTFIX_RESERVE(1) + CK_HOTFIX_RESERVE(2) + CK_HOTFIX_RESERVE(3) ++#endif ++ + CK_HOTFIX_RESERVE(4) + CK_HOTFIX_RESERVE(5) + CK_HOTFIX_RESERVE(6) +diff --git a/kernel/sched/mod/Makefile b/kernel/sched/mod/Makefile +index 7dbc6f4cf..a120189e9 100644 +--- a/kernel/sched/mod/Makefile ++++ b/kernel/sched/mod/Makefile +@@ -52,6 +52,7 @@ $(obj)/%.stub.o: $(src)/%.c FORCE + + GET_STACK_SIZE: $(obj)/core.stub.o + $(eval ccflags-y += $(shell bash $(plugsched_tmpdir)/springboard_search.sh build $<)) ++ $(info $(ccflags-y)) + + $(obj)/.globalize: $(src)/export_jump.h $(obj-stub) FORCE + $(cmd_find_sym) +diff --git a/kernel/sched/mod/core.c b/kernel/sched/mod/core.c +index 92486d68b..403734a08 100644 +--- a/kernel/sched/mod/core.c ++++ b/kernel/sched/mod/core.c +@@ -2910,13 +2910,6 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) + p->se.cfs_rq = NULL; + #endif + +-#ifdef CONFIG_GROUP_IDENTITY +- p->se.id_flags = 0; +-#ifdef CONFIG_SCHED_SMT +- INIT_LIST_HEAD(&p->se.expel_node); +-#endif +-#endif +- + #ifdef 
CONFIG_SCHEDSTATS + /* Even if schedstat is disabled, there should not be garbage */ + memset(&p->se.statistics, 0, sizeof(p->se.statistics)); +@@ -3172,6 +3165,12 @@ void wake_up_new_task(struct task_struct *p) + update_rq_clock(rq); + post_init_entity_util_avg(p); + ++#ifdef CONFIG_GROUP_IDENTITY ++ p->se.id_flags = 0; ++#ifdef CONFIG_SCHED_SMT ++ INIT_LIST_HEAD(&p->se.expel_node); ++#endif ++#endif + activate_task(rq, p, ENQUEUE_NOCLOCK); + trace_sched_wakeup_new(p); + check_preempt_curr(rq, p, WF_FORK); +@@ -7614,7 +7613,7 @@ extern u64 cpu_rt_period_read_uint(struct cgroup_subsys_state *, struct cftype * + #endif /* CONFIG_RT_GROUP_SCHED */ + + #ifdef CONFIG_GROUP_IDENTITY +-static int cpu_bvt_warp_ns_write_s64(struct cgroup_subsys_state *css, ++int cpu_bvt_warp_ns_write_s64(struct cgroup_subsys_state *css, + struct cftype *cftype, s64 val) + { + struct task_group *tg = css_tg(css); +@@ -7622,7 +7621,7 @@ static int cpu_bvt_warp_ns_write_s64(struct cgroup_subsys_state *css, + return update_bvt_warp_ns(tg, val); + } + +-static s64 cpu_bvt_warp_ns_read_s64(struct cgroup_subsys_state *css, ++s64 cpu_bvt_warp_ns_read_s64(struct cgroup_subsys_state *css, + struct cftype *cft) + { + struct task_group *tg = css_tg(css); +@@ -7630,7 +7629,7 @@ static s64 cpu_bvt_warp_ns_read_s64(struct cgroup_subsys_state *css, + return tg->bvt_warp_ns; + } + +-static int cpu_identity_write_s64(struct cgroup_subsys_state *css, ++int cpu_identity_write_s64(struct cgroup_subsys_state *css, + struct cftype *cftype, s64 val) + { + struct task_group *tg = css_tg(css); +@@ -7638,7 +7637,7 @@ static int cpu_identity_write_s64(struct cgroup_subsys_state *css, + return update_identity(tg, val); + } + +-static s64 cpu_identity_read_s64(struct cgroup_subsys_state *css, ++s64 cpu_identity_read_s64(struct cgroup_subsys_state *css, + struct cftype *cft) + { + struct task_group *tg = css_tg(css); +@@ -7706,42 +7705,6 @@ static __used struct cftype cpu_legacy_files[] = { + .seq_show = cpu_uclamp_max_show, + .write = cpu_uclamp_max_write, + }, +-#endif +-#ifdef CONFIG_GROUP_IDENTITY +- /* legacy bvt interface +- * +- * BVT(Borrowed Virtual Time) is derived from paper: +- * "Borrowed-virtual-time (BVT) scheduling: supporting +- * latency-sensitive threads in a general-purpose scheduler" +- * Link: https://dl.acm.org/doi/abs/10.1145/319344.319169 +- * +- * Jacob Leverich implemented the idea of this paper, and +- * 'bvt_warp_ns' interface is derived from Leverich's code. +- * Link: https://gist.github.com/leverich/5913713. +- * +- * Now we have reformed the whole idea, and only reserved +- * the name of 'bvt_warp_ns' to be compalitible. +- * +- * 'bvt_warp_ns' will be converted into identity when written. 
+- * The correspondence of bvt_warp_ns and identity follows: +- * bvt identity value identity +- * -2 9 ID_UNDERCLASS | ID_IDLE_SAVER +- * -1 9 ID_UNDERCLASS | ID_IDLE_SAVER +- * 0 0 ID_NORMAL +- * 1 18 ID_HIGHCLASS | ID_IDLE_SEEKER +- * 2 22 ID_HIGHCLASS | ID_IDLE_SEEKER | ID_SMT_EXPELLER +- * +- */ +- { +- .name = "bvt_warp_ns", +- .read_s64 = cpu_bvt_warp_ns_read_s64, +- .write_s64 = cpu_bvt_warp_ns_write_s64, +- }, +- { +- .name = "identity", +- .read_s64 = cpu_identity_read_s64, +- .write_s64 = cpu_identity_write_s64, +- }, + #endif + { } /* Terminate */ + }; +diff --git a/kernel/sched/mod/debug.c b/kernel/sched/mod/debug.c +index 0d079f029..5ba63f0b0 100644 +--- a/kernel/sched/mod/debug.c ++++ b/kernel/sched/mod/debug.c +@@ -731,8 +731,8 @@ do { \ + P(smt_expeller); + P(on_expel); + #endif +- PN(high_exec_sum); +- PN(under_exec_sum); ++ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "high_exec_sum", per_cpu(high_exec_sum, cpu_of(rq))); ++ SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "under_exec_sum", per_cpu(under_exec_sum, cpu_of(rq))); + #endif + P(nr_switches); + P(nr_uninterruptible); +diff --git a/kernel/sched/mod/fair.c b/kernel/sched/mod/fair.c +index fa54d03e2..d651430f2 100644 +--- a/kernel/sched/mod/fair.c ++++ b/kernel/sched/mod/fair.c +@@ -136,6 +136,12 @@ unsigned int sysctl_sched_bvt_place_epsilon = 1000000UL; + */ + unsigned int sysctl_sched_idle_saver_wmark; + ++#ifdef CONFIG_GROUP_IDENTITY ++DEFINE_PER_CPU(u64, high_exec_sum); ++DEFINE_PER_CPU(u64, under_exec_sum); ++DEFINE_PER_CPU(u64, under_exec_stamp); ++#endif ++ + #ifdef CONFIG_SCHED_SMT + /* + * When CPU is full of expellee and on expel, it actually can +@@ -1568,22 +1574,22 @@ void update_id_idle_avg(struct rq *rq, u64 delta) + s64 diff; + u64 max = sysctl_sched_idle_saver_wmark; + +- delta += rq->under_exec_sum - rq->under_exec_stamp; ++ delta += per_cpu(under_exec_sum, cpu_of(rq)) - per_cpu(under_exec_stamp, cpu_of(rq)); + diff = delta - rq->avg_id_idle; + rq->avg_id_idle += diff >> 3; + + if (rq->avg_id_idle > max) + rq->avg_id_idle = max; + +- rq->under_exec_stamp = rq->under_exec_sum; ++ per_cpu(under_exec_stamp, cpu_of(rq)) = per_cpu(under_exec_sum, cpu_of(rq)); + } + + static inline void id_update_exec(struct rq *rq, u64 delta_exec) + { + if (is_underclass_task(rq->curr)) +- rq->under_exec_sum += delta_exec; ++ per_cpu(under_exec_sum, cpu_of(rq)) += delta_exec; + else if (is_highclass_task(rq->curr)) +- rq->high_exec_sum += delta_exec; ++ per_cpu(high_exec_sum, cpu_of(rq)) += delta_exec; + } + + #ifdef CONFIG_SCHED_SMT +diff --git a/kernel/sched/mod/main.c b/kernel/sched/mod/main.c +index 8e08642c5..e650fd035 100644 +--- a/kernel/sched/mod/main.c ++++ b/kernel/sched/mod/main.c +@@ -151,9 +151,77 @@ static void disable_stack_protector(void) + static void disable_stack_protector(void) { } + #endif + ++static void gi_reset_reserve_field(bool install) ++{ ++#ifdef CONFIG_GROUP_IDENTITY ++ struct task_struct *p, *t; ++ struct task_group *tg; ++ int cpu; ++ struct cfs_rq *cfs_rq; ++ ++ for_each_possible_cpu(cpu) { ++ per_cpu(high_exec_sum, cpu) = 0; ++ per_cpu(under_exec_sum, cpu) = 0; ++ per_cpu(under_exec_stamp, cpu) = 0; ++ cpu_rq(cpu)->avg_id_idle = 0; ++ cpu_rq(cpu)->smt_expeller = false; ++ cpu_rq(cpu)->smt_expellee = false; ++ cpu_rq(cpu)->on_expel = false; ++#ifdef CONFIG_SCHED_SMT ++ cpu_rq(cpu)->next_expel_ib = 0; ++ cpu_rq(cpu)->next_expel_update = 0; ++#endif ++ } ++ ++ for_each_process_thread(p, t) { ++ t->se.statistics.nr_failed_migrations_id = 0; ++ t->se.id_flags = 0; ++#ifdef CONFIG_SCHED_SMT ++ if (install) 
++ INIT_LIST_HEAD(&t->se.expel_node); ++ else ++ memset(&t->se.expel_node, 0, sizeof(struct list_head)); ++#endif ++ } ++ ++ list_for_each_entry_rcu(tg, &task_groups, list) { ++ tg->bvt_warp_ns = 0; ++ tg->id_flags = 0; ++ ++ for_each_possible_cpu(cpu) { ++ cfs_rq = tg->cfs_rq[cpu]; ++ cfs_rq->min_under_vruntime = (u64)(-(1LL << 20)); ++ if (install) ++ cfs_rq->under_timeline = RB_ROOT_CACHED; ++ else ++ memset(&cfs_rq->under_timeline, 0, sizeof(struct rb_root_cached)); ++#ifdef CONFIG_SCHED_SMT ++ cfs_rq->expel_spread = 0; ++ cfs_rq->expel_start = 0; ++ if (install) ++ INIT_LIST_HEAD(&cfs_rq->expel_list); ++ else ++ memset(&cfs_rq->expel_list, 0, sizeof(struct list_head)); ++#endif ++ if (tg == &root_task_group) ++ continue; ++#ifdef CONFIG_SCHED_SMT ++ if (install) ++ INIT_LIST_HEAD(&tg->se[cpu]->expel_node); ++ else ++ memset(&tg->se[cpu]->expel_node, 0, sizeof(struct list_head)); ++#endif ++ tg->se[cpu]->statistics.nr_failed_migrations_id = 0; ++ tg->se[cpu]->id_flags = 0; ++ } ++ } ++#endif ++} ++ + static int __sync_sched_install(void *arg) + { + int error; ++ struct task_struct *p, *t; + + if (is_first_process()) { + stop_time_p0 = ktime_get(); +@@ -192,6 +260,7 @@ static int __sync_sched_install(void *arg) + disable_stack_protector(); + sched_alloc_extrapad(); + reset_balance_callback(); ++ gi_reset_reserve_field(true); + } + + atomic_dec(&redirect_finished); +@@ -239,6 +308,7 @@ static int __sync_sched_restore(void *arg) + JUMP_OPERATION(remove); + reset_balance_callback(); + sched_free_extrapad(); ++ gi_reset_reserve_field(false); + } + + atomic_dec(&redirect_finished); +@@ -385,6 +455,97 @@ static void report_detail_time(char *ops) + ktime_to_ns(ktime_sub(main_end, main_start))); + } + ++extern int cpu_bvt_warp_ns_write_s64(struct cgroup_subsys_state *css, ++ struct cftype *cftype, s64 val); ++extern s64 cpu_bvt_warp_ns_read_s64(struct cgroup_subsys_state *css, ++ struct cftype *cft); ++extern int cpu_identity_write_s64(struct cgroup_subsys_state *css, ++ struct cftype *cftype, s64 val); ++extern s64 cpu_identity_read_s64(struct cgroup_subsys_state *css, ++ struct cftype *cft); ++static struct cftype cpu_gi_files[] = { ++#ifdef CONFIG_GROUP_IDENTITY ++ /* legacy bvt interface ++ * ++ * BVT(Borrowed Virtual Time) is derived from paper: ++ * "Borrowed-virtual-time (BVT) scheduling: supporting ++ * latency-sensitive threads in a general-purpose scheduler" ++ * Link: https://dl.acm.org/doi/abs/10.1145/319344.319169 ++ * ++ * Jacob Leverich implemented the idea of this paper, and ++ * 'bvt_warp_ns' interface is derived from Leverich's code. ++ * Link: https://gist.github.com/leverich/5913713. ++ * ++ * Now we have reformed the whole idea, and only reserved ++ * the name of 'bvt_warp_ns' to be compalitible. ++ * ++ * 'bvt_warp_ns' will be converted into identity when written. 
++ * The correspondence of bvt_warp_ns and identity follows: ++ * bvt identity value identity ++ * -2 9 ID_UNDERCLASS | ID_IDLE_SAVER ++ * -1 9 ID_UNDERCLASS | ID_IDLE_SAVER ++ * 0 0 ID_NORMAL ++ * 1 18 ID_HIGHCLASS | ID_IDLE_SEEKER ++ * 2 22 ID_HIGHCLASS | ID_IDLE_SEEKER | ID_SMT_EXPELLER ++ * ++ */ ++ { ++ .name = "bvt_warp_ns", ++ .read_s64 = cpu_bvt_warp_ns_read_s64, ++ .write_s64 = cpu_bvt_warp_ns_write_s64, ++ }, ++ { ++ .name = "identity", ++ .read_s64 = cpu_identity_read_s64, ++ .write_s64 = cpu_identity_write_s64, ++ }, ++#endif ++ { } /* Terminate */ ++}; ++ ++static struct ctl_table_header *gi_sysctl_header; ++static struct ctl_path kernel_path[] = { { .procname = "kernel", }, { } }; ++static struct ctl_table gi_table[] = { ++#ifdef CONFIG_GROUP_IDENTITY ++ { ++ /* ++ * Variable 'sysctl_sched_bvt_place_epsilon' is derived from ++ * https://gist.github.com/leverich/5913713. ++ * Author: Jacob Leverich ++ */ ++ .procname = "sched_bvt_place_epsilon", ++ .data = &sysctl_sched_bvt_place_epsilon, ++ .maxlen = sizeof(unsigned int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec, ++ }, ++#ifdef CONFIG_SCHED_SMT ++ { ++ .procname = "sched_expel_idle_balance_delay", ++ .data = &sysctl_sched_expel_idle_balance_delay, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec, ++ }, ++ { ++ .procname = "sched_idle_saver_wmark", ++ .data = &sysctl_sched_idle_saver_wmark, ++ .maxlen = sizeof(unsigned int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec, ++ }, ++ { ++ .procname = "sched_expel_update_interval", ++ .data = &sysctl_sched_expel_update_interval, ++ .maxlen = sizeof(unsigned long), ++ .mode = 0644, ++ .proc_handler = proc_dointvec, ++ }, ++#endif ++#endif ++ { } ++}; ++ + static int load_sched_routine(void) + { + int ret; +@@ -421,6 +582,11 @@ static int load_sched_routine(void) + install_sched_debug_procfs(); + install_sched_debugfs(); + #endif ++#ifdef CONFIG_GROUP_IDENTITY ++ cgroup_add_legacy_cftypes(&cpu_cgrp_subsys, cpu_gi_files); ++ WARN_ON(gi_sysctl_header); ++ gi_sysctl_header = register_sysctl_paths(kernel_path, gi_table); ++#endif + + main_end = ktime_get(); + report_detail_time("load"); +@@ -453,6 +619,11 @@ static int unload_sched_routine(void) + restore_sched_debug_procfs(); + restore_sched_debugfs(); + #endif ++#ifdef CONFIG_GROUP_IDENTITY ++ cgroup_rm_cftypes(cpu_gi_files); ++ unregister_sysctl_table(gi_sysctl_header); ++ gi_sysctl_header = NULL; ++#endif + + sched_mempools_destroy(); + main_end = ktime_get(); +diff --git a/kernel/sched/mod/sched.h b/kernel/sched/mod/sched.h +index 8e09f5c64..d5e8e484b 100644 +--- a/kernel/sched/mod/sched.h ++++ b/kernel/sched/mod/sched.h +@@ -460,11 +460,6 @@ struct task_group { + #endif + #endif + +-#ifdef CONFIG_GROUP_IDENTITY +- int bvt_warp_ns; +- int id_flags; +-#endif +- + #ifdef CONFIG_RT_GROUP_SCHED + struct sched_rt_entity **rt_se; + struct rt_rq **rt_rq; +@@ -494,7 +489,13 @@ struct task_group { + struct uclamp_se uclamp[UCLAMP_CNT]; + #endif + ++#ifdef CONFIG_GROUP_IDENTITY ++ int bvt_warp_ns; ++ int id_flags; ++#else ++ + CK_HOTFIX_RESERVE(1) ++#endif + CK_HOTFIX_RESERVE(2) + CK_HOTFIX_RESERVE(3) + CK_HOTFIX_RESERVE(4) +@@ -614,18 +615,6 @@ struct cfs_rq { + + struct rb_root_cached tasks_timeline; + +-#ifdef CONFIG_GROUP_IDENTITY +- unsigned int nr_tasks; +- u64 min_under_vruntime; +-#ifdef CONFIG_SCHED_SMT +- u64 expel_spread; +- u64 expel_start; +- unsigned int h_nr_expel_immune; +- struct list_head expel_list; +-#endif +- struct rb_root_cached under_timeline; +-#endif +- + /* + * 'curr' points 
to currently running entity on this cfs_rq. + * It is set to NULL otherwise (i.e when none are currently running). +@@ -702,6 +691,23 @@ struct cfs_rq { + + unsigned long nr_uninterruptible; + ++#ifdef CONFIG_GROUP_IDENTITY ++ u64 min_under_vruntime; ++ unsigned int nr_tasks; ++#ifdef CONFIG_SCHED_SMT ++ unsigned int h_nr_expel_immune; ++ u64 expel_spread; ++ u64 expel_start; ++ struct list_head expel_list; ++#else ++ CK_HOTFIX_RESERVE(3) ++ CK_HOTFIX_RESERVE(4) ++ CK_HOTFIX_RESERVE(5) ++ CK_HOTFIX_RESERVE(6) ++ CK_HOTFIX_RESERVE(7) ++#endif ++ struct rb_root_cached under_timeline; ++#else + CK_HOTFIX_RESERVE(1) + CK_HOTFIX_RESERVE(2) + CK_HOTFIX_RESERVE(3) +@@ -710,6 +716,7 @@ struct cfs_rq { + CK_HOTFIX_RESERVE(6) + CK_HOTFIX_RESERVE(7) + CK_HOTFIX_RESERVE(8) ++#endif + }; + + static inline int rt_bandwidth_enabled(void) +@@ -1022,6 +1029,12 @@ struct uclamp_rq { + DECLARE_STATIC_KEY_FALSE(sched_uclamp_used); + #endif /* CONFIG_UCLAMP_TASK */ + ++#ifdef CONFIG_GROUP_IDENTITY ++DECLARE_PER_CPU(u64, high_exec_sum); ++DECLARE_PER_CPU(u64, under_exec_sum); ++DECLARE_PER_CPU(u64, under_exec_stamp); ++#endif ++ + /* + * This is the main, per-CPU runqueue data structure. + * +@@ -1075,24 +1088,6 @@ struct rq { + struct list_head *tmp_alone_branch; + #endif /* CONFIG_FAIR_GROUP_SCHED */ + +-#ifdef CONFIG_GROUP_IDENTITY +- unsigned int nr_high_running; +- unsigned int nr_under_running; +- unsigned int nr_expel_immune; +- long nr_high_make_up; +- long nr_under_make_up; +- bool smt_expeller; +- bool smt_expellee; +- bool on_expel; +- u64 high_exec_sum; +- u64 under_exec_sum; +- u64 under_exec_stamp; +- u64 avg_id_idle; +-#ifdef CONFIG_SCHED_SMT +- unsigned long next_expel_ib; +- unsigned long next_expel_update; +-#endif +-#endif + + /* + * This is part of a global counter where only the total sum +@@ -1216,6 +1211,24 @@ struct rq { + struct cpuidle_state *idle_state; + #endif + ++#ifdef CONFIG_GROUP_IDENTITY ++ unsigned int nr_high_running; ++ unsigned int nr_under_running; ++ unsigned int nr_expel_immune; ++ long nr_high_make_up; ++ long nr_under_make_up; ++ bool smt_expeller; ++ bool smt_expellee; ++ bool on_expel; ++ u64 avg_id_idle; ++#ifdef CONFIG_SCHED_SMT ++ unsigned long next_expel_ib; ++ unsigned long next_expel_update; ++#else ++ CK_HOTFIX_RESERVE(7) ++ CK_HOTFIX_RESERVE(8) ++#endif ++#else + CK_HOTFIX_RESERVE(1) + CK_HOTFIX_RESERVE(2) + CK_HOTFIX_RESERVE(3) +@@ -1224,6 +1237,7 @@ struct rq { + CK_HOTFIX_RESERVE(6) + CK_HOTFIX_RESERVE(7) + CK_HOTFIX_RESERVE(8) ++#endif + }; + + #ifdef CONFIG_FAIR_GROUP_SCHED +diff --git a/kernel/sched/mod/sched_rebuild.c b/kernel/sched/mod/sched_rebuild.c +index 219dd29e6..c7865cdcc 100644 +--- a/kernel/sched/mod/sched_rebuild.c ++++ b/kernel/sched/mod/sched_rebuild.c +@@ -114,8 +114,19 @@ void rebuild_sched_state(bool mod) + if (p == rq->stop) + continue; + +- if (task_on_rq_queued(p)) ++ if (task_on_rq_queued(p)) { + p->sched_class->enqueue_task(rq, p, queue_flags); ++#ifdef CONFIG_GROUP_IDENTITY ++ if (p->se.id_flags) ++ p->se.id_flags = 0; ++#ifdef CONFIG_SCHED_SMT ++ if (mod && !list_empty(&p->se.expel_node)) ++ INIT_LIST_HEAD(&p->se.expel_node); ++ if (!mod && (p->se.expel_node.next || p->se.expel_node.prev)) ++ memset(&p->se.expel_node, 0, sizeof(struct list_head)); ++#endif ++#endif ++ } + } + raw_spin_unlock(&rq->lock); + +-- +2.27.0 + diff --git a/0002-sched-resuce-dying-tasks-on-rq.patch b/0002-sched-resuce-dying-tasks-on-rq.patch new file mode 100644 index 0000000000000000000000000000000000000000..9cdc8f1ecd8500e7e937c85a227b94cd60750082 --- 
/dev/null +++ b/0002-sched-resuce-dying-tasks-on-rq.patch @@ -0,0 +1,103 @@ +From 2474c655755e97a1e62c323c460e9cb983e48c7a Mon Sep 17 00:00:00 2001 +From: Cruz Zhao +Date: Mon, 5 Dec 2022 16:26:42 +0800 +Subject: [PATCH 2/4] sched: resuce dying tasks on rq + +Signed-off-by: Cruz Zhao +Signed-off-by: Erwei Deng +--- + .../kernel/sched/mod/sched_rebuild.c | 49 ++++++++++++++----- + 1 file changed, 37 insertions(+), 12 deletions(-) + +diff --git a/kernel/sched/mod/sched_rebuild.c b/kernel/sched/mod/sched_rebuild.c +index c7865cdcc..25ff5c006 100644 +--- a/kernel/sched/mod/sched_rebuild.c ++++ b/kernel/sched/mod/sched_rebuild.c +@@ -38,6 +38,8 @@ struct sched_class *mod_class[] = { + &shadow_idle_sched_class, + }; + ++DEFINE_PER_CPU(struct list_head, tmp_list); ++ + #define NR_SCHED_CLASS 5 + struct sched_class bak_class[NR_SCHED_CLASS]; + +@@ -68,6 +70,7 @@ void clear_sched_state(bool mod) + struct task_struct *g, *p; + struct rq *rq = this_rq(); + int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; ++ int cpu = smp_processor_id(); + + raw_spin_lock(&rq->lock); + if (mod) { +@@ -89,6 +92,22 @@ void clear_sched_state(bool mod) + if (task_on_rq_queued(p)) + p->sched_class->dequeue_task(rq, p, queue_flags); + } ++ ++ p = NULL; ++ INIT_LIST_HEAD(&per_cpu(tmp_list, cpu)); ++ for(;;) { ++ if (mod) ++ p = fair_sched_class.pick_next_task(rq); ++ else ++ p = __orig_fair_sched_class.pick_next_task(rq); ++ if (!p) ++ break; ++ if (mod) ++ fair_sched_class.dequeue_task(rq, p, queue_flags); ++ else ++ __orig_fair_sched_class.dequeue_task(rq, p, queue_flags); ++ list_add_tail_rcu(&p->tasks, &per_cpu(tmp_list, cpu)); ++ } + raw_spin_unlock(&rq->lock); + } + +@@ -107,6 +126,23 @@ void rebuild_sched_state(bool mod) + __orig_set_rq_online(rq); + } + ++ list_for_each_entry_rcu(p, &per_cpu(tmp_list, cpu), tasks) { ++#ifdef CONFIG_GROUP_IDENTITY ++ p->se.id_flags = 0; ++#ifdef CONFIG_SCHED_SMT ++ if (mod) ++ INIT_LIST_HEAD(&p->se.expel_node); ++ else ++ memset(&p->se.expel_node, 0, sizeof(struct list_head)); ++ list_del_rcu(&p->tasks); ++ if (mod) ++ fair_sched_class.enqueue_task(rq, p, queue_flags); ++ else ++ __orig_fair_sched_class.enqueue_task(rq, p, queue_flags); ++#endif ++#endif ++ } ++ + for_each_process_thread(g, p) { + if (rq != task_rq(p)) + continue; +@@ -114,19 +150,8 @@ void rebuild_sched_state(bool mod) + if (p == rq->stop) + continue; + +- if (task_on_rq_queued(p)) { ++ if (task_on_rq_queued(p)) + p->sched_class->enqueue_task(rq, p, queue_flags); +-#ifdef CONFIG_GROUP_IDENTITY +- if (p->se.id_flags) +- p->se.id_flags = 0; +-#ifdef CONFIG_SCHED_SMT +- if (mod && !list_empty(&p->se.expel_node)) +- INIT_LIST_HEAD(&p->se.expel_node); +- if (!mod && (p->se.expel_node.next || p->se.expel_node.prev)) +- memset(&p->se.expel_node, 0, sizeof(struct list_head)); +-#endif +-#endif +- } + } + raw_spin_unlock(&rq->lock); + +-- +2.27.0 + diff --git a/0003-sched-fix-sysfs-removed-too-late.patch b/0003-sched-fix-sysfs-removed-too-late.patch new file mode 100644 index 0000000000000000000000000000000000000000..61c8a2663d1098e1002bb9d8a4fd2d215c2332dc --- /dev/null +++ b/0003-sched-fix-sysfs-removed-too-late.patch @@ -0,0 +1,54 @@ +From 847a2e79bd05045a224250560462480fd026954c Mon Sep 17 00:00:00 2001 +From: Cruz Zhao +Date: Mon, 5 Dec 2022 16:33:53 +0800 +Subject: [PATCH 3/4] sched: fix sysfs removed too late + +Signed-off-by: Cruz Zhao +Signed-off-by: Erwei Deng +--- + .../kernel/sched/mod/main.c | 19 ++++++++++++------- + 1 file changed, 12 insertions(+), 7 deletions(-) + +diff --git 
a/kernel/sched/mod/main.c +index e650fd035..3ed051d29 100644 +--- a/kernel/sched/mod/main.c ++++ b/kernel/sched/mod/main.c +@@ -606,11 +606,21 @@ static int unload_sched_routine(void) + parallel_state_check_init(); + process_id_init(); + ++#ifdef CONFIG_GROUP_IDENTITY ++ cgroup_rm_cftypes(cpu_gi_files); ++ unregister_sysctl_table(gi_sysctl_header); ++ gi_sysctl_header = NULL; ++#endif + ret = sync_sched_mod(__sync_sched_restore); + cpu_maps_update_done(); +- if (ret) ++ if (ret) { ++#ifdef CONFIG_GROUP_IDENTITY ++ cgroup_add_legacy_cftypes(&cpu_cgrp_subsys, cpu_gi_files); ++ WARN_ON(gi_sysctl_header); ++ gi_sysctl_header = register_sysctl_paths(kernel_path, gi_table); ++#endif + return ret; +- ++ } + #ifdef CONFIG_SCHEDSTATS + restore_proc_schedstat(); + #endif +@@ -619,11 +629,6 @@ static int unload_sched_routine(void) + restore_sched_debug_procfs(); + restore_sched_debugfs(); + #endif +-#ifdef CONFIG_GROUP_IDENTITY +- cgroup_rm_cftypes(cpu_gi_files); +- unregister_sysctl_table(gi_sysctl_header); +- gi_sysctl_header = NULL; +-#endif + + sched_mempools_destroy(); + main_end = ktime_get(); +-- +2.27.0 + diff --git a/0004-sched-work-around-AliSecGuard.patch b/0004-sched-work-around-AliSecGuard.patch new file mode 100644 index 0000000000000000000000000000000000000000..2e46e0446a846e90c5bce89ae6db45f16544efc9 --- /dev/null +++ b/0004-sched-work-around-AliSecGuard.patch @@ -0,0 +1,66 @@ +From ca18290cd2bc7b2885533eb78b75c0338f523650 Mon Sep 17 00:00:00 2001 +From: Cruz Zhao +Date: Mon, 5 Dec 2022 22:32:01 +0800 +Subject: [PATCH 4/4] sched: work around AliSecGuard + +AliSecGuard made plugsched unable to work on ECS instances. +This patch works around this issue by placing a jmp instruction +5 bytes after the prolog of wake_up_new_task. + +Signed-off-by: Yihao Wu +Signed-off-by: Erwei Deng +--- + .../kernel/sched/mod/stack_check.h | 2 ++ + .../working/symbol_resolve/symbol_resolve.cpp | 8 ++++++-- + 2 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/mod/stack_check.h b/kernel/sched/mod/stack_check.h +index f83c463a8..e23d499f6 100644 +--- a/kernel/sched/mod/stack_check.h ++++ b/kernel/sched/mod/stack_check.h +@@ -25,6 +25,8 @@ static void stack_check_init(void) + #undef EXPORT_CALLBACK + + vm_func_size[NR___schedule] = 0; ++ vm_func_size[NR_wake_up_new_task] -= 5; ++ + addr_sort(vm_func_addr, vm_func_size, NR_INTERFACE_FN); + + #define EXPORT_CALLBACK(fn, ...) \ +diff --git a/working/symbol_resolve/symbol_resolve.cpp b/working/symbol_resolve/symbol_resolve.cpp +index 595a4c450..ee8a707ca 100644 +--- a/working/symbol_resolve/symbol_resolve.cpp ++++ b/working/symbol_resolve/symbol_resolve.cpp +@@ -69,6 +69,7 @@ static void resolve_ref(const char *fname, kallsym_collection &kallsyms, sympos_ + + /* Find UND symbols in kallsyms */ + for (i=0; i < sh.sh_size / sh.sh_entsize; i++) { ++ int offset = 0; + if (!gelf_getsym(data, i, &sym)) + ERROR("gelf_getsym", true); + if (!(name = elf_strptr(elf, sh.sh_link, sym.st_name))) +@@ -79,8 +80,11 @@ static void resolve_ref(const char *fname, kallsym_collection &kallsyms, sympos_ + * Filter out the "__orig_" prefix, which represents interface + * or callback functions defined in vmlinux. 
+ */ +- if (strstr(name, "__orig_")) ++ if (strstr(name, "__orig_")) { + name += sizeof("__orig_") - 1; ++ if (!strcmp(name, "wake_up_new_task")) ++ offset = 5; ++ } + if (kallsyms.find(name) == kallsyms.end()) + continue; + kallsym = kallsyms[name]; +@@ -102,7 +106,7 @@ static void resolve_ref(const char *fname, kallsym_collection &kallsyms, sympos_ + sympos --; + /* Resolve UND symbols */ + sym.st_shndx = SHN_ABS; +- sym.st_value = kallsym[sympos]; ++ sym.st_value = kallsym[sympos] + offset; + modified = 1; + if (gelf_update_sym(data, i, &sym) == -1) + ERROR("gelf_update_sym", true); +-- +2.27.0 + diff --git a/README.en.md b/README.en.md index 71429558017c83c615843d8071592a6b1bd8505d..0f3d7c820547fbca471d6cf13ad98aedeb15759c 100644 --- a/README.en.md +++ b/README.en.md @@ -1,22 +1,7 @@ # scheduler-group-identity #### Description -{**When you're done, you can delete the content in this README and update the file with details for others getting started with your repository**} - -#### Software Architecture -Software architecture description - -#### Installation - -1. xxxx -2. xxxx -3. xxxx - -#### Instructions - -1. xxxx -2. xxxx -3. xxxx +Scheduler-group-identity is a scheduler plugin for the Anolis OS kernel that enables Group-Identity and can be used in CPU co-location scenarios. #### Contribution @@ -25,12 +10,3 @@ Software architecture description 3. Commit your code 4. Create Pull Request - -#### Gitee Feature - -1. You can use Readme\_XXX.md to support different languages, such as Readme\_en.md, Readme\_zh.md -2. Gitee blog [blog.gitee.com](https://blog.gitee.com) -3. Explore open source project [https://gitee.com/explore](https://gitee.com/explore) -4. The most valuable open source project [GVP](https://gitee.com/gvp) -5. The manual of Gitee [https://gitee.com/help](https://gitee.com/help) -6. The most popular members [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) diff --git a/README.md b/README.md index 71cd4817483b4161d352d723848308a9ee811897..34139ee14f06785cc94461a64df99080f73f38d9 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,7 @@ # scheduler-group-identity #### 介绍 -{**以下是 Gitee 平台说明,您可以替换此简介** -Gitee 是 OSCHINA 推出的基于 Git 的代码托管平台(同时支持 SVN)。专为开发者提供稳定、高效、安全的云端软件开发协作平台 -无论是个人、团队、或是企业,都能够用 Gitee 实现代码托管、项目管理、协作开发。企业项目请看 [https://gitee.com/enterprises](https://gitee.com/enterprises)} - -#### 软件架构 -软件架构说明 - - -#### 安装教程 - -1. xxxx -2. xxxx -3. xxxx - -#### 使用说明 - -1. xxxx -2. xxxx -3. xxxx +scheduler-group-identity 是 Anolis OS 内核的调度器插件,里面启用了 Group-Identity 特性,可以用于 CPU 混部场景。 #### 参与贡献 @@ -27,13 +9,3 @@ Gitee 是 OSCHINA 推出的基于 Git 的代码托管平台(同时支持 SVN 2. 新建 Feat_xxx 分支 3. 提交代码 4. 新建 Pull Request - - -#### 特技 - -1. 使用 Readme\_XXX.md 来支持不同的语言,例如 Readme\_en.md, Readme\_zh.md -2. Gitee 官方博客 [blog.gitee.com](https://blog.gitee.com) -3. 你可以 [https://gitee.com/explore](https://gitee.com/explore) 这个地址来了解 Gitee 上的优秀开源项目 -4. [GVP](https://gitee.com/gvp) 全称是 Gitee 最有价值开源项目,是综合评定出的优秀开源项目 -5. Gitee 官方提供的使用手册 [https://gitee.com/help](https://gitee.com/help) -6. Gitee 封面人物是一档用来展示 Gitee 会员风采的栏目 [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) diff --git a/hotfix_conflict_check b/hotfix_conflict_check new file mode 100755 index 0000000000000000000000000000000000000000..1be3371529bc82e543bc7c8156a49703b4176ad9 --- /dev/null +++ b/hotfix_conflict_check @@ -0,0 +1,79 @@ +#!/bin/bash +# Copyright 2019-2022 Alibaba Group Holding Limited. 
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +# input file format: +# function sympos module +# +# e.g.: +# pick_next_task 1 vmlinux +# ext4_free_blocks 2 ext4 + +if [ "$1" == "" ]; then + echo Error: please provide an input file! + exit 1 +elif [ ! -e "$1" ]; then + echo Error: input file does not exist! + exit 1 +else + tainted_file=$1 +fi + +func_list=$(mktemp) + +# Some hotfixes do not provide the sympos of the patched function, so use a new set +func_list_nosympos=$(mktemp) + +trap "rm -r $func_list $func_list_nosympos" INT HUP QUIT ABRT ALRM TERM EXIT # ensure they are deleted when the script ends + +# deal with kpatch pre-0.4 ABI +find /sys/kernel/kpatch/patches/*/functions -type d -not -path "*/functions" 2>/dev/null | while read path ; do + # /sys/kernel/kpatch/patches/kpatch_D689377/functions/blk_mq_update_queue_map -> blk_mq_update_queue_map + func="${path##*/}" + echo "$func" >> $func_list_nosympos +done + +# deal with kpatch 0.4 ABI, livepatch and plugsched +for subdir in kpatch livepatch plugsched; do + find /sys/kernel/$subdir/*/ -type d -path "*,[0-9]" 2>/dev/null | while read path ; do + # /sys/kernel/kpatch/kpatch_5135717/vmlinux/kernfs_find_ns,1 -> kernfs_find_ns,1 + func_ver=`echo $path | awk -F / -e '{print $NF}'` + mod=`echo $path | awk -F / -e '{print $(NF-1)}'` + func=`echo $func_ver | awk -F , '{print $1}'` + ver=`echo $func_ver | awk -F , '{print $2}'` + echo "$func $ver $mod" >> $func_list + done +done + +# deal with manual hotfixes that have a sys directory entry +find /sys/kernel/manual_*/ -type d -not -path "*manual_*/" 2>/dev/null | while read path ; do + func="${path##*/}" + echo "$func" >> $func_list_nosympos +done + +# deal with manual hotfixes that do not have a sys directory entry, i.e., the early-days implementation +for func in `cat /proc/kallsyms | grep '\[kpatch_' | grep -v __kpatch | awk '{print $3}' | grep -v 'patch_'`; do + if [ $(grep "e9_$func" /proc/kallsyms | wc -l) -gt 0 ]; then + echo "$func" >> $func_list_nosympos + fi +done + +if [ "$(awk 'END{print NF}' $tainted_file)" != "3" ]; then + # tainted_file provided by manual_hotfix or kpatch pre-0.4, which don't have the sympos + conflicts=$(sort <(awk '{print $1}' $tainted_file) <(awk '{print $1}' $func_list | sort | uniq) | uniq -d) +else + # Get the conflict functions + conflicts=$(sort $tainted_file <(awk '{print $1" "$2" "$3}' $func_list | sort | uniq) | uniq -d) +fi + +conflicts_nosympos=$(sort <(awk '{print $1}' $tainted_file) <(awk '{print $1}' $func_list_nosympos | sort | uniq) | uniq -d) + +if [ "$conflicts" != "" -o "$conflicts_nosympos" != "" ]; then + echo Error: conflict detected: + if [ "$conflicts" != "" ]; then + echo $(awk '{print $1}' <(echo $conflicts)) + elif [ "$conflicts_nosympos" != "" ]; then + echo $conflicts_nosympos + fi + exit 1 +fi diff --git a/plugsched.service b/plugsched.service new file mode 100644 index 0000000000000000000000000000000000000000..656550c9b3685f2271875db59d8df567bfbe1203 --- /dev/null +++ b/plugsched.service @@ -0,0 +1,20 @@ +# Copyright 2019-2022 Alibaba Group Holding Limited. +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +[Unit] +Description=The plugsched service +ConditionKernelCommandLine=!plugsched.enable=0 + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=/usr/bin/bash -c "\ + if [ -d /var/plugsched/$(uname -r) ]; then \ + /var/plugsched/$(uname -r)/scheduler-installer install; \ + else \ + echo \"Scheduler for the current kernel version is not installed. 
Failed to start the service!\"; \ + exit 1; \ + fi" + +[Install] +WantedBy=multi-user.target diff --git a/scheduler-5.10.134-12.2.tar.gz b/scheduler-5.10.134-12.2.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..31d2563d22f92d7be1633f5f742fb49cd75d1a5b Binary files /dev/null and b/scheduler-5.10.134-12.2.tar.gz differ diff --git a/scheduler-group-identity.spec b/scheduler-group-identity.spec new file mode 100644 index 0000000000000000000000000000000000000000..72f4885adacb49d6229058df41534b52fe5a22fd --- /dev/null +++ b/scheduler-group-identity.spec @@ -0,0 +1,113 @@ +# Copyright 2019-2022 Alibaba Group Holding Limited. +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +%define anolis_version 1 +%define KVER 5.10.134 +%define KREL 12.2 +%define anolis_release 1 + +Name: scheduler-group-identity +Version: %{KVER}.%{KREL}.%{anolis_version} +Release: %{anolis_release}%{?dist} +Summary: The scheduler with the Group Identity feature of the Anolis cloud kernel. +Packager: Erwei Deng + +Group: System Environment/Kernel +License: GPLv2 +URL: https://gitee.com/anolis/scheduler + +Source0: scheduler-%{KVER}-%{KREL}.tar.gz +Source1: plugsched.service +Source2: scheduler-installer +Source3: hotfix_conflict_check +Source4: version + +Patch0001: 0001-sched-enable-the-group-identity.patch +Patch0002: 0002-sched-resuce-dying-tasks-on-rq.patch +Patch0003: 0003-sched-fix-sysfs-removed-too-late.patch +Patch0004: 0004-sched-work-around-AliSecGuard.patch + +ExclusiveArch: x86_64 + +# Used to build kernel module and symbol-reserve +BuildRequires: elfutils-devel, elfutils-devel-static +BuildRequires: make, gcc-c++, bison, flex, openssl, openssl-devel +BuildRequires: glibc-static, zlib-static, libstdc++-static + +Requires: systemd +Requires: binutils + +%description +The scheduler-group-identity is a scheduler with the Group-Identity feature of the Anolis cloud kernel that can be used in CPU co-location scenarios. + +%prep +%autosetup -p1 + +%build +# Build scheduler module +make KBUILD_MODPOST_WARN=1 \ + plugsched_tmpdir=working/ \ + plugsched_modpath=kernel/sched/mod/ \ + sidecar_objs= -C . 
\ + -f working/Makefile.plugsched plugsched \ + -j 1 + +# Build symbol resolve tool +make -C working/symbol_resolve + +# Generate the tainted_functions file +awk -F '[(,)]' '$2!=""{print $2" "$3" vmlinux"}' kernel/sched/mod/tainted_functions.h > working/tainted_functions + +%install +#install tool, module and systemd service +mkdir -p %{buildroot}/usr/lib/systemd/system +mkdir -p %{buildroot}%{_localstatedir}/plugsched/%{KVER}-%{KREL}%{?dist}.%{_arch} + +install -m 755 working/symbol_resolve/symbol_resolve \ + %{buildroot}%{_localstatedir}/plugsched/%{KVER}-%{KREL}%{?dist}.%{_arch}/symbol_resolve +install -m 755 kernel/sched/mod/scheduler.ko \ + %{buildroot}%{_localstatedir}/plugsched/%{KVER}-%{KREL}%{?dist}.%{_arch}/scheduler.ko +install -m 444 working/tainted_functions \ + %{buildroot}%{_localstatedir}/plugsched/%{KVER}-%{KREL}%{?dist}.%{_arch}/tainted_functions +install -m 444 working/boundary.yaml \ + %{buildroot}%{_localstatedir}/plugsched/%{KVER}-%{KREL}%{?dist}.%{_arch}/boundary.yaml +install -m 644 %{SOURCE1} %{buildroot}/usr/lib/systemd/system/plugsched.service +install -m 755 %{SOURCE2} %{buildroot}%{_localstatedir}/plugsched/%{KVER}-%{KREL}%{?dist}.%{_arch}/scheduler-installer +install -m 755 %{SOURCE3} %{buildroot}%{_localstatedir}/plugsched/%{KVER}-%{KREL}%{?dist}.%{_arch}/hotfix_conflict_check +install -m 444 %{SOURCE4} %{buildroot}%{_localstatedir}/plugsched/%{KVER}-%{KREL}%{?dist}.%{_arch}/version + +%post +sync + +if [ "$(uname -r)" != "%{KVER}-%{KREL}%{?dist}.%{_arch}" ]; then + echo "INFO: scheduler does not match current kernel version, skip starting service ..." + exit 0 +fi + +echo "Start plugsched.service" +systemctl enable plugsched +systemctl start plugsched + +#uninstall kernel module before remove this rpm-package +%preun +if [ "$(uname -r)" != "%{KVER}-%{KREL}%{?dist}.%{_arch}" ]; then + echo "INFO: scheduler does not match current kernel version, skip unloading module..." + exit 0 +fi + +echo "Stop plugsched.service" +/var/plugsched/$(uname -r)/scheduler-installer uninstall || exit 1 +systemctl stop plugsched + +%postun +systemctl reset-failed plugsched + +%files +%dir %{_localstatedir}/plugsched/%{KVER}-%{KREL}%{?dist}.%{_arch} +/usr/lib/systemd/system/plugsched.service +%{_localstatedir}/plugsched/%{KVER}-%{KREL}%{?dist}.%{_arch}/* + + +%changelog +* Mon Dec 5 2022 Erwei Deng - 5.10.134.12.2.1-1 +- add the group identity feature to scheduler diff --git a/scheduler-installer b/scheduler-installer new file mode 100755 index 0000000000000000000000000000000000000000..1277c20ed29b387794aba1ea653c69c458352821 --- /dev/null +++ b/scheduler-installer @@ -0,0 +1,85 @@ +#!/bin/bash +# Copyright 2019-2022 Alibaba Group Holding Limited. +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +MAX_LOAD_ATTEMPTS=5 +RETRY_INTERVAL=2 + +cursys=$(uname -r) +modfile=/var/plugsched/$cursys/scheduler.ko +hotfix_conflict_check=/var/plugsched/$cursys/hotfix_conflict_check +tainted_functions=/var/plugsched/$cursys/tainted_functions +enablefile=/sys/kernel/plugsched/plugsched/enable +mod=$(modinfo $modfile | grep vermagic | awk '{print $2}') + +warn() { + echo "scheduler: $*" >&2 +} + +install_module() { + local i=0 + while true; do + out="$(LC_ALL=C insmod "$1" 2>&1)" + [[ -z "$out" ]] && break + echo "$out" 1>&2 + + # Safety check or memory pool allocated failed! Retry in a few seconds. + i=$((i+1)) + if [[ $i -eq $MAX_LOAD_ATTEMPTS ]]; then + warn "load module failed! $1" + exit 1 + else + warn "retrying..." 
+ sleep $RETRY_INTERVAL + fi + done +} + +uninstall_module() { + local i=0 + while true; do + out="$(export LC_ALL=C; sh -c "echo 0 > $enablefile" 2>&1)" + [[ -z "$out" ]] && break + echo "$out" 1>&2 + + # Safety check failed! Retry in a few seconds. + i=$((i+1)) + if [[ $i -eq $MAX_LOAD_ATTEMPTS ]]; then + warn "disable module failed!" + exit 1 + else + warn "retrying..." + sleep $RETRY_INTERVAL + fi + done + rmmod scheduler +} + +if [ "$1" == "install" ]; then + if [ -f "$enablefile" ]; then + echo "scheduler: scheduler module has been installed! Skip..." + exit + fi + + if [ "$cursys" == "$mod" ]; then + $hotfix_conflict_check $tainted_functions || exit 1 + /usr/bin/mkdir -p /run/plugsched + /usr/bin/cp $modfile /run/plugsched/scheduler.ko + /var/plugsched/$(uname -r)/symbol_resolve /run/plugsched/scheduler.ko /proc/kallsyms + install_module /run/plugsched/scheduler.ko + else + warn "Error: kernel version is not same as plugsched version!" + exit 1 + fi +elif [ "$1" == "uninstall" ]; then + if [ -f "$enablefile" ]; then + uninstall_module + else + echo "scheduler: scheduler module has been removed! Skip ..." + fi + + /usr/bin/rm -rf /run/plugsched +else + warn "Error: Unknown operation" + exit 1 +fi diff --git a/version b/version new file mode 100644 index 0000000000000000000000000000000000000000..dd2fa49022e76d3357bd83eabfb2f9bb9896dad9 --- /dev/null +++ b/version @@ -0,0 +1 @@ +plugsched version: 0.0.0