diff --git a/0001-sched-enable-the-group-identity.patch b/0001-sched-enable-the-group-identity.patch index 618d8398c9d4cbd8c8be538cf653728744d546dd..9a55d24a679e007d8ff424b877825e426b71029a 100644 --- a/0001-sched-enable-the-group-identity.patch +++ b/0001-sched-enable-the-group-identity.patch @@ -1,7 +1,7 @@ From bb436a218e495ddc7841a060cc80c362d9a7b430 Mon Sep 17 00:00:00 2001 From: Cruz Zhao Date: Mon, 5 Dec 2022 16:02:37 +0800 -Subject: [PATCH 1/4] sched: enable the group identity +Subject: [PATCH 1/6] sched: enable the group identity Signed-off-by: Erwei Deng Signed-off-by: Cruz Zhao diff --git a/0002-sched-resuce-dying-tasks-on-rq.patch b/0002-sched-resuce-dying-tasks-on-rq.patch index 9cdc8f1ecd8500e7e937c85a227b94cd60750082..cb51ad122bfe1c47c682983fd4abb556a6bed483 100644 --- a/0002-sched-resuce-dying-tasks-on-rq.patch +++ b/0002-sched-resuce-dying-tasks-on-rq.patch @@ -1,7 +1,7 @@ From 2474c655755e97a1e62c323c460e9cb983e48c7a Mon Sep 17 00:00:00 2001 From: Cruz Zhao Date: Mon, 5 Dec 2022 16:26:42 +0800 -Subject: [PATCH 2/4] sched: resuce dying tasks on rq +Subject: [PATCH 2/6] sched: resuce dying tasks on rq Signed-off-by: Cruz Zhao Signed-off-by: Erwei Deng diff --git a/0003-sched-fix-sysfs-removed-too-late.patch b/0003-sched-fix-sysfs-removed-too-late.patch index 61c8a2663d1098e1002bb9d8a4fd2d215c2332dc..0339c8b32f73a2076a054ba6227ccfb878563778 100644 --- a/0003-sched-fix-sysfs-removed-too-late.patch +++ b/0003-sched-fix-sysfs-removed-too-late.patch @@ -1,7 +1,7 @@ From 847a2e79bd05045a224250560462480fd026954c Mon Sep 17 00:00:00 2001 From: Cruz Zhao Date: Mon, 5 Dec 2022 16:33:53 +0800 -Subject: [PATCH 3/4] sched: fix sysfs removed too late +Subject: [PATCH 3/6] sched: fix sysfs removed too late Signed-off-by: Cruz Zhao Signed-off-by: Erwei Deng diff --git a/0004-sched-work-around-AliSecGuard.patch b/0004-sched-work-around-AliSecGuard.patch index 2e46e0446a846e90c5bce89ae6db45f16544efc9..c1be866a9ca19f425586a28e60faaef465ccdaad 100644 --- 
a/0004-sched-work-around-AliSecGuard.patch +++ b/0004-sched-work-around-AliSecGuard.patch @@ -1,7 +1,7 @@ From ca18290cd2bc7b2885533eb78b75c0338f523650 Mon Sep 17 00:00:00 2001 -Form: Cruz Zhao +From: Cruz Zhao Date: Mon, 5 Dec 2022 22:32:01 +0800 -Subject: [PATCH 4/4] sched: work around AliSecGuard +Subject: [PATCH 4/6] sched: work around AliSecGuard AliSecGuard made plugsched unable to work on ECS instances. This patch workarounds this issue by placing jmp instruction diff --git a/0005-sched-fix-panic-when-CPU-hotplug-on-5.10.patch b/0005-sched-fix-panic-when-CPU-hotplug-on-5.10.patch new file mode 100644 index 0000000000000000000000000000000000000000..91ef5bff9a9a1727e0dd7707b8f94b3b9bd30a69 --- /dev/null +++ b/0005-sched-fix-panic-when-CPU-hotplug-on-5.10.patch @@ -0,0 +1,42 @@ +From 82a9b17ad63e0de8d6caa2bb967e5e7b6f7cee14 Mon Sep 17 00:00:00 2001 +From: Yihao Wu +Date: Tue, 6 Dec 2022 10:32:53 +0800 +Subject: [PATCH 5/6] sched: fix panic when CPU hotplug on 5.10 + +Since commit ace8031099f9("sched/topology: Make local variables static"), +sched_domains_tmpmask becomes local variable. And plugsched tags local +variables as private variables. Besides, sched_domain is a component +which is not covered by sched_rebuild technique. These two facts make +sched_domains_tmpmask re-initialized to zero after loading the module. + +However, sched_domains_tmpmask should be shared with the kernel because +it contains running state information. Otherwise, when rebuilding the +sched domain (e.g. when CPU hotplug), the kernel panics. + +So put sched_domains_tmpmask (and sched_domains_tmpmask2) to extra public +to fix this panic issue. 
+ +Signed-off-by: Yihao Wu +Signed-off-by: Erwei Deng +--- + .../kernel/sched/mod/topology.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/kernel/sched/mod/topology.c b/kernel/sched/mod/topology.c +index 3da11a57f..d79045fa8 100644 +--- a/kernel/sched/mod/topology.c ++++ b/kernel/sched/mod/topology.c +@@ -7,8 +7,8 @@ + extern struct mutex sched_domains_mutex; + + /* Protected by sched_domains_mutex: */ +-static __used cpumask_var_t sched_domains_tmpmask; +-static __used cpumask_var_t sched_domains_tmpmask2; ++extern cpumask_var_t sched_domains_tmpmask; ++extern cpumask_var_t sched_domains_tmpmask2; + + #ifdef CONFIG_SCHED_DEBUG + +-- +2.27.0 + diff --git a/0006-sched-add-the-sidecar-of-smp.c-to-fix-the-scheduler_.patch b/0006-sched-add-the-sidecar-of-smp.c-to-fix-the-scheduler_.patch new file mode 100644 index 0000000000000000000000000000000000000000..fb0c03441bea3968415d7b5252c716891cae776b --- /dev/null +++ b/0006-sched-add-the-sidecar-of-smp.c-to-fix-the-scheduler_.patch @@ -0,0 +1,215 @@ +From 8a26b4dc36cc080e64ae45a1b0e9e5a1af01f0c4 Mon Sep 17 00:00:00 2001 +From: Erwei Deng +Date: Tue, 6 Dec 2022 15:32:43 +0800 +Subject: [PATCH 6/6] sched: add the sidecar of smp.c to fix the scheduler_ipi + +Signed-off-by: Yihao Wu +Signed-off-by: Erwei Deng +Signed-off-by: Cruz Zhao +--- + .../kernel/sched/mod/export_jump.h | 1 + + .../kernel/sched/mod/smp.c | 148 ++++++++++++++++++ + .../kernel/sched/mod/tainted_functions.h | 1 + + .../working/boundary.yaml | 3 +- + .../symbol_resolve/undefined_functions.h | 3 +- + 5 files changed, 154 insertions(+), 2 deletions(-) + create mode 100644 kernel/sched/mod/smp.c + +diff --git a/kernel/sched/mod/export_jump.h b/kernel/sched/mod/export_jump.h +index 50e42d49e..3f3d85d7a 100644 +--- a/kernel/sched/mod/export_jump.h ++++ b/kernel/sched/mod/export_jump.h +@@ -152,3 +152,4 @@ EXPORT_PLUGSCHED(wake_up_new_task, void, struct task_struct *) + EXPORT_PLUGSCHED(wake_up_nohz_cpu, void, int) + 
EXPORT_PLUGSCHED(yield, void, void) + EXPORT_PLUGSCHED(yield_to, int, struct task_struct *, bool) ++EXPORT_PLUGSCHED(sysvec_reschedule_ipi, void, struct pt_regs *) +diff --git a/kernel/sched/mod/smp.c b/kernel/sched/mod/smp.c +new file mode 100644 +index 000000000..c0d8da79a +--- /dev/null ++++ b/kernel/sched/mod/smp.c +@@ -0,0 +1,148 @@ ++// SPDX-License-Identifier: GPL-2.0-or-later ++/* ++ * Intel SMP support routines. ++ * ++ * (c) 1995 Alan Cox, Building #3 ++ * (c) 1998-99, 2000, 2009 Ingo Molnar ++ * (c) 2002,2003 Andi Kleen, SuSE Labs. ++ * ++ * i386 and x86_64 integration by Glauber Costa ++ */ ++ ++#include <linux/init.h> ++ ++#include <linux/mm.h> ++#include <linux/delay.h> ++#include <linux/spinlock.h> ++#include <linux/export.h> ++#include <linux/kernel_stat.h> ++#include <linux/mc146818rtc.h> ++#include <linux/cache.h> ++#include <linux/interrupt.h> ++#include <linux/cpu.h> ++#include <linux/gfp.h> ++ ++#include <asm/mtrr.h> ++#include <asm/tlbflush.h> ++#include <asm/mmu_context.h> ++#include <asm/proto.h> ++#include <asm/apic.h> ++#include <asm/idtentry.h> ++#include <asm/nmi.h> ++#include <asm/mce.h> ++#include <asm/trace/irq_vectors.h> ++#include <asm/kexec.h> ++#include <asm/virtext.h> ++ ++/* ++ * Some notes on x86 processor bugs affecting SMP operation: ++ * ++ * Pentium, Pentium Pro, II, III (and all CPUs) have bugs. ++ * The Linux implications for SMP are handled as follows: ++ * ++ * Pentium III / [Xeon] ++ * None of the E1AP-E3AP errata are visible to the user. ++ * ++ * E1AP. see PII A1AP ++ * E2AP. see PII A2AP ++ * E3AP. see PII A3AP ++ * ++ * Pentium II / [Xeon] ++ * None of the A1AP-A3AP errata are visible to the user. ++ * ++ * A1AP. see PPro 1AP ++ * A2AP. see PPro 2AP ++ * A3AP. see PPro 7AP ++ * ++ * Pentium Pro ++ * None of 1AP-9AP errata are visible to the normal user, ++ * except occasional delivery of 'spurious interrupt' as trap #15. ++ * This is very rare and a non-problem. ++ * ++ * 1AP. Linux maps APIC as non-cacheable ++ * 2AP. worked around in hardware ++ * 3AP. fixed in C0 and above steppings microcode update. ++ * Linux does not use excessive STARTUP_IPIs. ++ * 4AP. worked around in hardware ++ * 5AP. symmetric IO mode (normal Linux operation) not affected. ++ * 'noapic' mode has vector 0xf filled out properly. ++ * 6AP. 
'noapic' mode might be affected - fixed in later steppings ++ * 7AP. We do not assume writes to the LVT deasserting IRQs ++ * 8AP. We do not enable low power mode (deep sleep) during MP bootup ++ * 9AP. We do not use mixed mode ++ * ++ * Pentium ++ * There is a marginal case where REP MOVS on 100MHz SMP ++ * machines with B stepping processors can fail. XXX should provide ++ * an L1cache=Writethrough or L1cache=off option. ++ * ++ * B stepping CPUs may hang. There are hardware work arounds ++ * for this. We warn about it in case your board doesn't have the work ++ * arounds. Basically that's so I can tell anyone with a B stepping ++ * CPU and SMP problems "tough". ++ * ++ * Specific items [From Pentium Processor Specification Update] ++ * ++ * 1AP. Linux doesn't use remote read ++ * 2AP. Linux doesn't trust APIC errors ++ * 3AP. We work around this ++ * 4AP. Linux never generated 3 interrupts of the same priority ++ * to cause a lost local interrupt. ++ * 5AP. Remote read is never used ++ * 6AP. not affected - worked around in hardware ++ * 7AP. not affected - worked around in hardware ++ * 8AP. worked around in hardware - we get explicit CS errors if not ++ * 9AP. only 'noapic' mode affected. Might generate spurious ++ * interrupts, we log only the first one and count the ++ * rest silently. ++ * 10AP. not affected - worked around in hardware ++ * 11AP. Linux reads the APIC between writes to avoid this, as per ++ * the documentation. Make sure you preserve this as it affects ++ * the C stepping chips too. ++ * 12AP. not affected - worked around in hardware ++ * 13AP. not affected - worked around in hardware ++ * 14AP. we always deassert INIT during bootup ++ * 15AP. not affected - worked around in hardware ++ * 16AP. not affected - worked around in hardware ++ * 17AP. not affected - worked around in hardware ++ * 18AP. not affected - worked around in hardware ++ * 19AP. 
not affected - worked around in BIOS ++ * ++ * If this sounds worrying believe me these bugs are either ___RARE___, ++ * or are signal timing bugs worked around in hardware and there's ++ * about nothing of note with C stepping upwards. ++ */ ++ ++extern atomic_t stopping_cpu; ++extern bool smp_no_nmi_ipi; ++ ++static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs); ++ ++/* ++ * this function calls the 'stop' function on all other CPUs in the system. ++ */ ++; ++ ++static int register_stop_handler(void); ++extern struct nmiaction smp_stop_nmi_callback_na; ++ ++/* ++ * Reschedule call back. KVM uses this interrupt to force a cpu out of ++ * guest mode. ++ */ ++DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_reschedule_ipi) ++{ ++ ack_APIC_irq(); ++ trace_reschedule_entry(RESCHEDULE_VECTOR); ++ inc_irq_stat(irq_resched_count); ++ scheduler_ipi(); ++ trace_reschedule_exit(RESCHEDULE_VECTOR); ++} ++ ++; ++ ++; ++ ++ ++ ++extern struct smp_ops smp_ops; +diff --git a/kernel/sched/mod/tainted_functions.h b/kernel/sched/mod/tainted_functions.h +index c5eef3e31..9f8927f09 100644 +--- a/kernel/sched/mod/tainted_functions.h ++++ b/kernel/sched/mod/tainted_functions.h +@@ -522,3 +522,4 @@ TAINTED_FUNCTION(find_later_rq,1) + TAINTED_FUNCTION(group_balance_cpu,1) + TAINTED_FUNCTION(cpu_cgroup_css_released,1) + TAINTED_FUNCTION(partition_sched_domains_locked,1) ++TAINTED_FUNCTION(sysvec_reschedule_ipi,1) +diff --git a/working/boundary.yaml b/working/boundary.yaml +index c0405b00f..09d35a507 100644 +--- a/working/boundary.yaml ++++ b/working/boundary.yaml +@@ -119,4 +119,5 @@ global_var: + - rt_sched_class + - fair_sched_class + - idle_sched_class +-sidecar: ++sidecar: !!pairs ++ - sysvec_reschedule_ipi: arch/x86/kernel/smp.c +diff --git a/working/symbol_resolve/undefined_functions.h b/working/symbol_resolve/undefined_functions.h +index ee4639faa..fcbc51229 100644 +--- a/working/symbol_resolve/undefined_functions.h ++++ b/working/symbol_resolve/undefined_functions.h +@@ 
-247,4 +247,5 @@ + {"cpu_cgroup_css_released", 1}, + {"cpu_cgroup_css_online", 1}, + {"partition_sched_domains_locked", 0}, +-{"init_sched_dl_class", 0} +\ No newline at end of file ++{"sysvec_reschedule_ipi", 0}, ++{"init_sched_dl_class", 0} +-- +2.27.0 + diff --git a/scheduler-group-identity.spec b/scheduler-group-identity.spec index 72f4885adacb49d6229058df41534b52fe5a22fd..5c3121c4a882e270b69aa9506c6b25f25c822f9f 100644 --- a/scheduler-group-identity.spec +++ b/scheduler-group-identity.spec @@ -26,13 +26,16 @@ Patch0001: 0001-sched-enable-the-group-identity.patch Patch0002: 0002-sched-resuce-dying-tasks-on-rq.patch Patch0003: 0003-sched-fix-sysfs-removed-too-late.patch Patch0004: 0004-sched-work-around-AliSecGuard.patch +Patch0005: 0005-sched-fix-panic-when-CPU-hotplug-on-5.10.patch +Patch0006: 0006-sched-add-the-sidecar-of-smp.c-to-fix-the-scheduler_.patch ExclusiveArch: x86_64 # Used to build kernel module and symbol-reserve -BuildRequires: elfutils-devel, elfutils-devel-static -BuildRequires: make, gcc-c++, bison, flex, openssl, openssl-devel +BuildRequires: make, gcc-c++, bc, bison, flex, openssl, openssl-devel BuildRequires: glibc-static, zlib-static, libstdc++-static +BuildRequires: elfutils-devel, elfutils-devel-static, elfutils-libelf-devel +BuildRequires: python3, python3-devel Requires: systemd Requires: binutils @@ -48,7 +51,7 @@ The scheduler-group-identity is a scheduler with Group-Identity feature of Anoli make KBUILD_MODPOST_WARN=1 \ plugsched_tmpdir=working/ \ plugsched_modpath=kernel/sched/mod/ \ - sidecar_objs= -C . \ + sidecar_objs=smp.o -C . \ -f working/Makefile.plugsched plugsched \ -j 1