diff --git a/1001-sched-fair-Introduce-ID_LOAD_BALANCE.patch b/1001-sched-fair-Introduce-ID_LOAD_BALANCE.patch new file mode 100644 index 0000000000000000000000000000000000000000..03a339fcb0dc32840ab097e69353ca978362c046 --- /dev/null +++ b/1001-sched-fair-Introduce-ID_LOAD_BALANCE.patch @@ -0,0 +1,386 @@ +From 36e6dcf7fd3f9b160a99dd6fa155ca3181ab52e2 Mon Sep 17 00:00:00 2001 +From: Cruz Zhao +Date: Sat, 10 Dec 2022 13:56:05 +0000 +Subject: [PATCH] sched/fair: Introduce ID_LOAD_BALANCE + +This patch introduces ID_LOAD_BALANCE, provides backup for +select_idle_core and select_idle_cpu, and also gives more +chances for highclass and normal tasks to find an idle cpu. + +This patch aims to prevent expeller and other tasks +running on the same core at the same time. + +select_idle_core: + - If cpus of the core are all id_idle, it's an available + backup core. + - If one cpu is id_idle and the other is preemptable for + highclass, it's also an available backup core. + +select_idle_cpu: + - If target is on expel, find an unexpelling cpu as backup + +select_idle_sibling: + - If there's no idle cpu in LLC, find it in a higher sched domain. + +This patch also fixes id_wake_affine. 
+ +This patch also changes the following default sched features: + + - ID_RESCUE_EXPELLEE false + - ID_EXPELLER_SHARE_CORE false + - ID_LOAD_BALANCE true + +Signed-off-by: Cruz Zhao +--- + kernel/sched/mod/fair.c | 171 +++++++++++++++++++++++++++++++++----------- + kernel/sched/mod/features.h | 1 + + 2 files changed, 130 insertions(+), 42 deletions(-) + +diff --git a/kernel/sched/mod/fair.c b/kernel/sched/mod/fair.c +index ad26b4a..0f14d17 100644 +--- a/kernel/sched/mod/fair.c ++++ b/kernel/sched/mod/fair.c +@@ -963,8 +963,8 @@ static inline bool is_idle_seeker_task(struct task_struct *p) + && src_rq->nr_high_running < 2) + goto bad_dst; + +- if (!sched_feat(ID_EXPELLER_SHARE_CORE) && +- task_is_expeller(p) && __rq_on_expel(dst_rq)) ++ /* Expeller task doesn't wanna share core with any task*/ ++ if (!sched_feat(ID_EXPELLER_SHARE_CORE) && __rq_on_expel(dst_rq)) + goto bad_dst; + + if (!__is_expellee_task(p)) +@@ -1005,12 +1005,12 @@ static inline bool is_idle_seeker_task(struct task_struct *p) + + this_rq = cpu_rq(this_cpu); + prev_rq = cpu_rq(prev_cpu); +- /* Last highclass should stay */ +- if (__is_highclass_task(p) && prev_rq->nr_high_running < 1) +- return false; ++ if (sched_feat(ID_LAST_HIGHCLASS_STAY) && __is_highclass_task(p) && prev_rq->nr_high_running < 1) ++ /* Here's some problem before */ ++ return true; + +- /* Do not pull underclass to the cpu on expel */ +- if (__is_expellee_task(p) && __rq_on_expel(this_rq)) ++ /* Do not pull anyone to the cpu on expel */ ++ if (!sched_feat(ID_EXPELLER_SHARE_CORE) && __rq_on_expel(this_rq)) + return false; + + return true; +@@ -1055,16 +1055,16 @@ static inline bool is_idle_seeker_task(struct task_struct *p) + * highclass workload are heavy, for others they + * don't really need to worry about this. 
+ */ +- if (!sched_feat(ID_EXPELLER_SHARE_CORE) && +- task_is_expeller(p) && __rq_on_expel(rq)) ++ /* Expeller task doesn't wanna share core with any task*/ ++ if (!sched_feat(ID_EXPELLER_SHARE_CORE) && __rq_on_expel(rq)) + return false; + + if (need_expel) + return false; + +- /* CPU full of underclass is idle for highclass */ ++ /* CPU full of underclass is idle for highclass and normal task*/ + if (!is_idle) +- return __is_highclass_task(p) && underclass_only(cpu); ++ return !is_underclass_task(p) && underclass_only(cpu); + + if (!is_saver) + return true; +@@ -1076,6 +1076,23 @@ static inline bool is_idle_seeker_task(struct task_struct *p) + return avg_idle >= sysctl_sched_idle_saver_wmark; + } + ++/* Whether this core is id_idle for task p */ ++static bool id_idle_core(struct task_struct *p, int core) ++{ ++ int cpu; ++ ++ if (group_identity_disabled() || !sched_feat(ID_LOAD_BALANCE)) ++ return true; ++ ++ if (!__is_highclass_task(p)) ++ return true; ++ for_each_cpu(cpu, cpu_smt_mask(core)) ++ if (!id_idle_cpu(p, cpu, false, NULL)) ++ return false; ++ ++ return true; ++} ++ + #ifdef CONFIG_CFS_BANDWIDTH + static noinline void + id_update_make_up(struct task_group *tg, struct rq *rq, struct cfs_rq *cfs_rq, +@@ -7991,11 +8008,13 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int + struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask); + int core, cpu, id_backup = -1; + bool is_expellee, do_clear = true; ++ int max_id_idle = 0, max_id_idle_core = -1; + + if (!static_branch_likely(&sched_smt_present)) + return -1; + +- if (!test_idle_cores(target, false)) ++ /* If there's an available id_idle_core, find it */ ++ if (!test_idle_cores(target, false) && (group_identity_disabled() || !sched_feat(ID_LOAD_BALANCE) || is_underclass_task(p))) + return -1; + + cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed); +@@ -8003,45 +8022,58 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int + 
is_expellee = is_expellee_task(p); + for_each_cpu_wrap(core, cpus, target) { + bool idle = true; +- bool id_idle = true; +- +- for_each_cpu(cpu, cpu_smt_mask(core)) { +- bool is_idle = true; ++ int id_idle = 0; ++ bool is_idle; ++ int preemptable_cpu = -1; + +- cpumask_clear_cpu(cpu, cpus); +- if (!group_identity_disabled()) { +- if (!id_idle_cpu(p, cpu, is_expellee, &is_idle)) +- id_idle = false; +- } else { +- is_idle = available_idle_cpu(cpu); ++ /* If cpus of the core are all id_idle, it's an available backup core. ++ * If one cpu is id_idle and the other is preemptable for highclass, ++ * it's also an available backup core. ++ */ ++ if (!group_identity_disabled() && sched_feat(ID_LOAD_BALANCE)) { ++ for_each_cpu(cpu, cpu_smt_mask(core)) { ++ if (id_idle_cpu(p, cpu, is_expellee, &is_idle)) ++ id_idle++; ++ else if (__is_highclass_task(p) && !cpu_rq(cpu)->nr_high_running) ++ preemptable_cpu = cpu; ++ ++ if (!is_idle) ++ idle = false; ++ } ++ } else { ++ for_each_cpu(cpu, cpu_smt_mask(core)) { ++ if (!available_idle_cpu(cpu)) { ++ idle = false; ++ break; ++ } + } +- if (!is_idle) +- idle = false; + } + +- if (idle && (group_identity_disabled() || id_idle)) ++ if (idle) + return core; + +- if (!group_identity_disabled() && id_idle) +- id_backup = core; ++ if (!group_identity_disabled() && sched_feat(ID_LOAD_BALANCE)) { ++ /* look for an available backup core */ ++ if (id_idle == 1 && preemptable_cpu != -1) ++ id_backup = preemptable_cpu; ++ else if (id_idle > 1) ++ id_backup = core; ++ ++ /* The more id_idle_cpu, the better */ ++ if (id_idle > max_id_idle) { ++ max_id_idle = id_idle; ++ max_id_idle_core = id_backup; ++ } ++ } + +- /* +- * This only happens when a CPU is idle but +- * not suitable for underclass task, we +- * should not clear the idle info since this +- * is still a good idle core for others. +- */ +- if (idle) +- do_clear = false; + } + + /* + * Failed to find an idle core; stop looking for one. 
+ */ +- if (do_clear) +- set_idle_cores(target, 0); ++ set_idle_cores(target, 0); + +- return id_backup; ++ return (!group_identity_disabled() && sched_feat(ID_LOAD_BALANCE)) ? max_id_idle_core : -1; + } + + /* +@@ -8095,6 +8127,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t + s64 delta; + int cpu, nr = INT_MAX, id_backup = -1; + bool is_seeker, is_expellee; ++ struct rq *target_rq = cpu_rq(target); + + this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc)); + if (!this_sd) +@@ -8127,6 +8160,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t + + for_each_cpu_wrap(cpu, cpus, target) { + bool idle; ++ struct rq *rq = cpu_rq(cpu); + + if (!--nr) + return -1; +@@ -8144,6 +8178,12 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t + id_backup = cpu; + } else + break; ++ /* If target is on expel, find an unexpelling cpu as backup */ ++ } else if (!group_identity_disabled() && ++ !sched_feat(ID_EXPELLER_SHARE_CORE) && ++ __rq_on_expel(target_rq) && !__rq_on_expel(rq) && ++ !__is_highclass_task(p) && id_backup == -1) { ++ id_backup = cpu; + } + } + +@@ -8165,6 +8205,8 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) + struct sched_domain *sd; + int i, recent_used_cpu; + bool is_expellee = is_expellee_task(p); ++ bool retried = false; ++ struct sched_domain *numa_sd; + + if (id_idle_cpu(p, target, is_expellee, NULL)) + return target; +@@ -8172,18 +8214,22 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) + /* + * If the previous CPU is cache affine and idle, don't be stupid: + */ ++ /* If target isn't an id idle core, don't select it */ + if (prev != target && + cpus_share_cache(prev, target) && +- id_idle_cpu(p, prev, is_expellee, NULL)) ++ id_idle_cpu(p, prev, is_expellee, NULL) && ++ id_idle_core(p, prev)) + return prev; + + /* Check a recently used CPU as a potential idle candidate: */ ++ /* If recent_used_cpu 
isn't an id idle core, don't select it */ + recent_used_cpu = p->recent_used_cpu; + if (recent_used_cpu != prev && + recent_used_cpu != target && + cpus_share_cache(recent_used_cpu, target) && + id_idle_cpu(p, recent_used_cpu, is_expellee, NULL) && +- cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) { ++ cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed) && ++ id_idle_core(p, recent_used_cpu)) { + /* + * Replace recent_used_cpu with prev as it is a potential + * candidate for the next wake: +@@ -8196,10 +8242,25 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) + if (!sd) + return target; + ++select_idle_core: + i = select_idle_core(p, sd, target); + if ((unsigned)i < nr_cpumask_bits) + return i; + ++ /* ++ * Expeller task prefer to find an idle core, if there's no one in LLC, ++ * find it in higher sched domain. ++ */ ++ if (!group_identity_disabled() && sched_feat(ID_LOAD_BALANCE) && !sched_feat(ID_EXPELLER_SHARE_CORE) && ++ task_is_expeller(p) && !retried) { ++ numa_sd = rcu_dereference(per_cpu(sd_numa, target)); ++ retried = true; ++ if (numa_sd) { ++ sd = numa_sd; ++ goto select_idle_core; ++ } ++ } ++ + i = select_idle_cpu(p, sd, target); + if ((unsigned)i < nr_cpumask_bits) + return i; +@@ -8208,6 +8269,18 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) + if ((unsigned)i < nr_cpumask_bits) + return i; + ++ /* ++ * Don't disturb expeller, find a proper one in higher sched domain. 
++ */ ++ if (!group_identity_disabled() && sched_feat(ID_LOAD_BALANCE) && !sched_feat(ID_EXPELLER_SHARE_CORE) && ++ __rq_on_expel(cpu_rq(target)) && !is_expellee && !retried) { ++ numa_sd = rcu_dereference(per_cpu(sd_numa, target)); ++ retried = true; ++ if (numa_sd) { ++ sd = numa_sd; ++ goto select_idle_core; ++ } ++ } + return target; + } + +@@ -8397,15 +8470,18 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu) + int new_cpu = prev_cpu; + int want_affine = 0; + int sync = (wake_flags & WF_SYNC) && !(current->flags & PF_EXITING); ++ /* If group_identity is disabled, is_highclass_task returns true. */ ++ bool highclass_fork = is_highclass_task(p) && (sd_flag & SD_BALANCE_FORK); + + /* Endow LS task the ability to balance at fork */ +- if (is_highclass_task(p) && (sd_flag & SD_BALANCE_FORK)) ++ if (highclass_fork) + sd_flag |= SD_BALANCE_WAKE; + + if (sd_flag & SD_BALANCE_WAKE) { + record_wakee(p); + want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu) + && cpumask_test_cpu(cpu, &p->cpus_allowed) ++ && (group_identity_disabled() || !highclass_fork) + && id_wake_affine(p, cpu, prev_cpu); + } + +@@ -8436,9 +8512,17 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu) + if (unlikely(sd)) { + /* Slow path */ + new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag); ++ /* As underclass is calculated into load, rq on expel will ++ * always be selected, if so, select another cpu. ++ * ++ * TODO: take underclass out of load. ++ */ ++ if (!group_identity_disabled() && !sched_feat(ID_EXPELLER_SHARE_CORE) && __rq_on_expel(cpu_rq(new_cpu)) && ++ !is_underclass_task(p)) ++ goto fast_path; + } else if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? 
*/ + /* Fast path */ +- ++fast_path: + new_cpu = select_idle_sibling(p, prev_cpu, new_cpu); + + if (want_affine) +@@ -11726,6 +11810,9 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf) + if (!cpu_active(this_cpu)) + return 0; + ++ /* Do not pull anyone to the cpu on expel */ ++ if (!group_identity_disabled() && !sched_feat(ID_EXPELLER_SHARE_CORE) && __rq_on_expel(this_rq)) ++ return 0; + /* + * This is OK, because current is on_cpu, which avoids it being picked + * for load-balance and preemption/IRQs are still disabled avoiding +diff --git a/kernel/sched/mod/features.h b/kernel/sched/mod/features.h +index f5101b4..53c9e6c 100644 +--- a/kernel/sched/mod/features.h ++++ b/kernel/sched/mod/features.h +@@ -103,4 +103,5 @@ + SCHED_FEAT(ID_LOOSE_EXPEL, false) + SCHED_FEAT(ID_LAST_HIGHCLASS_STAY, true) + SCHED_FEAT(ID_EXPELLER_SHARE_CORE, true) ++SCHED_FEAT(ID_LOAD_BALANCE, true) + #endif +-- +1.8.3.1 + diff --git a/hotfix_conflict_check b/hotfix_conflict_check new file mode 100755 index 0000000000000000000000000000000000000000..1be3371529bc82e543bc7c8156a49703b4176ad9 --- /dev/null +++ b/hotfix_conflict_check @@ -0,0 +1,79 @@ +#!/bin/bash +# Copyright 2019-2022 Alibaba Group Holding Limited. +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +# input file format: +# function sympos module +# +# valid e.g: +# pick_next_task 1 vmlinux +# ext4_free_blocks 2 ext4 + +if [ "$1" == "" ]; then + echo Error: please input files! + exit 1 +elif [ ! -e "$1" ]; then + echo Error: input file is not exist! 
+ exit 1 +else + tainted_file=$1 +fi + +func_list=$(mktemp) + +# Some hotfix do not provide the sympos of patched function, so use a new set +func_list_nosympos=$(mktemp) + +trap "rm -r $func_list $func_list_nosympos" INT HUP QUIT ABRT ALRM TERM EXIT # ensures it is deleted when script ends + +# deal with kpatch prev-0.4 ABI +find /sys/kernel/kpatch/patches/*/functions -type d -not -path "*/functions" 2>/dev/null | while read path ; do + # /sys/kernel/kpatch/patches/kpatch_D689377/functions/blk_mq_update_queue_map -> blk_mq_update_queue_map + func="${path##*/}" + echo "$func" >> $func_list_nosympos +done + +# deal with kpatch 0.4 ABI, livepatch and plugsched +for subdir in kpatch livepatch plugsched; do + find /sys/kernel/$subdir/*/ -type d -path "*,[0-9]" 2>/dev/null | while read path ; do + # /sys/kernel/kpatch/kpatch_5135717/vmlinux/kernfs_find_ns,1 -> kernfs_find_ns,1 + func_ver=`echo $path | awk -F / -e '{print $NF}'` + mod=`echo $path | awk -F / -e '{print $(NF-1)}'` + func=`echo $func_ver | awk -F , '{print $1}'` + ver=`echo $func_ver | awk -F , '{print $2}'` + echo "$func $ver $mod" >> $func_list + done +done + +# deal with manual hotfix that has sys directory entry +find /sys/kernel/manual_*/ -type d -not -path "*manual_*/" 2>/dev/null | while read path ; do + func="${path##*/}" + echo "$func" >> $func_list_nosympos +done + +# deal with manual hotfix that does not have sys directory entry, i.e, the early days implemenation +for func in `cat /proc/kallsyms | grep '\[kpatch_' | grep -v __kpatch | awk '{print $3}' | grep -v 'patch_'`; do + if [ $(grep "e9_$func" /proc/kallsyms | wc -l) -gt 0 ]; then + echo "$func" >> $func_list_nosympos + fi +done + +if [ "$(awk 'END{print NF}' $tainted_file)" != "3" ]; then + # tainted_file provided by manual_hotfix or kpatch-pre-0.4 that don't have the sympos + conflicts=$(sort <(awk '{print $1}' $tainted_file) <(awk '{print $1}' $func_list | sort | uniq) | uniq -d) +else + # Get the conflict functions + conflicts=$(sort 
$tainted_file <(awk '{print $1" "$2" "$3}' $func_list | sort | uniq) | uniq -d) +fi + +conflicts_nosympos=$(sort <(awk '{print $1}' $tainted_file) <(awk '{print $1}' $func_list_nosympos | sort | uniq) | uniq -d) + +if [ "$conflicts" != "" -o "$conflicts_nosympos" != "" ]; then + echo Error: confict detected: + if [ "$conflicts" != "" ]; then + echo $(awk '{print $1}' <(echo $conflicts)) + elif [ "$conflicts_nosympos" != "" ]; then + echo $conflicts_nosympos + fi + exit 1 +fi diff --git a/plugsched.service b/plugsched.service new file mode 100644 index 0000000000000000000000000000000000000000..656550c9b3685f2271875db59d8df567bfbe1203 --- /dev/null +++ b/plugsched.service @@ -0,0 +1,20 @@ +# Copyright 2019-2022 Alibaba Group Holding Limited. +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +[Unit] +Description=The plugsched service +ConditionKernelCommandLine=!plugsched.enable=0 + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=/usr/bin/bash -c "\ + if [ -d /var/plugsched/$(uname -r) ]; then \ + /var/plugsched/$(uname -r)/scheduler-installer install; \ + else \ + echo \"Scheduler for the current kernel version is not installed. Start service failed!\"; \ + exit 1; \ + fi" + +[Install] +WantedBy=multi-user.target diff --git a/scheduler-4.19.91-27.1.tar.gz b/scheduler-4.19.91-27.1.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7e17abf3f5bea8a603126351bf401c6445b60fd3 Binary files /dev/null and b/scheduler-4.19.91-27.1.tar.gz differ diff --git a/scheduler-group-identity.spec b/scheduler-group-identity.spec new file mode 100644 index 0000000000000000000000000000000000000000..6f5108650d7e87dbe6fff346eb7ef60d08086af1 --- /dev/null +++ b/scheduler-group-identity.spec @@ -0,0 +1,110 @@ +# Copyright 2019-2022 Alibaba Group Holding Limited. 
+# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +%define anolis_version 1 +%define KVER 4.19.91 +%define KREL 27.1 +%define anolis_release 1 + +Name: scheduler-group-identity +Version: %{KVER}.%{KREL}.%{anolis_version} +Release: %{anolis_release}%{?dist} +Summary: The scheduler of Group Identity feature of Anolis cloud kernel. +Packager: Erwei Deng + +Group: System Environment/Kernel +License: GPLv2 +URL: https://gitee.com/anolis/scheduler + +Source0: scheduler-%{KVER}-%{KREL}.tar.gz +Source1: plugsched.service +Source2: scheduler-installer +Source3: hotfix_conflict_check +Source4: version + +Patch1001: 1001-sched-fair-Introduce-ID_LOAD_BALANCE.patch + +ExclusiveArch: x86_64 + +# Used to build kernel module and symbol-reserve +BuildRequires: make, gcc-c++, bc, bison, flex, openssl, openssl-devel +BuildRequires: glibc-static, zlib-static, libstdc++-static +BuildRequires: elfutils-devel, elfutils-devel-static, elfutils-libelf-devel +BuildRequires: python3, python3-devel + +Requires: systemd +Requires: binutils + +%description +The scheduler-group-identity is a scheduler with Group-Identity feature of Anolis Kernel Cloud that can be used to CPU co-location scenario. 
+ +%prep +%autosetup -p1 + +%build +# Build scheduler module +make KBUILD_MODPOST_WARN=1 \ + plugsched_tmpdir=working/ \ + plugsched_modpath=kernel/sched/mod/ \ + -f working/Makefile.plugsched plugsched \ + -j 1 + +# Build symbol resolve tool +make -C working/symbol_resolve + +# Generate the tainted_functions file +awk -F '[(,)]' '$2!=""{print $2" "$3" vmlinux"}' kernel/sched/mod/tainted_functions.h > working/tainted_functions + +%install +#install tool, module and systemd service +mkdir -p %{buildroot}/usr/lib/systemd/system +mkdir -p %{buildroot}%{_localstatedir}/plugsched/%{KVER}-%{KREL}%{?dist}.%{_arch} + +install -m 755 working/symbol_resolve/symbol_resolve \ + %{buildroot}%{_localstatedir}/plugsched/%{KVER}-%{KREL}%{?dist}.%{_arch}/symbol_resolve +install -m 755 kernel/sched/mod/scheduler.ko \ + %{buildroot}%{_localstatedir}/plugsched/%{KVER}-%{KREL}%{?dist}.%{_arch}/scheduler.ko +install -m 444 working/tainted_functions \ + %{buildroot}%{_localstatedir}/plugsched/%{KVER}-%{KREL}%{?dist}.%{_arch}/tainted_functions +install -m 444 working/boundary.yaml \ + %{buildroot}%{_localstatedir}/plugsched/%{KVER}-%{KREL}%{?dist}.%{_arch}/boundary.yaml +install -m 644 %{SOURCE1} %{buildroot}/usr/lib/systemd/system/plugsched.service +install -m 755 %{SOURCE2} %{buildroot}%{_localstatedir}/plugsched/%{KVER}-%{KREL}%{?dist}.%{_arch}/scheduler-installer +install -m 755 %{SOURCE3} %{buildroot}%{_localstatedir}/plugsched/%{KVER}-%{KREL}%{?dist}.%{_arch}/hotfix_conflict_check +install -m 444 %{SOURCE4} %{buildroot}%{_localstatedir}/plugsched/%{KVER}-%{KREL}%{?dist}.%{_arch}/version + +%post +sync + +if [ "$(uname -r)" != "%{KVER}-%{KREL}%{?dist}.%{_arch}" ]; then + echo "INFO: scheduler does not match current kernel version, skip starting service ..." 
+ exit 0 +fi + +echo "Start plugsched.service" +systemctl enable plugsched +systemctl start plugsched + +#uninstall kernel module before remove this rpm-package +%preun +if [ "$(uname -r)" != "%{KVER}-%{KREL}%{?dist}.%{_arch}" ]; then + echo "INFO: scheduler does not match current kernel version, skip unloading module..." + exit 0 +fi + +echo "Stop plugsched.service" +/var/plugsched/$(uname -r)/scheduler-installer uninstall || exit 1 +systemctl stop plugsched + +%postun +systemctl reset-failed plugsched + +%files +%dir %{_localstatedir}/plugsched/%{KVER}-%{KREL}%{?dist}.%{_arch} +/usr/lib/systemd/system/plugsched.service +%{_localstatedir}/plugsched/%{KVER}-%{KREL}%{?dist}.%{_arch}/* + + +%changelog +* Tue Dec 13 2022 Cruz Zhao - 5.10.134.12.2.1-3 +- introduce sched_feat ID_LOAD_BALANCE diff --git a/scheduler-installer b/scheduler-installer new file mode 100755 index 0000000000000000000000000000000000000000..1277c20ed29b387794aba1ea653c69c458352821 --- /dev/null +++ b/scheduler-installer @@ -0,0 +1,85 @@ +#!/bin/bash +# Copyright 2019-2022 Alibaba Group Holding Limited. +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +MAX_LOAD_ATTEMPTS=5 +RETRY_INTERVAL=2 + +cursys=$(uname -r) +modfile=/var/plugsched/$cursys/scheduler.ko +hotfix_conflict_check=/var/plugsched/$cursys/hotfix_conflict_check +tainted_functions=/var/plugsched/$cursys/tainted_functions +enablefile=/sys/kernel/plugsched/plugsched/enable +mod=$(modinfo $modfile | grep vermagic | awk '{print $2}') + +warn() { + echo "scheduler: $*" >&2 +} + +install_module() { + local i=0 + while true; do + out="$(LC_ALL=C insmod "$1" 2>&1)" + [[ -z "$out" ]] && break + echo "$out" 1>&2 + + # Safety check or memory pool allocated failed! Retry in a few seconds. + i=$((i+1)) + if [[ $i -eq $MAX_LOAD_ATTEMPTS ]]; then + warn "load module failed! $1" + exit 1 + else + warn "retrying..." 
+ sleep $RETRY_INTERVAL + fi + done +} + +uninstall_module() { + local i=0 + while true; do + out="$(export LC_ALL=C; sh -c "echo 0 > $enablefile" 2>&1)" + [[ -z "$out" ]] && break + echo "$out" 1>&2 + + # Safety check failed! Retry in a few seconds. + i=$((i+1)) + if [[ $i -eq $MAX_LOAD_ATTEMPTS ]]; then + warn "disable module failed!" + exit 1 + else + warn "retrying..." + sleep $RETRY_INTERVAL + fi + done + rmmod scheduler +} + +if [ "$1" == "install" ]; then + if [ -f "$enablefile" ]; then + echo "scheduler: scheduler module has been installed! Skip..." + exit + fi + + if [ "$cursys" == "$mod" ]; then + $hotfix_conflict_check $tainted_functions || exit 1 + /usr/bin/mkdir -p /run/plugsched + /usr/bin/cp $modfile /run/plugsched/scheduler.ko + /var/plugsched/$(uname -r)/symbol_resolve /run/plugsched/scheduler.ko /proc/kallsyms + install_module /run/plugsched/scheduler.ko + else + warn "Error: kernel version is not same as plugsched version!" + exit 1 + fi +elif [ "$1" == "uninstall" ]; then + if [ -f "$enablefile" ]; then + uninstall_module + else + echo "scheduler: scheduler module has been removed! Skip ..." + fi + + /usr/bin/rm -rf /run/plugsched +else + warn "Error: Unknown operation" + exit 1 +fi diff --git a/version b/version new file mode 100644 index 0000000000000000000000000000000000000000..8d48d70cf347aa8ab367674d367f083db4c66cae --- /dev/null +++ b/version @@ -0,0 +1 @@ +plugsched version: 1.2.0