From 01a5cb562300b3bbfe7fa1e077f194e432a57e0c Mon Sep 17 00:00:00 2001 From: Yihao Wu Date: Thu, 2 Jun 2022 15:30:59 +0800 Subject: [PATCH 01/10] src: simplify tainted_functions iteration By adding a NULL obj at the end of the array, just like we usually do in the kernel, the code looks cleaner. Signed-off-by: Yihao Wu --- src/main.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/main.c b/src/main.c index 98e2bff..2362112 100644 --- a/src/main.c +++ b/src/main.c @@ -62,7 +62,8 @@ struct tainted_function { }, struct tainted_function tainted_functions[] = { - #include "tainted_functions.h" + #include "tainted_functions.h", + NULL }; static inline void parallel_state_check_init(void) @@ -499,12 +500,11 @@ static void unregister_plugsched_enable(void) static int register_tainted_functions(void) { - int i; + struct tainted_function *tf; - for (i = 0; i < ARRAY_SIZE(tainted_functions); i++) { - tainted_functions[i].kobj = - kobject_create_and_add(tainted_functions[i].name, vmlinux_moddir); - if (!(tainted_functions[i].kobj)) + for (tf=tainted_functions; tf->name; tf++) { + tf->kobj = kobject_create_and_add(tf->name, vmlinux_moddir); + if (!tf->kobj) return -ENOMEM; } @@ -513,13 +513,12 @@ static int register_tainted_functions(void) static void unregister_tainted_functions(void) { - int i; + struct tainted_function *tf; - for (i = 0; i < ARRAY_SIZE(tainted_functions); i++) { - if (!(tainted_functions[i].kobj)) + for (tf=tainted_functions; tf->name; tf++) { + if (!tf->kobj) return; - - kobject_put(tainted_functions[i].kobj); + kobject_put(tf->kobj); } } -- Gitee From 61e99745678c98a545a641224d827ecd4bd10ac5 Mon Sep 17 00:00:00 2001 From: Yihao Wu Date: Thu, 2 Jun 2022 16:40:49 +0800 Subject: [PATCH 02/10] border: exclude __init functions from tainted_functions First, they are not tainted at all, because they are removed from the memory long time ago. We sure can't taint them at all. 
Second, excluding them from tainted_functions lets us test the real tainted_functions more easily. Signed-off-by: Yihao Wu --- sched_boundary/process.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sched_boundary/process.py b/sched_boundary/process.py index b6ec506..01f40d1 100644 --- a/sched_boundary/process.py +++ b/sched_boundary/process.py @@ -189,7 +189,7 @@ if __name__ == '__main__': func_class['sched_outsider'] = func_class['initial_insider'] - func_class['insider'] func_class['optimized_out'] = func_class['sched_outsider'] - func_class['in_vmlinux'] func_class['public_user'] = func_class['fn'] - func_class['insider'] - func_class['border'] - func_class['tainted'] = (func_class['border'] | func_class['insider']) & func_class['in_vmlinux'] + func_class['tainted'] = (func_class['border'] | func_class['insider']) & func_class['in_vmlinux'] - func_class['init'] func_class['undefined'] = func_class['sched_outsider'] | func_class['border'] for output_item in ['sched_outsider', 'fn_ptr', 'interface', 'init', 'insider', 'optimized_out']: -- Gitee From 24b0d9192f61328c442bdcd69401f0f54e22d33b Mon Sep 17 00:00:00 2001 From: Yihao Wu Date: Thu, 2 Jun 2022 16:43:46 +0800 Subject: [PATCH 03/10] tests: add insider bypass test It is easy to prove that interfaces are completely bypassed by checking their function headers. But to prove insiders are bypassed too, we need to insert an invalid instruction at the original function (of the vmlinux). After we've done it, we can run some coverage tests. Reusing those existing tests is a good idea. For now CPU Hotplug, CFS BWC functions and scheduler syscalls functions are already covered. 
Signed-off-by: Yihao Wu --- tests/bundles/ci | 3 +- tests/test_insider_bypassed/assert | 8 ++ tests/test_insider_bypassed/patch.diff | 133 +++++++++++++++++++++++++ 3 files changed, 143 insertions(+), 1 deletion(-) create mode 100755 tests/test_insider_bypassed/assert create mode 100644 tests/test_insider_bypassed/patch.diff diff --git a/tests/bundles/ci b/tests/bundles/ci index 567d7dc..9f71d22 100644 --- a/tests/bundles/ci +++ b/tests/bundles/ci @@ -3,4 +3,5 @@ public_var var_uniformity cpu_throttle domain_rebuild -sched_syscall \ No newline at end of file +sched_syscall +insider_bypassed diff --git a/tests/test_insider_bypassed/assert b/tests/test_insider_bypassed/assert new file mode 100755 index 0000000..5ae7f79 --- /dev/null +++ b/tests/test_insider_bypassed/assert @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +# Copyright 2019-2022 Alibaba Group Holding Limited. +# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +# Run some coverage tests +test_sched_syscall/assert || exit 1 +test_cpu_bal/assert || exit 2 +test_cpu_throttle/assert || exit 3 \ No newline at end of file diff --git a/tests/test_insider_bypassed/patch.diff b/tests/test_insider_bypassed/patch.diff new file mode 100644 index 0000000..6c4217f --- /dev/null +++ b/tests/test_insider_bypassed/patch.diff @@ -0,0 +1,133 @@ +// Copyright 2019-2022 Alibaba Group Holding Limited. 
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +diff --git a/scheduler/kernel/sched/mod/main.c b/scheduler/kernel/sched/mod/main.c +index 985980b..1962a9a 100644 +--- a/scheduler/kernel/sched/mod/main.c ++++ b/scheduler/kernel/sched/mod/main.c +@@ -51,6 +51,9 @@ struct kobject *plugsched_dir, *plugsched_subdir, *vmlinux_moddir; + + struct tainted_function { + char *name; ++ char *plain_name; ++ char *mod_name; ++ char store_header[2]; + struct kobject *kobj; + }; + +@@ -58,6 +61,8 @@ struct tainted_function { + #define TAINTED_FUNCTION(func,sympos) \ + { \ + .name = #func "," #sympos, \ ++ .plain_name = #func, \ ++ .mod_name = "plugsched:" #func, \ + .kobj = NULL, \ + }, + +@@ -129,6 +134,82 @@ static void disable_stack_protector(void) + static void disable_stack_protector(void) { } + #endif + ++static void uninstall_bypass_checkers(void) ++{ ++ u8 *new_addr, *old_addr; ++ struct tainted_function *tf; ++#ifdef CONFIG_X86_64 ++ unsigned long cr0; ++ cr0 = read_cr0(); ++ do_write_cr0(cr0 & 0xfffeffff); ++#else ++#endif ++ for (tf=tainted_functions; tf->name; tf++) { ++ new_addr = module_kallsyms_lookup_name(tf->mod_name); ++ old_addr = kallsyms_lookup_name(tf->plain_name); ++ ++ /* Whitelist */ ++ if (!strcmp(tf->plain_name, "unregister_sched_domain_sysctl")) ++ continue; ++ ++ if (old_addr == new_addr) ++ panic("Failed to find function %s in vmlinux.", tf->plain_name); ++#ifdef CONFIG_X86_64 ++ if (old_addr[0] == 0xe9) ++ continue; ++ ++ printk("Uninstalling bypass checker: %s\n", tf->plain_name); ++ old_addr[0] = tf->store_header[0]; ++ old_addr[1] = tf->store_header[1]; ++#else ++#endif ++ } ++#ifdef CONFIG_X86_64 ++ do_write_cr0(cr0); ++#else ++#endif ++ printk("Uninstalling bypass checker done\n", tf->plain_name); ++} ++ ++static void install_bypass_checkers(void) ++{ ++ u8 *new_addr, *old_addr; ++ struct tainted_function *tf; ++#ifdef CONFIG_X86_64 ++ unsigned long cr0; ++ cr0 = read_cr0(); ++ do_write_cr0(cr0 & 0xfffeffff); ++#else ++#endif ++ for 
(tf=tainted_functions; tf->name; tf++) { ++ new_addr = module_kallsyms_lookup_name(tf->mod_name); ++ old_addr = kallsyms_lookup_name(tf->plain_name); ++ ++ /* Whitelist */ ++ if (!strcmp(tf->plain_name, "unregister_sched_domain_sysctl")) ++ continue; ++ ++ if (old_addr == new_addr) ++ panic("Failed to find function %s in vmlinux.", tf->plain_name); ++#ifdef CONFIG_X86_64 ++ if (old_addr[0] == 0xe9) ++ continue; ++ ++ printk("Installing bypass checker: %s\n", tf->plain_name); ++ tf->store_header[0] = old_addr[0]; ++ tf->store_header[1] = old_addr[1]; ++ old_addr[0] = 0xf; ++ old_addr[1] = 0xb; ++#else ++#endif ++ } ++#ifdef CONFIG_X86_64 ++ do_write_cr0(cr0); ++#else ++#endif ++ printk("Installing bypass checker done\n", tf->plain_name); ++} ++ + static int __sync_sched_install(void *arg) + { + int error; +@@ -170,8 +251,10 @@ static int __sync_sched_install(void *arg) + atomic_cond_read_relaxed(&redirect_done, VAL); + rebuild_sched_state(true); + +- if (is_first_process()) ++ if (is_first_process()) { + stop_time_p2 = ktime_get(); ++ install_bypass_checkers(); ++ } + + return 0; + } +@@ -211,8 +294,10 @@ static int __sync_sched_restore(void *arg) + atomic_cond_read_relaxed(&redirect_done, VAL); + rebuild_sched_state(false); + +- if (is_first_process()) ++ if (is_first_process()) { + stop_time_p2 = ktime_get(); ++ uninstall_bypass_checkers(); ++ } + + return 0; + } -- Gitee From 4d58d53b96de1f805b8f01fd1e326104167baf60 Mon Sep 17 00:00:00 2001 From: Yihao Wu Date: Fri, 3 Jun 2022 00:16:35 +0800 Subject: [PATCH 04/10] tests: add a test for __schedule function This test aims to verify the correctness of our stack balancing technique. There was test_quick_start similar to this test, so we rename it to this new test. 
Signed-off-by: Yihao Wu --- tests/bundles/ci | 2 +- tests/{test_quick_start => test_schedule}/assert | 0 tests/{test_quick_start => test_schedule}/patch.diff | 10 ++++++---- 3 files changed, 7 insertions(+), 5 deletions(-) rename tests/{test_quick_start => test_schedule}/assert (100%) rename tests/{test_quick_start => test_schedule}/patch.diff (65%) diff --git a/tests/bundles/ci b/tests/bundles/ci index 9f71d22..95276c4 100644 --- a/tests/bundles/ci +++ b/tests/bundles/ci @@ -1,4 +1,4 @@ -quick_start +schedule public_var var_uniformity cpu_throttle diff --git a/tests/test_quick_start/assert b/tests/test_schedule/assert similarity index 100% rename from tests/test_quick_start/assert rename to tests/test_schedule/assert diff --git a/tests/test_quick_start/patch.diff b/tests/test_schedule/patch.diff similarity index 65% rename from tests/test_quick_start/patch.diff rename to tests/test_schedule/patch.diff index b4be4f3..d374267 100644 --- a/tests/test_quick_start/patch.diff +++ b/tests/test_schedule/patch.diff @@ -5,12 +5,14 @@ diff --git a/scheduler/kernel/sched/mod/core.c b/scheduler/kernel/sched/mod/core index 9f16b72..21262fd 100644 --- a/scheduler/kernel/sched/mod/core.c +++ b/scheduler/kernel/sched/mod/core.c -@@ -3234,6 +3234,8 @@ static void __sched notrace __schedule(bool preempt) +@@ -3248,6 +3248,10 @@ static void __sched notrace __schedule(bool preempt) + struct rq_flags rf; struct rq *rq; int cpu; - -+ printk_once("I am the new scheduler: __schedule\n"); ++ char buf[128]; + ++ sprintf(buf, "%s", "I am the new scheduler: __schedule\n"); ++ printk_once(buf); + cpu = smp_processor_id(); rq = cpu_rq(cpu); - prev = rq->curr; -- Gitee From b79b650e8fce7eaff94873c2bbb3ab04db6185dc Mon Sep 17 00:00:00 2001 From: Yihao Wu Date: Mon, 6 Jun 2022 19:12:19 +0800 Subject: [PATCH 05/10] tests: add a memory pressure test Each time alloc 4 stress-ng processes, and in total consume 60% of the available memory, until we have only 50MB left. 
If rpm, insmod or stress-ng-vm is OOM-killed, we exit the test early and give a warning, but don't fail the test. Signed-off-by: Yihao Wu --- tests/bundles/ci | 1 + tests/test_mem_pressure/assert | 87 ++++++++++++++++++++++++++++++ tests/test_mem_pressure/patch.diff | 0 3 files changed, 88 insertions(+) create mode 100755 tests/test_mem_pressure/assert create mode 100644 tests/test_mem_pressure/patch.diff diff --git a/tests/bundles/ci b/tests/bundles/ci index 95276c4..e42bdf3 100644 --- a/tests/bundles/ci +++ b/tests/bundles/ci @@ -5,3 +5,4 @@ cpu_throttle domain_rebuild sched_syscall insider_bypassed +mem_pressure diff --git a/tests/test_mem_pressure/assert b/tests/test_mem_pressure/assert new file mode 100755 index 0000000..518022d --- /dev/null +++ b/tests/test_mem_pressure/assert @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 + +import sys +import os +from math import log, ceil +import logging +from time import sleep +from glob import glob +from sh import stress_ng, yum, awk, rpm, lsmod, grep, killall, dmesg + +import coloredlogs +coloredlogs.install(level='INFO') + +class TestMemPressure: + def setup_class(self): + yum('install', 'stress-ng', assumeyes=True) + self.timeout = 120 # Timeout of a single level + self.retry_interval = 5 # Duration between checking memory + self.process_per_level = 4 # To speedup test + self.step = 0.4 # Leaving only (step^level)% of the whole memory each level + min_mem = 50 * 1024 # The most extreme test case + self.all_available = int(awk('/MemAvailable/{print $2}', '/proc/meminfo')) + self.iterations = ceil(log(1.0 * min_mem / self.all_available, self.step)) + self.rpm = self.get_rpm() + + def teardown_class(self): + killall('stress-ng-vm', _ok_code=[0,1]) + if grep(lsmod(), 'scheduler', word_regexp=True, _ok_code=[0,1]).exit_code == 0: + rpm('-e', 'scheduler-xxx') + + def get_rpm(self): + scheduler_rpm = glob(os.path.join('/tmp/work', 'scheduler*.rpm')) + if len(scheduler_rpm) != 1: + print("Please check your scheduler rpm"); + sys.exit(1) 
+ return scheduler_rpm + + def check_oom(self, pin_mem): + exit_if_oom = '|'.join(['stress-ng-vm', 'rpm', 'insmod']) + if grep(dmesg(), '-P', 'Killed process [0-9]* \((%s)\)' % exit_if_oom, _ok_code=[0,1]).exit_code == 1: + return + logging.warning("Test exited early because oomed when pinning %d kbytes memory" % pin_mem) + self.teardown_class() + os._exit(0) + + def pin_memory(self, target): + left = int(awk('/MemAvailable/{print $2}', '/proc/meminfo')) + if left < target: + return + stress_ng(vm_bytes='%dk' % ceil((left - target) / self.process_per_level), vm_keep=True, vm=self.process_per_level, _bg=True) + + def wait_memory(self, target): + for _ in range(ceil(1.0 * self.timeout / self.retry_interval)): + self.check_oom(target) + if int(awk('/MemAvailable/{print $2}', '/proc/meminfo')) < target: + break + sleep(self.retry_interval) + else: + logging.error("Fails because timeout to pin until only %d kbytes memory left" % target) + self.teardown_class() + os._exit(1) + + def test_level(self, level): + dmesg(clear=True) + target = self.all_available * self.step ** level + self.pin_memory(target) + self.wait_memory(target) + + curr = int(awk('/MemAvailable/{print $2}', '/proc/meminfo')) + logging.info("Installing rpm when available memory = %dKB" % curr) + try: + rpm('-ivh', self.rpm) + grep(lsmod(), 'scheduler', word_regexp=True) + rpm('-e', 'scheduler-xxx') + except Exception: + self.check_oom(target) + + def test_all(self): + for level in range(1, self.iterations): + self.test_level(level) + +if __name__ == '__main__': + test_unit = TestMemPressure() + test_unit.setup_class() + test_unit.test_all() + test_unit.teardown_class() + print("Memory pressure test " + "\033[32mPASS\033[0m") \ No newline at end of file diff --git a/tests/test_mem_pressure/patch.diff b/tests/test_mem_pressure/patch.diff new file mode 100644 index 0000000..e69de29 -- Gitee From da826b155dafa7c9ca482a67c74ee5579d76dfc2 Mon Sep 17 00:00:00 2001 From: Yihao Wu Date: Mon, 6 Jun 2022 19:33:30 
+0800 Subject: [PATCH 06/10] tests: add a performance test for bare package Run unixbench and will-it-scale, and output log in stdout before and after package is loaded. Signed-off-by: Yihao Wu --- tests/bundles/release | 1 + tests/prep_env | 3 +- tests/test_bare_performance/assert | 58 ++++++++++++++++++++++++++ tests/test_bare_performance/patch.diff | 0 4 files changed, 60 insertions(+), 2 deletions(-) create mode 100644 tests/bundles/release create mode 100755 tests/test_bare_performance/assert create mode 100644 tests/test_bare_performance/patch.diff diff --git a/tests/bundles/release b/tests/bundles/release new file mode 100644 index 0000000..3f5aacc --- /dev/null +++ b/tests/bundles/release @@ -0,0 +1 @@ +bare_performance diff --git a/tests/prep_env b/tests/prep_env index 0521aff..583ca1c 100755 --- a/tests/prep_env +++ b/tests/prep_env @@ -18,8 +18,7 @@ uname_noarch=${uname_r%.*} yum install anolis-repos -y yum install sysstat -y yum install podman yum-utils kernel-debuginfo-${uname_r} kernel-devel-${uname_r} --enablerepo=Plus-debuginfo --enablerepo=Plus -y -pip3 install sh -pip3 install pyyaml +pip3 install sh pyyaml coloredlogs container=$(podman ps -a | awk '$NF=="plugsched"{print $1}') if [ -n "$container" ]; then diff --git a/tests/test_bare_performance/assert b/tests/test_bare_performance/assert new file mode 100755 index 0000000..7e141c1 --- /dev/null +++ b/tests/test_bare_performance/assert @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 + +import sys +import os +from glob import glob +from sh import wget, unzip, make, grep, lsmod, rpm, yum, Command +import coloredlogs +coloredlogs.install(level='INFO') + +class TestMemPressure: + def setup_class(self): + yum.install(['unzip', 'perl-Time-HiRes', 'hwloc-devel', 'python39'], assumeyes=True) + self.rpm = self.get_rpm() + self.install_unixbench() + self.install_will_it_scale() + + def install_unixbench(self): + wget('https://github.com/kdlucas/byte-unixbench/archive/refs/heads/master.zip', + 
output_document='unixbench.zip') + unzip('-o', 'unixbench.zip') + make(_cwd='byte-unixbench-master/UnixBench') + + def install_will_it_scale(self): + wget('https://github.com/antonblanchard/will-it-scale/archive/refs/heads/master.zip', + output_document='will-it-scale.zip') + unzip('-o', 'will-it-scale.zip') + make(_cwd='will-it-scale-master') + + def teardown_class(self): + if grep(lsmod(), 'scheduler', word_regexp=True, _ok_code=[0,1]).exit_code == 0: + rpm('scheduler-xxx', erase=True) + + def get_rpm(self): + scheduler_rpm = glob(os.path.join('/tmp/work', 'scheduler*.rpm')) + if len(scheduler_rpm) != 1: + print("Please check your scheduler rpm"); + sys.exit(1) + return scheduler_rpm + + def __test_all(self): + for case in ['getppid1', 'futex1', 'futex2', 'futex3', 'futex4', + 'lock1', 'posix_semaphore1', 'pthread_mutex1', + 'pthread_mutex2', 'pthread_mutex3', + 'pthread_mutex4', 'sched_yield', 'signal1']: + Command('python3.9')('./runtest.py', case, _cwd='will-it-scale-master', _out=sys.stdout) + Command('./Run')(_cwd='byte-unixbench-master/UnixBench', _out=sys.stdout) + + def test_all(self): + self.__test_all() + rpm(self.rpm, install=True) + self.__test_all() + +if __name__ == '__main__': + test_unit = TestMemPressure() + test_unit.setup_class() + test_unit.test_all() + test_unit.teardown_class() + print("Bare package performance test " + "\033[32mPASS\033[0m") \ No newline at end of file diff --git a/tests/test_bare_performance/patch.diff b/tests/test_bare_performance/patch.diff new file mode 100644 index 0000000..e69de29 -- Gitee From 48a7c293ff7948bc69781db2c667945e186378e1 Mon Sep 17 00:00:00 2001 From: Yihao Wu Date: Wed, 8 Jun 2022 13:35:34 +0800 Subject: [PATCH 07/10] tests: add SLI continuity test Test whether SLI jumps when installing/removing scheduler Now usr%, sys%, nr_running is covered. 
Signed-off-by: Yihao Wu --- tests/bundles/ci | 1 + tests/test_sli_continuity/assert | 152 +++++++++++++++++++++++++++ tests/test_sli_continuity/patch.diff | 0 3 files changed, 153 insertions(+) create mode 100755 tests/test_sli_continuity/assert create mode 100644 tests/test_sli_continuity/patch.diff diff --git a/tests/bundles/ci b/tests/bundles/ci index e42bdf3..b2c3fd7 100644 --- a/tests/bundles/ci +++ b/tests/bundles/ci @@ -6,3 +6,4 @@ domain_rebuild sched_syscall insider_bypassed mem_pressure +sli_continuity diff --git a/tests/test_sli_continuity/assert b/tests/test_sli_continuity/assert new file mode 100755 index 0000000..1360f10 --- /dev/null +++ b/tests/test_sli_continuity/assert @@ -0,0 +1,152 @@ +#!/usr/bin/env python3 + +import os +from re import I +import sys +from sh import yum, stress_ng, cat, RunningCommand, getconf, rpm, grep, lsmod, killall, awk +from time import sleep, time +from glob import glob + +epsilon = 0.03 + +# These three functions ensure respective conditions +# and besides, ensure n < 1, which is a common condition +def near(n, m): + return abs(n - m) < epsilon and n < 1 + +def positive(n): + return n > 0 and n < 1 + +def near_greater_equal(n, m): + return n >= m and n < m + epsilon and n < 1 + +class TestSLI: + def setup_class(self): + yum('install', 'stress-ng', assumeyes=True) + self.rpm = self.get_rpm() + print("SLI continuity test") + self.HZ = int(getconf('CLK_TCK')) + + def get_rpm(self): + scheduler_rpm = glob(os.path.join('/tmp/work', 'scheduler*.rpm')) + if len(scheduler_rpm) != 1: + print("Please check your scheduler rpm"); + sys.exit(1) + return scheduler_rpm + + def task_usage(self, pid:int): + procf = '/proc/%d/stat' % pid + fields = cat(procf).split() + utime = float(fields[13]) / self.HZ + stime = float(fields[14]) / self.HZ + return utime, stime, time() + + def test_util(self, *args, **kwargs): + stress:RunningCommand = stress_ng(**kwargs, _bg=True) + sleep(2) + master = stress.process.pid + worker = 
int(cat('/proc/{pid}/task/{pid}/children'.format(pid=master)).strip()) + + u0, s0, t0 = self.task_usage(worker) + sleep(2) + u1, s1, t1 = self.task_usage(worker) + rpm(self.rpm, install=True) + u2, s2, t2 = self.task_usage(worker) + sleep(2) + u3, s3, t3 = self.task_usage(worker) + + U0 = (u1 - u0) / (t1 - t0) + S0 = (s1 - s0) / (t1 - t0) + U1 = (u2 - u1) / (t2 - t1) + S1 = (s2 - s1) / (t2 - t1) + U2 = (u3 - u2) / (t3 - t2) + S2 = (s3 - s2) / (t3 - t2) + + return (U0, S0), (U1, S1), (U2, S2) + + def test_usr(self): + (U0, S0), (U1, S1), (U2,S2) = self.test_util(cpu=1, cpu_load=75) + + # Characteristic of 2nd period: + # Don't know exact range, because of stop_machine + # But at least don't exceed 100% + if not positive(U1): + self.error_handler() + + # Characteristic of 3rd period: + if not near(U2, U0): + self.error_handler() + + # Characteristic of all periods: + if not near_greater_equal(S0, 0) or \ + not near_greater_equal(S1, 0) or \ + not near_greater_equal(S2, 0): + self.error_handler() + + killall('stress-ng-cpu', _ok_code=[0,1]) + rpm('scheduler-xxx', erase=True) + + def test_sys(self): + (U0, S0), (U1, S1), (U2,S2) = self.test_util(urandom=1) + + # Characteristic of 1st period: + if not near(S0, 1): + self.error_handler() + + # Characteristic of 2nd period: + if not positive(S1): + self.error_handler() + + # Characteristic of 3rd period: + if not near(S2, S0): + self.error_handler() + + # Characteristic of all periods: + if not near_greater_equal(U0, 0) or \ + not near_greater_equal(U1, 0) or \ + not near_greater_equal(U2, 0): + self.error_handler() + + killall('stress-ng-urand', _ok_code=[0,1]) + rpm('scheduler-xxx', erase=True) + + def test_nr_running(self): + nr_tasks = 500 + stress:RunningCommand = stress_ng(fork=nr_tasks, _bg=True) + sleep(5) + nr_running_0 = int(awk(cat('/proc/stat'), '/procs_running/{print $2}')) + rpm(self.rpm, install=True) + nr_running_1 = int(awk(cat('/proc/stat'), '/procs_running/{print $2}')) + rpm('scheduler-xxx', 
erase=True) + nr_running_2 = int(awk(cat('/proc/stat'), '/procs_running/{print $2}')) + if not near(1.0 * nr_running_0/nr_tasks, 1) or \ + not near(1.0 * nr_running_1/nr_tasks, 1) or \ + not near(1.0 * nr_running_2/nr_tasks, 1): + self.error_handler() + + killall('stress-ng-fork', _ok_code=[0,1]) + rpm('scheduler-xxx', erase=True) + + def test_all(self): + self.test_usr() + self.test_sys() + self.test_nr_running() + + def teardown_class(self): + killall('stress-ng-cpu', _ok_code=[0,1]) + killall('stress-ng-urand', _ok_code=[0,1]) + killall('stress-ng-fork', _ok_code=[0,1]) + if grep(lsmod(), 'scheduler', word_regexp=True, _ok_code=[0,1]).exit_code == 0: + rpm('scheduler-xxx', erase=True) + + def error_handler(self): + print("CPU util test " + "\033[31mFAILED\033[0m") + self.teardown_class() + raise + +if __name__ == '__main__': + test_unit = TestSLI() + test_unit.setup_class() + test_unit.test_all() + test_unit.teardown_class() + print("SLI continuity test " + "\033[32mPASS\033[0m") diff --git a/tests/test_sli_continuity/patch.diff b/tests/test_sli_continuity/patch.diff new file mode 100644 index 0000000..e69de29 -- Gitee From db235a0d4769875473cbf0c92e4f201592bea297 Mon Sep 17 00:00:00 2001 From: Yihao Wu Date: Thu, 9 Jun 2022 14:46:16 +0800 Subject: [PATCH 08/10] tests: add a reboot test This checks if our rpm spec is stable Signed-off-by: Yihao Wu --- tests/bundles/reboot | 1 + tests/test_reboot/assert | 110 +++++++++++++++++++++++++++++++++++ tests/test_reboot/patch.diff | 0 tests/test_reboot/runonce | 46 +++++++++++++++ 4 files changed, 157 insertions(+) create mode 100644 tests/bundles/reboot create mode 100755 tests/test_reboot/assert create mode 100644 tests/test_reboot/patch.diff create mode 100755 tests/test_reboot/runonce diff --git a/tests/bundles/reboot b/tests/bundles/reboot new file mode 100644 index 0000000..b4c99da --- /dev/null +++ b/tests/bundles/reboot @@ -0,0 +1 @@ +reboot diff --git a/tests/test_reboot/assert b/tests/test_reboot/assert new 
file mode 100755 index 0000000..e293d4c --- /dev/null +++ b/tests/test_reboot/assert @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 + +from sh import Command +import sys +from glob import glob +import os +from sh import rpm, grep, lsmod, grubby, rpm2cpio, echo, cpio, awk, modinfo, yum, reboot, uname +import logging +import coloredlogs +coloredlogs.install(level='INFO') + +runonce = Command('test_reboot/runonce') + +class TestReboot: + def setup_class(self, step, alter_ver=''): + self.step = int(step) + self.alter_ver = alter_ver + + def get_rpm(self): + scheduler_rpm = glob(os.path.join('/tmp/work', 'scheduler*.rpm')) + if len(scheduler_rpm) != 1: + print("Please check your scheduler rpm"); + sys.exit(1) + return scheduler_rpm + + def get_kernel_from_rpm(self): + rpm = self.get_rpm() + echo('*/scheduler.ko', _out='pattern.txt') + cpio(rpm2cpio(rpm, _piped=True), + to_stdout=True, extract=True, pattern_file='pattern.txt', + _out='scheduler.ko') + return awk(modinfo('scheduler.ko'), '/vermagic/{print $2}').strip() + + def error_handler(self): + print("Reboot test " + "\033[31mFAILED\033[0m") + self.teardown_class() + raise + + def check_scheduler_ver(self, expected): + curr = uname(kernel_release=True).strip() + if expected != curr: + self.error_handler() + + def install_alternative_kernel(self): + curr = uname(kernel_release=True).strip() + uname_noarch = curr[:curr.rfind('.')] + arch = uname(hardware_platform=True).strip() + + installed_kernel = yum.list('kernel', showduplicates=True, color=0, installed=True, enablerepo='Plus') + available_kernel = yum.list('kernel', showduplicates=True, color=0, enablerepo='Plus') + installed_vers = awk(installed_kernel, '/^kernel/{print $2}').splitlines() + available_vers = awk(available_kernel, '/^kernel/{print $2}').splitlines() + + if len(installed_vers) >= 2: + installed_vers.remove(uname_noarch) + return '%s.%s' % (installed_vers[0], arch) + else: + available_vers.remove(uname_noarch) + yum.install('kernel-%s' % 
available_vers[0], assumeyes=True) + return '%s.%s' % (available_vers[0], arch) + + def change_kernel(self, ver): + vmlinuz = '/boot/vmlinuz-%s' % ver + grubby(set_default=vmlinuz) + + def test_all(self): + if self.step == 0: + # check kernel, install, check ko, change kernel, reboot + logging.info("Running Test Reboot #1") + self.check_scheduler_ver(self.get_kernel_from_rpm()) + rpm(self.get_rpm(), install=True) + grep(lsmod(), 'scheduler', word_regexp=True) + alter_ver = self.install_alternative_kernel() + self.change_kernel(alter_ver) + runonce('test_reboot/assert %d %s' % (self.step + 1, alter_ver)) + elif self.step == 1: + # check kernel, remove, install, change back kernel, reboot + logging.info("Running Test Reboot #2") + self.check_scheduler_ver(self.alter_ver) + rpm('scheduler-xxx', erase=True) + rpm(self.get_rpm(), install=True) + self.change_kernel(self.get_kernel_from_rpm()) + runonce('test_reboot/assert %d' % (self.step + 1)) + elif self.step == 2: + # check kernel, check ko, remove + logging.info("Running Test Reboot #3") + self.check_scheduler_ver(self.get_kernel_from_rpm()) + grep(lsmod(), 'scheduler', word_regexp=True) + rpm('scheduler-xxx', erase=True) + return + else: + return + + reboot() + + def teardown_class(self): + if grep(lsmod(), 'scheduler', word_regexp=True, _ok_code=[0,1]).exit_code == 0: + rpm('scheduler-xxx', erase=True) + +if __name__ == '__main__': + test_unit = TestReboot() + if len(sys.argv) == 1: + args = [0] + elif len(sys.argv) >= 2: + args = sys.argv[1:] + + test_unit.setup_class(*args) + test_unit.test_all() + test_unit.teardown_class() + print("Reboot test " + "\033[32mPASS\033[0m") \ No newline at end of file diff --git a/tests/test_reboot/patch.diff b/tests/test_reboot/patch.diff new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_reboot/runonce b/tests/test_reboot/runonce new file mode 100755 index 0000000..776d141 --- /dev/null +++ b/tests/test_reboot/runonce @@ -0,0 +1,46 @@ +#!/bin/bash + +if [[ $# -eq 
0 ]]; then + echo "Schedules a command to be run after the next reboot." + echo "Usage: $(basename $0) " + echo " $(basename $0) -p " + echo " $(basename $0) -r " +else + REMOVE=0 + COMMAND=${!#} + SCRIPTPATH=$PATH + + while getopts ":r:p:" optionName; do + case "$optionName" in + r) REMOVE=1; COMMAND=$OPTARG;; + p) SCRIPTPATH=$OPTARG;; + esac + done + + SCRIPT="${HOME}/.$(basename $0)_$(echo $COMMAND | sed 's/[^a-zA-Z0-9_]/_/g')" + + if [[ ! -f $SCRIPT ]]; then + echo "PATH=$SCRIPTPATH" >> $SCRIPT + echo "cd $(pwd)" >> $SCRIPT + echo "logger -t $(basename $0) -p local3.info \"COMMAND=$COMMAND ; USER=\$(whoami) ($(logname)) ; PWD=$(pwd) ; PATH=\$PATH\"" >> $SCRIPT + echo "$0 -r \"$(echo $COMMAND | sed 's/\"/\\\"/g')\"" >> $SCRIPT + echo "$COMMAND | logger -t $(basename $0) -p local3.info" >> $SCRIPT + chmod +x $SCRIPT + fi + + CRONTAB="${HOME}/.$(basename $0)_temp_crontab_$RANDOM" + ENTRY="@reboot $SCRIPT" + + echo "$(crontab -l 2>/dev/null)" | grep -v "$ENTRY" | grep -v "^# DO NOT EDIT THIS FILE - edit the master and reinstall.$" | grep -v "^# ([^ ]* installed on [^)]*)$" | grep -v "^# (Cron version [^$]*\$[^$]*\$)$" > $CRONTAB + + if [[ $REMOVE -eq 0 ]]; then + echo "$ENTRY" >> $CRONTAB + fi + + crontab $CRONTAB + rm $CRONTAB + + if [[ $REMOVE -ne 0 ]]; then + rm $SCRIPT + fi +fi -- Gitee From e4654f6e71d1387fc3a13f434282d296f9c946d6 Mon Sep 17 00:00:00 2001 From: Yihao Wu Date: Fri, 10 Jun 2022 00:01:10 +0800 Subject: [PATCH 09/10] config: add wake_up_idle_ht as an interface function Due to springboard technique, __schedule is divided into two halves. The top half can be seen as an interface function. And the bottom half can be seen as an outsider function. wake_up_idle_ht is called by the outsider half of __schedule. So it should've been infected as an outsider too. But the current sched_boundary algorithm sees __schedule as a whole interface function. To allow users to modify this scheduler function, we add it as an interface function. 
Signed-off-by: Yihao Wu --- configs/4.19/sched_boundary.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/configs/4.19/sched_boundary.yaml b/configs/4.19/sched_boundary.yaml index e973c06..ef3d90f 100644 --- a/configs/4.19/sched_boundary.yaml +++ b/configs/4.19/sched_boundary.yaml @@ -30,6 +30,7 @@ interface_prefix: - __arm64_compat_sys_ function: interface: + - wake_up_idle_ht - do_set_cpus_allowed - set_user_nice - sched_setscheduler -- Gitee From 743a35226ccba147624e3cb206eff385fb9fc027 Mon Sep 17 00:00:00 2001 From: Yihao Wu Date: Fri, 10 Jun 2022 00:53:43 +0800 Subject: [PATCH 10/10] tests: use raise rather than exit This eases debugging. While at it, remove trailing spaces. Signed-off-by: Yihao Wu --- tests/test_bare_performance/assert | 2 +- tests/test_cpu_throttle/assert | 10 +++++----- tests/test_domain_rebuild/assert | 8 ++++---- tests/test_mem_pressure/assert | 2 +- tests/test_public_var/assert | 4 ++-- tests/test_reboot/assert | 2 +- tests/test_sched_syscall/assert | 8 ++++---- tests/test_sli_continuity/assert | 2 +- tests/test_var_uniformity/assert | 8 ++++---- 9 files changed, 23 insertions(+), 23 deletions(-) diff --git a/tests/test_bare_performance/assert b/tests/test_bare_performance/assert index 7e141c1..a005ecf 100755 --- a/tests/test_bare_performance/assert +++ b/tests/test_bare_performance/assert @@ -34,7 +34,7 @@ class TestMemPressure: scheduler_rpm = glob(os.path.join('/tmp/work', 'scheduler*.rpm')) if len(scheduler_rpm) != 1: print("Please check your scheduler rpm"); - sys.exit(1) + raise return scheduler_rpm def __test_all(self): diff --git a/tests/test_cpu_throttle/assert b/tests/test_cpu_throttle/assert index 435b397..8ccaad8 100755 --- a/tests/test_cpu_throttle/assert +++ b/tests/test_cpu_throttle/assert @@ -12,7 +12,7 @@ class TestCPUThrottle: self.test_flag = True print("CPU throttle test") sh.mkdir('/sys/fs/cgroup/cpu/test') - + def init_cgroup(self): cmd = "while :; do :; done" self.start_time = time.time() @@ -31,7 +31,7 
@@ class TestCPUThrottle: self.set_cfs_quota('100000') self.check_gt_75() self.check_after_unload() - + def check_le_75(self): cpu_util = self.get_cpu_util(self.child.pid) # assert cpu_util <= 75 @@ -43,7 +43,7 @@ class TestCPUThrottle: if len(scheduler_rpm) != 1: print("Please check your scheduler rpm"); self.teardown_class() - sys.exit(1) + raise scheduler_rpm = scheduler_rpm[0] sh.rpm('-ivh', scheduler_rpm) cpu_util = self.get_cpu_util(self.child.pid) @@ -73,7 +73,7 @@ class TestCPUThrottle: total_time = float(process_stat[13]) + float(process_stat[14]) elapsed_seconds = time.time() - self.start_time return 100.0 * ((total_time / herts) / elapsed_seconds) - + cpu_util_1 = cpu_usage() cpu_util_2 = cpu_usage() cpu_util = (cpu_util_1 + cpu_util_2) / 2 @@ -92,7 +92,7 @@ class TestCPUThrottle: print(err_msg) print("CPU throttle test " + "\033[31mFAILED\033[0m") self.teardown_class() - sys.exit(1) + raise if __name__ == '__main__': diff --git a/tests/test_domain_rebuild/assert b/tests/test_domain_rebuild/assert index 367faf4..fa04250 100755 --- a/tests/test_domain_rebuild/assert +++ b/tests/test_domain_rebuild/assert @@ -18,7 +18,7 @@ class CPUDomainReuildTest: def add_cpu(num): cpu_state = "/sys/devices/system/cpu/cpu" + str(num) + "/online" self.cpu_set.append(cpu_state) - + for i in range(0, 6, 2): add_cpu(i) @@ -67,7 +67,7 @@ class CPUDomainReuildTest: if len(scheduler_rpm) != 1: print("Please check your scheduler rpm"); self.teardown_class() - sys.exit(1) + raise scheduler_rpm = scheduler_rpm[0] sh.rpm('-ivh', scheduler_rpm) @@ -91,7 +91,7 @@ class CPUDomainReuildTest: print("CPU domain rebuild test " + "\033[31mFAILED\033[0m") self.unload_scheduler() self.reload_cpu_state() - sys.exit(1) + raise def teardown_class(self): self.unload_scheduler() @@ -103,5 +103,5 @@ if __name__ == '__main__': unit_test.test_cpu_rebuild() unit_test.teardown_class() print("CPU domain rebuild test " + "\033[32mPASS\033[0m") - + diff --git a/tests/test_mem_pressure/assert 
b/tests/test_mem_pressure/assert index 518022d..a782767 100755 --- a/tests/test_mem_pressure/assert +++ b/tests/test_mem_pressure/assert @@ -32,7 +32,7 @@ class TestMemPressure: scheduler_rpm = glob(os.path.join('/tmp/work', 'scheduler*.rpm')) if len(scheduler_rpm) != 1: print("Please check your scheduler rpm"); - sys.exit(1) + raise return scheduler_rpm def check_oom(self, pin_mem): diff --git a/tests/test_public_var/assert b/tests/test_public_var/assert index 8ea7a09..4b39c0a 100755 --- a/tests/test_public_var/assert +++ b/tests/test_public_var/assert @@ -15,7 +15,7 @@ class TestPublicVar: if len(scheduler_rpm) != 1: print("Please check your scheduler rpm"); self.teardown_class() - sys.exit(1) + raise scheduler_rpm = scheduler_rpm[0] sh.rpm('-ivh', scheduler_rpm) module = '/var/plugsched/' + cur_sys + '/scheduler.ko' @@ -43,7 +43,7 @@ class TestPublicVar: print("Public var: " + str(var) + "is not UND") self.teardown_class() print("Public vars test " + "\033[31mFAILED\033[0m") - sys.exit(1) + raise def teardown_class(self): tmp = subprocess.Popen("lsmod | grep scheduler", shell=True, stdout=subprocess.PIPE) diff --git a/tests/test_reboot/assert b/tests/test_reboot/assert index e293d4c..a5640b7 100755 --- a/tests/test_reboot/assert +++ b/tests/test_reboot/assert @@ -20,7 +20,7 @@ class TestReboot: scheduler_rpm = glob(os.path.join('/tmp/work', 'scheduler*.rpm')) if len(scheduler_rpm) != 1: print("Please check your scheduler rpm"); - sys.exit(1) + raise return scheduler_rpm def get_kernel_from_rpm(self): diff --git a/tests/test_sched_syscall/assert b/tests/test_sched_syscall/assert index 419692c..7e46be1 100755 --- a/tests/test_sched_syscall/assert +++ b/tests/test_sched_syscall/assert @@ -17,7 +17,7 @@ class TestSchedSyscall: if len(scheduler_rpm) != 1: print("Please check your scheduler rpm"); self.teardown_class() - sys.exit(1) + raise scheduler_rpm = scheduler_rpm[0] sh.rpm('-ivh', scheduler_rpm) @@ -33,7 +33,7 @@ class TestSchedSyscall: if fa_mems != ch_mems or 
fa_cpus != ch_cpus: self.error_handler() sh.rmdir("/sys/fs/cgroup/cpuset/test") - + def test_policy_and_prio(self): cmd = "chrt -p -f 10 " + str(self.child.pid) subprocess.Popen(cmd, shell=True) @@ -48,10 +48,10 @@ class TestSchedSyscall: def error_handler(self): self.child.kill() self.child.wait() - sh.rmdir("/sys/fs/cgroup/cpuset/test") + sh.rmdir("/sys/fs/cgroup/cpuset/test", _ok_code=[0,1]) self.unload_scheduler() print("Sched syscall test " + "\033[31mFAILED\033[0m") - sys.exit(1) + raise def unload_scheduler(self): tmp = subprocess.Popen("lsmod | grep scheduler", shell=True, stdout=subprocess.PIPE) diff --git a/tests/test_sli_continuity/assert b/tests/test_sli_continuity/assert index 1360f10..58343b0 100755 --- a/tests/test_sli_continuity/assert +++ b/tests/test_sli_continuity/assert @@ -31,7 +31,7 @@ class TestSLI: scheduler_rpm = glob(os.path.join('/tmp/work', 'scheduler*.rpm')) if len(scheduler_rpm) != 1: print("Please check your scheduler rpm"); - sys.exit(1) + raise return scheduler_rpm def task_usage(self, pid:int): diff --git a/tests/test_var_uniformity/assert b/tests/test_var_uniformity/assert index 317db75..b54908b 100755 --- a/tests/test_var_uniformity/assert +++ b/tests/test_var_uniformity/assert @@ -31,14 +31,14 @@ class TestVarUniformity: "/proc/sys/kernel/sched_cfs_bandwidth_slice_us", "/sys/kernel/debug/sched_debug", ] - + def before_change(self): self.orig_data = {} self.record_data(self.orig_data) self.load_scheduler() self.data_after_load = {} self.record_data(self.data_after_load) - + def record_data(self, dict: Dict): for item in self.global_name: if not os.path.exists(item): @@ -50,7 +50,7 @@ class TestVarUniformity: if len(scheduler_rpm) != 1: print("Please check your scheduler rpm"); self.teardown_class() - sys.exit(1) + raise scheduler_rpm = scheduler_rpm[0] sh.rpm('-ivh', scheduler_rpm) @@ -89,7 +89,7 @@ class TestVarUniformity: def error_handler(self): print("Var uniformity test " + "\033[31mFAILED\033[0m") self.teardown_class() - 
sys.exit(1) + raise if __name__ == '__main__': unit_test = TestVarUniformity() -- Gitee