From 6214c429292cbf5e8e08579851808cab33627041 Mon Sep 17 00:00:00 2001 From: overweight Date: Tue, 28 Dec 2021 15:03:54 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9run=5Fwith=5Fsd,=20=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0=E9=AA=8C=E8=AF=81=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- run_with_sd/README.md | 42 ++++- run_with_sd/patches/0004-add-signal.patch | 150 +++++++++--------- .../patches/0005-donot-wait-in-shutdown.patch | 29 ++++ .../patches/0006-reexec-when-crash.patch | 95 +++++++++++ run_with_sd/patches/systemd-248-13.src.rpm | 1 + run_with_sd/systemd-dockerimg/.dockerignore | 2 + run_with_sd/systemd-dockerimg/Dockerfile | 3 + run_with_sd/systemd-dockerimg/build.sh | 10 ++ run_with_sd/systemd-dockerimg/openEuler.repo | 50 ++++++ 9 files changed, 306 insertions(+), 76 deletions(-) create mode 100644 run_with_sd/patches/0005-donot-wait-in-shutdown.patch create mode 100644 run_with_sd/patches/0006-reexec-when-crash.patch create mode 100644 run_with_sd/patches/systemd-248-13.src.rpm create mode 100755 run_with_sd/systemd-dockerimg/.dockerignore create mode 100644 run_with_sd/systemd-dockerimg/Dockerfile create mode 100755 run_with_sd/systemd-dockerimg/build.sh create mode 100644 run_with_sd/systemd-dockerimg/openEuler.repo diff --git a/run_with_sd/README.md b/run_with_sd/README.md index 03b3b51c..0ab5f7cc 100644 --- a/run_with_sd/README.md +++ b/run_with_sd/README.md @@ -1,8 +1,48 @@ -# 兼容systemd模式运行问题 +# 兼容systemd模式运行 ## 思路 +以process1为1号进程,拉起systemd以非1号运行,并负责监控systemd的运行状态。 ## 适配 +基于openEuler 21.09构建systemd,需要适配systemd源码以支持非1号运行。 + +适配的代码放在[patches](patches/)目录,适配的systemd版本是systemd-248.13。 + +本次适配用于原型验证,故相关代码的修改皆以简单原则,满足功能验证即可。 + +适配中,部分问题未解决,规避处理。 + +1. 运行systemd需要关闭selinux, /etc/selinux/config, enforcing to disabled +2. shutdown中需要增加信号处理代码,用以处理非1号执行时接受子进程信号。当前直接跳过,不做处理。 +3. 
systemd从crash中恢复的适配,未考虑状态不可信的问题。因此构造crash的过程中,有概率systemd无法从crash中恢复。 ## 验证 +至少有两种方式验证。 +### 容器方式(在容器中非1号进程运行systemd) +1. 使用[patches](patches/)目录下的适配代码编译systemd,并将输出的rpms,放在[systemd-dockerimg/rpms](systemd-dockerimg/rpms/)目录下。 +2. 使用[build.sh](systemd-dockerimg/build.sh)构建systemd的容器image。可以`chroot rootfs /bin/bash`,通过`passwd`命令修改登录密码,然后执行`build.sh`更新镜像。 +3. 修改仓库根目录下的Dockerfile,修改`FROM scratch`为`FROM systemd`, 去除`#RUN rm -f /sbin/init`注释 并在根目录下执行`./docker-run.sh /usr/lib/systemd/systemd` +4. 可另起窗口,执行`docker exec -it prun bash`进入容器。 + +### 虚拟机方式(在虚拟机中非1号进程运行systemd) +1. 安装21.09虚拟机镜像。 +2. 禁用selinux,/etc/selinux/config, enforcing to disabled +3. 将systemd的rpms上传到虚拟机中,并执行`rpm -Fvh *.rpm`安装升级。 +4. 将process1中编译的init进程,替换到虚拟机/init, /sbin/init,建议先`rm /sbin/init`删除init软链接,否则会覆盖systemd程序。 +5. 修改dracut,`/usr/lib/dracut/modules.d/00systemd/module-setup.sh`, 替换 + +``` +ln_r "$systemdutildir"/systemd "/init" +ln_r "$systemdutildir"/systemd "/sbin/init" +``` +为 + +``` +inst_multiple -o \ + /init \ + /sbin/init +``` +6. 
执行`dracut -f`覆盖更新initrd,reboot重启验证。 + + diff --git a/run_with_sd/patches/0004-add-signal.patch b/run_with_sd/patches/0004-add-signal.patch index d1cd3c7b..988c32a8 100644 --- a/run_with_sd/patches/0004-add-signal.patch +++ b/run_with_sd/patches/0004-add-signal.patch @@ -1,15 +1,14 @@ -From 6f06418621fccb6237c879079ff4656138890249 Mon Sep 17 00:00:00 2001 -From: Ruidong Cao -Date: Fri, 19 Nov 2021 02:32:26 +0800 +From 81e1652c378be010b64b6e49b22ccaaebd709ecc Mon Sep 17 00:00:00 2001 +From: rpm-build +Date: Tue, 21 Dec 2021 15:09:31 +0800 Subject: [PATCH] add signal -Signed-off-by: Ruidong Cao --- - src/core/manager.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 65 insertions(+) + src/core/manager.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 67 insertions(+) diff --git a/src/core/manager.c b/src/core/manager.c -index 629966ea60..02eb7fd8f3 100644 +index 58345e1..e509e8d 100644 --- a/src/core/manager.c +++ b/src/core/manager.c @@ -524,6 +524,7 @@ static int manager_setup_signals(Manager *m) { @@ -20,78 +19,79 @@ index 629966ea60..02eb7fd8f3 100644 /* ... space for more special targets ... */ -@@ -2777,6 +2778,70 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t - } +@@ -2778,6 +2779,72 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t switch (sfsi.ssi_signo - SIGRTMIN) { -+ case 7: ; -+ siginfo_t si = { -+ .si_signo = sfsi.ssi_signo, -+ .si_code = sfsi.ssi_code, -+ .si_pid = sfsi.ssi_pid, -+ .si_status = sfsi.ssi_status -+ }; -+ if (IN_SET(si.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED)) { -+ _cleanup_free_ Unit **array_copy = NULL; -+ _cleanup_free_ char *name = NULL; -+ Unit *u1, *u2, **array; -+ -+ (void) get_process_comm(si.si_pid, &name); -+ -+ log_debug("Child "PID_FMT" (%s) died (code=%s, status=%i/%s)", -+ si.si_pid, strna(name), -+ sigchld_code_to_string(si.si_code), -+ si.si_status, -+ strna(si.si_code == CLD_EXITED -+ ? 
exit_status_to_string(si.si_status, EXIT_STATUS_FULL) -+ : signal_to_string(si.si_status))); -+ -+ /* Increase the generation counter used for filtering out duplicate unit invocations */ -+ m->sigchldgen++; -+ -+ /* And now figure out the unit this belongs to, it might be multiple... */ -+ u1 = manager_get_unit_by_pid_cgroup(m, si.si_pid); -+ u2 = hashmap_get(m->watch_pids, PID_TO_PTR(si.si_pid)); -+ array = hashmap_get(m->watch_pids, PID_TO_PTR(-si.si_pid)); -+ if (array) { -+ size_t cnt = 0; -+ -+ /* Count how many entries the array has */ -+ while (array[cnt]) -+ cnt++; -+ -+ /* Make a copy of the array so that we don't trip up on the array changing beneath us */ -+ array_copy = newdup(Unit*, array, cnt+1); -+ if (!array_copy) -+ log_oom(); -+ } -+ -+ /* Finally, execute them all. Note that u1, u2 and the array might contain duplicates, but -+ * that's fine, manager_invoke_sigchld_event() will ensure we only invoke the handlers once for -+ * each iteration. */ -+ if (u1) { -+ /* We check for oom condition, in case we got SIGCHLD before the oom notification. -+ * We only do this for the cgroup the PID belonged to. */ -+ (void) unit_check_oom(u1); -+ -+ /* This only logs for now. In the future when the interface for kills/notifications -+ * is more stable we can extend service results table similar to how kernel oom kills -+ * are managed. 
*/ -+ (void) unit_check_oomd_kill(u1); -+ -+ manager_invoke_sigchld_event(m, u1, &si); -+ } -+ if (u2) -+ manager_invoke_sigchld_event(m, u2, &si); -+ if (array_copy) -+ for (size_t i = 0; array_copy[i]; i++) -+ manager_invoke_sigchld_event(m, array_copy[i], &si); -+ syscall(__NR_rt_sigqueueinfo, 1, sfsi.ssi_signo, &si); -+ } -+ break; ++ case 7: ; /* empty statement: before C23 a label cannot directly precede a declaration */ ++ siginfo_t si = { ++ .si_signo = sfsi.ssi_signo, ++ .si_code = sfsi.ssi_code, ++ .si_pid = sfsi.ssi_pid, ++ .si_status = sfsi.ssi_status ++ }; ++ if (IN_SET(si.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED)) { ++ _cleanup_free_ Unit **array_copy = NULL; ++ _cleanup_free_ char *name = NULL; ++ Unit *u1, *u2, **array; ++ ++ (void) get_process_comm(si.si_pid, &name); ++ ++ log_debug("Child "PID_FMT" (%s) died (code=%s, status=%i/%s)", ++ si.si_pid, strna(name), ++ sigchld_code_to_string(si.si_code), ++ si.si_status, ++ strna(si.si_code == CLD_EXITED ++ ? exit_status_to_string(si.si_status, EXIT_STATUS_FULL) ++ : signal_to_string(si.si_status))); ++ ++ /* Increase the generation counter used for filtering out duplicate unit invocations */ ++ m->sigchldgen++; ++ ++ /* And now figure out the unit this belongs to, it might be multiple... */ ++ u1 = manager_get_unit_by_pid_cgroup(m, si.si_pid); ++ u2 = hashmap_get(m->watch_pids, PID_TO_PTR(si.si_pid)); ++ array = hashmap_get(m->watch_pids, PID_TO_PTR(-si.si_pid)); ++ if (array) { ++ size_t cnt = 0; ++ ++ /* Count how many entries the array has */ ++ while (array[cnt]) ++ cnt++; ++ ++ /* Make a copy of the array so that we don't trip up on the array changing beneath us */ ++ array_copy = newdup(Unit*, array, cnt+1); ++ if (!array_copy) ++ log_oom(); ++ } ++ ++ /* Finally, execute them all. Note that u1, u2 and the array might contain duplicates, but ++ * that's fine, manager_invoke_sigchld_event() will ensure we only invoke the handlers once for ++ * each iteration. */ ++ if (u1) { ++ /* We check for oom condition, in case we got SIGCHLD before the oom notification. 
++ * We only do this for the cgroup the PID belonged to. */ ++ (void) unit_check_oom(u1); ++ ++ /* This only logs for now. In the future when the interface for kills/notifications ++ * is more stable we can extend service results table similar to how kernel oom kills ++ * are managed. */ ++ (void) unit_check_oomd_kill(u1); ++ ++ manager_invoke_sigchld_event(m, u1, &si); ++ } ++ if (u2) ++ manager_invoke_sigchld_event(m, u2, &si); ++ if (array_copy) ++ for (size_t i = 0; array_copy[i]; i++) + manager_invoke_sigchld_event(m, array_copy[i], &si); ++ syscall(__NR_rt_sigqueueinfo, 1, sfsi.ssi_signo, &si); ++ } ++ break; ++ case 20: manager_override_show_status(m, SHOW_STATUS_YES, "signal"); + break; -- -2.25.1 +2.30.0 diff --git a/run_with_sd/patches/0005-donot-wait-in-shutdown.patch b/run_with_sd/patches/0005-donot-wait-in-shutdown.patch new file mode 100644 index 00000000..6fe4fe90 --- /dev/null +++ b/run_with_sd/patches/0005-donot-wait-in-shutdown.patch @@ -0,0 +1,29 @@ +From 26ef072231cf0b1489d4e155ed7dc05af69a3d61 Mon Sep 17 00:00:00 2001 +From: rpm-build +Date: Tue, 21 Dec 2021 16:12:22 +0800 +Subject: [PATCH] donot wait in shutdown + +--- + src/shutdown/shutdown.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/src/shutdown/shutdown.c b/src/shutdown/shutdown.c +index d2b1629..70fd913 100644 +--- a/src/shutdown/shutdown.c ++++ b/src/shutdown/shutdown.c +@@ -390,10 +390,10 @@ int main(int argc, char *argv[]) { + disable_binfmt(); + + log_info("Sending SIGTERM to remaining processes..."); +- broadcast_signal(SIGTERM, true, true, arg_timeout); ++// broadcast_signal(SIGTERM, true, true, arg_timeout); + + log_info("Sending SIGKILL to remaining processes..."); +- broadcast_signal(SIGKILL, true, false, arg_timeout); ++// broadcast_signal(SIGKILL, true, false, arg_timeout); + + need_umount = !in_container; + need_swapoff = !in_container; +-- +2.30.0 + diff --git a/run_with_sd/patches/0006-reexec-when-crash.patch 
b/run_with_sd/patches/0006-reexec-when-crash.patch new file mode 100644 index 00000000..5d99c625 --- /dev/null +++ b/run_with_sd/patches/0006-reexec-when-crash.patch @@ -0,0 +1,95 @@ +From f6db04bb96b9600dfd36afdb3323c2bb80424993 Mon Sep 17 00:00:00 2001 +From: rpm-build +Date: Tue, 28 Dec 2021 11:17:07 +0800 +Subject: [PATCH] reexec when crash + +--- + src/core/main.c | 44 +++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 43 insertions(+), 1 deletion(-) + +diff --git a/src/core/main.c b/src/core/main.c +index d0b1af0..e53b225 100644 +--- a/src/core/main.c ++++ b/src/core/main.c +@@ -104,6 +104,7 @@ static enum { + } arg_action = ACTION_RUN; + + static const char *arg_bus_introspect = NULL; ++static Manager *g_m = NULL; + + /* Those variables are initialized to 0 automatically, so we avoid uninitialized memory access. Real + * defaults are assigned in reset_arguments() below. */ +@@ -194,14 +195,31 @@ static int manager_find_user_config_paths(char ***ret_files, char ***ret_dirs) { + return 0; + } + ++static int prepare_reexecute( ++ Manager *m, ++ FILE **ret_f, ++ FDSet **ret_fds, ++ bool switching_root); ++ ++static void do_reexecute( ++ int argc, ++ char *argv[], ++ const struct rlimit *saved_rlimit_nofile, ++ const struct rlimit *saved_rlimit_memlock, ++ FDSet *fds, ++ const char *switch_root_dir, ++ const char *switch_root_init, ++ const char **ret_error_message); ++ + _noreturn_ static void freeze_or_exit_or_reboot(void) { + + /* If we are running in a container, let's prefer exiting, after all we can propagate an exit code to +- * the container manager, and thus inform it that something went wrong. */ ++ * the container manager, and thus inform it that something went wrong. 
+ if (detect_container() > 0) { + log_emergency("Exiting PID 1..."); + _exit(EXIT_EXCEPTION); + } ++ */ + + if (arg_crash_reboot) { + log_notice("Rebooting in 10s..."); +@@ -212,6 +230,28 @@ _noreturn_ static void freeze_or_exit_or_reboot(void) { + log_emergency_errno(errno, "Failed to reboot: %m"); + } + ++ if(g_m && getpid_cached() == 1){ ++ _cleanup_fdset_free_ FDSet *fds = NULL; ++ struct rlimit saved_rlimit_nofile = RLIMIT_MAKE_CONST(0), ++ saved_rlimit_memlock = RLIMIT_MAKE_CONST(RLIM_INFINITY); ++ const char *error_message = NULL; ++ ++ log_emergency("Recover systemd from crash"); ++ g_m->objective = MANAGER_REEXECUTE; ++ prepare_reexecute(g_m, &arg_serialization, &fds, false); ++ pager_close(); ++ g_m = manager_free(g_m); ++ mac_selinux_finish(); ++ do_reexecute(saved_argc, saved_argv, ++ &saved_rlimit_nofile, ++ &saved_rlimit_memlock, ++ fds, ++ NULL, ++ NULL, ++ &error_message); ++ log_emergency("Fail to reexecute after crash"); ++ } ++ + log_emergency("Freezing execution."); + freeze(); + } +@@ -2846,6 +2886,8 @@ int main(int argc, char *argv[]) { + goto finish; + } + ++ g_m = m; ++ + m->timestamps[MANAGER_TIMESTAMP_KERNEL] = kernel_timestamp; + m->timestamps[MANAGER_TIMESTAMP_INITRD] = initrd_timestamp; + m->timestamps[MANAGER_TIMESTAMP_USERSPACE] = userspace_timestamp; +-- +2.30.0 + diff --git a/run_with_sd/patches/systemd-248-13.src.rpm b/run_with_sd/patches/systemd-248-13.src.rpm new file mode 100644 index 00000000..96d19bc7 --- /dev/null +++ b/run_with_sd/patches/systemd-248-13.src.rpm @@ -0,0 +1 @@ +https://repo.openeuler.org/openEuler-21.09/source/Packages/systemd-248-13.oe1.src.rpm \ No newline at end of file diff --git a/run_with_sd/systemd-dockerimg/.dockerignore b/run_with_sd/systemd-dockerimg/.dockerignore new file mode 100755 index 00000000..446f3f50 --- /dev/null +++ b/run_with_sd/systemd-dockerimg/.dockerignore @@ -0,0 +1,2 @@ +* +!rootfs diff --git a/run_with_sd/systemd-dockerimg/Dockerfile b/run_with_sd/systemd-dockerimg/Dockerfile new 
file mode 100644 index 00000000..faf314ea --- /dev/null +++ b/run_with_sd/systemd-dockerimg/Dockerfile @@ -0,0 +1,3 @@ +FROM scratch +COPY rootfs / +CMD ["/sbin/init"] diff --git a/run_with_sd/systemd-dockerimg/build.sh b/run_with_sd/systemd-dockerimg/build.sh new file mode 100755 index 00000000..8b623f05 --- /dev/null +++ b/run_with_sd/systemd-dockerimg/build.sh @@ -0,0 +1,10 @@ +#!/bin/bash +yum -c openEuler.repo --installroot="$PWD/rootfs" install systemd -y +cp -a rpms/systemd*.rpm rootfs/ +if [ $? -ne 0 ]; then + echo "put your rpms into dir rpms" + exit 1 +fi +chroot rootfs /bin/bash -c "rpm -Fvh /systemd*.rpm" +rm -rf rootfs/systemd*.rpm +docker build --no-cache --tag systemd . diff --git a/run_with_sd/systemd-dockerimg/openEuler.repo b/run_with_sd/systemd-dockerimg/openEuler.repo new file mode 100644 index 00000000..4230b329 --- /dev/null +++ b/run_with_sd/systemd-dockerimg/openEuler.repo @@ -0,0 +1,50 @@ +#generic-repos is licensed under the Mulan PSL v2. +#You can use this software according to the terms and conditions of the Mulan PSL v2. +#You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +#THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +#IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +#PURPOSE. +#See the Mulan PSL v2 for more details. 
+ +[OS] +name=OS +baseurl=http://repo.openeuler.org/openEuler-21.09/OS/$basearch/ +enabled=1 +gpgcheck=1 +gpgkey=http://repo.openeuler.org/openEuler-21.09/OS/$basearch/RPM-GPG-KEY-openEuler + +[everything] +name=everything +baseurl=http://repo.openeuler.org/openEuler-21.09/everything/$basearch/ +enabled=0 +gpgcheck=1 +gpgkey=http://repo.openeuler.org/openEuler-21.09/everything/$basearch/RPM-GPG-KEY-openEuler + +[EPOL] +name=EPOL +baseurl=http://repo.openeuler.org/openEuler-21.09/EPOL/$basearch/ +enabled=0 +gpgcheck=1 +gpgkey=http://repo.openeuler.org/openEuler-21.09/OS/$basearch/RPM-GPG-KEY-openEuler + +[debuginfo] +name=debuginfo +baseurl=http://repo.openeuler.org/openEuler-21.09/debuginfo/$basearch/ +enabled=0 +gpgcheck=1 +gpgkey=http://repo.openeuler.org/openEuler-21.09/debuginfo/$basearch/RPM-GPG-KEY-openEuler + +[source] +name=source +baseurl=http://repo.openeuler.org/openEuler-21.09/source/ +enabled=0 +gpgcheck=1 +gpgkey=http://repo.openeuler.org/openEuler-21.09/source/RPM-GPG-KEY-openEuler + +[update] +name=update +baseurl=http://repo.openeuler.org/openEuler-21.09/update/$basearch/ +enabled=1 +gpgcheck=1 +gpgkey=http://repo.openeuler.org/openEuler-21.09/OS/$basearch/RPM-GPG-KEY-openEuler -- Gitee