diff --git a/0002-compel-add-rseq-syscall-into-compel-std-plugin-sysca.patch b/0002-compel-add-rseq-syscall-into-compel-std-plugin-sysca.patch deleted file mode 100644 index ac103f47572010732534a6125d8f0e6c31d6df8e..0000000000000000000000000000000000000000 --- a/0002-compel-add-rseq-syscall-into-compel-std-plugin-sysca.patch +++ /dev/null @@ -1,74 +0,0 @@ -From ee46b1b5755eacf3be02a67934f0dc690293745b Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 13:28:51 +0800 -Subject: [PATCH 02/16] compel: add rseq syscall into compel std plugin syscall - tables Add rseq syscall numbers for: arm/aarch64, mips64, ppc64le, s390, - x86_64/x86 - -Signed-off-by: Alexander Mikhalitsyn ---- - compel/arch/arm/plugins/std/syscalls/syscall.def | 1 + - compel/arch/mips/plugins/std/syscalls/syscall_64.tbl | 1 + - .../compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl | 1 + - .../compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl | 1 + - compel/arch/x86/plugins/std/syscalls/syscall_32.tbl | 1 + - compel/arch/x86/plugins/std/syscalls/syscall_64.tbl | 1 + - 6 files changed, 6 insertions(+) - -diff --git a/compel/arch/arm/plugins/std/syscalls/syscall.def b/compel/arch/arm/plugins/std/syscalls/syscall.def -index 1b877d1..bb78cbb 100644 ---- a/compel/arch/arm/plugins/std/syscalls/syscall.def -+++ b/compel/arch/arm/plugins/std/syscalls/syscall.def -@@ -119,3 +119,4 @@ clone3 435 435 (struct clone_args *uargs, size_t size) - sched_setaffinity 122 241 (int fd, size_t cpusetsize, const cpu_set_t *mask) - pidfd_open 434 434 (pid_t pid, unsigned int flags) - pidfd_getfd 438 438 (int pidfd, int targetfd, unsigned int flags) -+rseq 293 398 (void *rseq, uint32_t rseq_len, int flags, uint32_t sig) -diff --git a/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl b/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl -index 7a6db19..95dc7d3 100644 ---- a/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl -+++ b/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl -@@ -115,3 
+115,4 @@ __NR_fsmount 5432 sys_fsmount (int fd, unsigned int flags, unsigned int attr - __NR_clone3 5435 sys_clone3 (struct clone_args *uargs, size_t size) - __NR_pidfd_open 5434 sys_pidfd_open (pid_t pid, unsigned int flags) - __NR_pidfd_getfd 5438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags) -+__NR_rseq 5327 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig) -diff --git a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl -index dd79187..ad0d94f 100644 ---- a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl -+++ b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl -@@ -115,3 +115,4 @@ __NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) - __NR_sched_setaffinity 222 sys_sched_setaffinity (int fd, size_t cpusetsize, const cpu_set_t *mask) - __NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags) - __NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags) -+__NR_rseq 387 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig) -diff --git a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl -index 282adaf..916b697 100644 ---- a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl -+++ b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl -@@ -115,3 +115,4 @@ __NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) - __NR_sched_setaffinity 239 sys_sched_setaffinity (int fd, size_t cpusetsize, const cpu_set_t *mask) - __NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags) - __NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags) -+__NR_rseq 383 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig) -diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl -index 3fe3194..90f23d5 100644 ---- 
a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl -+++ b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl -@@ -103,3 +103,4 @@ __NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_f - __NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) - __NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags) - __NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags) -+__NR_rseq 386 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig) -diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl -index c1d119d..323fab1 100644 ---- a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl -+++ b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl -@@ -114,3 +114,4 @@ __NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_ - __NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) - __NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags) - __NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags) -+__NR_rseq 334 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig) --- -2.30.0 - diff --git a/0003-kerndat-check-for-rseq-syscall-support.patch b/0003-kerndat-check-for-rseq-syscall-support.patch deleted file mode 100644 index 1729b14d09452869045758ae768c8503ef075e9f..0000000000000000000000000000000000000000 --- a/0003-kerndat-check-for-rseq-syscall-support.patch +++ /dev/null @@ -1,62 +0,0 @@ -From ebd917f395b8bb3c4d6bbe51f9210d1aeca2e1fd Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 13:34:10 +0800 -Subject: [PATCH 03/16] kerndat: check for rseq syscall support Signed-off-by: - Alexander Mikhalitsyn - ---- - criu/include/kerndat.h | 1 + - criu/kerndat.c | 18 ++++++++++++++++++ - 2 files changed, 19 insertions(+) - -diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h -index 80bad7f..44a6976 100644 ---- a/criu/include/kerndat.h -+++ 
b/criu/include/kerndat.h -@@ -74,6 +74,7 @@ struct kerndat_s { - bool has_pidfd_getfd; - bool has_nspid; - bool has_nftables_concat; -+ bool has_rseq; - }; - - extern struct kerndat_s kdat; -diff --git a/criu/kerndat.c b/criu/kerndat.c -index 0e88ba4..f5a4490 100644 ---- a/criu/kerndat.c -+++ b/criu/kerndat.c -@@ -816,6 +816,20 @@ static int kerndat_x86_has_ptrace_fpu_xsave_bug(void) - return 0; - } - -+static int kerndat_has_rseq(void) -+{ -+ if (syscall(__NR_rseq, NULL, 0, 0, 0) != -1) { -+ pr_err("rseq should fail\n"); -+ return -1; -+ } -+ if (errno == ENOSYS) -+ pr_info("rseq syscall isn't supported\n"); -+ else -+ kdat.has_rseq = true; -+ -+ return 0; -+} -+ - #define KERNDAT_CACHE_FILE KDAT_RUNDIR "/criu.kdat" - #define KERNDAT_CACHE_FILE_TMP KDAT_RUNDIR "/.criu.kdat" - -@@ -1360,6 +1374,10 @@ int kerndat_init(void) - ret = -1; - } - -+ if (!ret && kerndat_has_rseq()) { -+ pr_err("kerndat_has_rseq failed when initializing kerndat.\n"); -+ ret = -1; -+ } - kerndat_lsm(); - kerndat_mmap_min_addr(); - kerndat_files_stat(); --- -2.30.0 - diff --git a/0004-util-move-fork_and_ptrace_attach-helper-from-cr-chec.patch b/0004-util-move-fork_and_ptrace_attach-helper-from-cr-chec.patch deleted file mode 100644 index 51457c6e87f7694594b96fc58597bfcf58eb0d14..0000000000000000000000000000000000000000 --- a/0004-util-move-fork_and_ptrace_attach-helper-from-cr-chec.patch +++ /dev/null @@ -1,161 +0,0 @@ -From fe1f84eb98092b1aff60ae2be11e351b165f3f43 Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 13:35:53 +0800 -Subject: [PATCH 04/16] util: move fork_and_ptrace_attach helper from cr-check - Signed-off-by: Alexander Mikhalitsyn - ---- - criu/cr-check.c | 55 ------------------------------- - criu/include/util.h | 1 + - criu/util.c | 57 +++++++++++++++++++++++++++++++++ - 3 files changed, 58 insertions(+), 55 deletions(-) - -diff --git a/criu/cr-check.c b/criu/cr-check.c -index 3575fb3..d41ef8f 100644 ---- a/criu/cr-check.c -+++ b/criu/cr-check.c -@@ -537,61 +537,6 
@@ static int check_sigqueuinfo(void) - return 0; - } - --static pid_t fork_and_ptrace_attach(int (*child_setup)(void)) --{ -- pid_t pid; -- int sk_pair[2], sk; -- char c = 0; -- -- if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair)) { -- pr_perror("socketpair"); -- return -1; -- } -- -- pid = fork(); -- if (pid < 0) { -- pr_perror("fork"); -- return -1; -- } else if (pid == 0) { -- sk = sk_pair[1]; -- close(sk_pair[0]); -- -- if (child_setup && child_setup() != 0) -- exit(1); -- -- if (write(sk, &c, 1) != 1) { -- pr_perror("write"); -- exit(1); -- } -- -- while (1) -- sleep(1000); -- exit(1); -- } -- -- sk = sk_pair[0]; -- close(sk_pair[1]); -- -- if (read(sk, &c, 1) != 1) { -- close(sk); -- kill(pid, SIGKILL); -- pr_perror("read"); -- return -1; -- } -- -- close(sk); -- -- if (ptrace(PTRACE_ATTACH, pid, NULL, NULL) == -1) { -- pr_perror("Unable to ptrace the child"); -- kill(pid, SIGKILL); -- return -1; -- } -- -- waitpid(pid, NULL, 0); -- -- return pid; --} -- - static int check_ptrace_peeksiginfo(void) - { - struct ptrace_peeksiginfo_args arg; -diff --git a/criu/include/util.h b/criu/include/util.h -index a2dac22..1c0b3c7 100644 ---- a/criu/include/util.h -+++ b/criu/include/util.h -@@ -166,6 +166,7 @@ extern int is_anon_link_type(char *link, char *type); - - extern int cr_system(int in, int out, int err, char *cmd, char *const argv[], unsigned flags); - extern int cr_system_userns(int in, int out, int err, char *cmd, char *const argv[], unsigned flags, int userns_pid); -+extern pid_t fork_and_ptrace_attach(int (*child_setup)(void)); - extern int cr_daemon(int nochdir, int noclose, int close_fd); - extern int status_ready(void); - extern int is_root_user(void); -diff --git a/criu/util.c b/criu/util.c -index 06124c2..e682161 100644 ---- a/criu/util.c -+++ b/criu/util.c -@@ -654,6 +654,63 @@ out: - return ret; - } - -+pid_t fork_and_ptrace_attach(int (*child_setup)(void)) -+{ -+ pid_t pid; -+ int sk_pair[2], sk; -+ char c = 0; -+ -+ if (socketpair(PF_LOCAL, 
SOCK_SEQPACKET, 0, sk_pair)) { -+ pr_perror("socketpair"); -+ return -1; -+ } -+ -+ pid = fork(); -+ if (pid < 0) { -+ pr_perror("fork"); -+ return -1; -+ } else if (pid == 0) { -+ sk = sk_pair[1]; -+ close(sk_pair[0]); -+ -+ if (child_setup && child_setup() != 0) -+ exit(1); -+ -+ if (write(sk, &c, 1) != 1) { -+ pr_perror("write"); -+ exit(1); -+ } -+ -+ while (1) -+ sleep(1000); -+ exit(1); -+ } -+ -+ sk = sk_pair[0]; -+ close(sk_pair[1]); -+ -+ if (read(sk, &c, 1) != 1) { -+ close(sk); -+ kill(pid, SIGKILL); -+ waitpid(pid, NULL, 0); -+ pr_perror("read"); -+ return -1; -+ } -+ -+ close(sk); -+ -+ if (ptrace(PTRACE_ATTACH, pid, NULL, NULL) == -1) { -+ pr_perror("Unable to ptrace the child"); -+ kill(pid, SIGKILL); -+ waitpid(pid, NULL, 0); -+ return -1; -+ } -+ -+ waitpid(pid, NULL, 0); -+ -+ return pid; -+} -+ - int status_ready(void) - { - char c = 0; --- -2.30.0 - diff --git a/0005-cr-check-Add-ptrace-rseq-conf-dump-feature.patch b/0005-cr-check-Add-ptrace-rseq-conf-dump-feature.patch deleted file mode 100644 index 5a82e084015fc162a56423e55359a83ac23991af..0000000000000000000000000000000000000000 --- a/0005-cr-check-Add-ptrace-rseq-conf-dump-feature.patch +++ /dev/null @@ -1,162 +0,0 @@ -From 3c567693f2e6579109dbabcca0e90c059ce5af25 Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 15:30:18 +0800 -Subject: [PATCH 05/16] cr-check: Add ptrace rseq conf dump feature Add - "get_rseq_conf" feature corresponding to the - ptrace(PTRACE_GET_RSEQ_CONFIGURATION) support. 
- -Signed-off-by: Alexander Mikhalitsyn ---- - compel/include/uapi/ptrace.h | 12 +++++++ - criu/cr-check.c | 11 +++++++ - criu/include/kerndat.h | 1 + - criu/kerndat.c | 41 ++++++++++++++++++++++++ - 4 files changed, 65 insertions(+) - -diff --git a/compel/include/uapi/ptrace.h b/compel/include/uapi/ptrace.h -index c5291d2..bfe28c7 100644 ---- a/compel/include/uapi/ptrace.h -+++ b/compel/include/uapi/ptrace.h -@@ -65,6 +65,18 @@ typedef struct { - uint64_t flags; /* Output: filter's flags */ - } seccomp_metadata_t; - -+#ifndef PTRACE_GET_RSEQ_CONFIGURATION -+#define PTRACE_GET_RSEQ_CONFIGURATION 0x420f -+ -+struct ptrace_rseq_configuration { -+ __u64 rseq_abi_pointer; -+ __u32 rseq_abi_size; -+ __u32 signature; -+ __u32 flags; -+ __u32 pad; -+}; -+#endif -+ - #ifdef PTRACE_EVENT_STOP - #if PTRACE_EVENT_STOP == 7 /* Bad value from Linux 3.1-3.3, fixed in 3.4 */ - #undef PTRACE_EVENT_STOP -diff --git a/criu/cr-check.c b/criu/cr-check.c -index d41ef8f..ba87511 100644 ---- a/criu/cr-check.c -+++ b/criu/cr-check.c -@@ -794,6 +794,15 @@ static int check_ptrace_dump_seccomp_filters(void) - return ret; - } - -+static int check_ptrace_get_rseq_conf(void) -+{ -+ if (!kdat.has_ptrace_get_rseq_conf) { -+ pr_warn("ptrace(PTRACE_GET_RSEQ_CONFIGURATION) isn't supported. 
C/R of processes which are using rseq() won't work.\n"); -+ return -1; -+ } -+ return 0; -+} -+ - static int check_mem_dirty_track(void) - { - if (!kdat.has_dirty_track) { -@@ -1435,6 +1444,7 @@ int cr_check(void) - ret |= check_ns_pid(); - ret |= check_apparmor_stacking(); - ret |= check_network_lock_nftables(); -+ ret |= check_ptrace_get_rseq_conf(); - } - - /* -@@ -1547,6 +1557,7 @@ static struct feature_list feature_list[] = { - { "ns_pid", check_ns_pid }, - { "apparmor_stacking", check_apparmor_stacking }, - { "network_lock_nftables", check_network_lock_nftables }, -+ { "get_rseq_conf", check_ptrace_get_rseq_conf }, - { NULL, NULL }, - }; - -diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h -index 44a6976..05abeda 100644 ---- a/criu/include/kerndat.h -+++ b/criu/include/kerndat.h -@@ -75,6 +75,7 @@ struct kerndat_s { - bool has_nspid; - bool has_nftables_concat; - bool has_rseq; -+ bool has_ptrace_get_rseq_conf; - }; - - extern struct kerndat_s kdat; -diff --git a/criu/kerndat.c b/criu/kerndat.c -index f5a4490..4841387 100644 ---- a/criu/kerndat.c -+++ b/criu/kerndat.c -@@ -4,6 +4,8 @@ - #include - #include - #include -+#include -+#include - #include - #include - #include -@@ -36,6 +38,7 @@ - #include "sockets.h" - #include "net.h" - #include "tun.h" -+#include - #include - #include "netfilter.h" - #include "fsnotify.h" -@@ -830,6 +833,40 @@ static int kerndat_has_rseq(void) - return 0; - } - -+static int kerndat_has_ptrace_get_rseq_conf(void) -+{ -+ pid_t pid; -+ int len; -+ struct ptrace_rseq_configuration rseq; -+ -+ pid = fork_and_ptrace_attach(NULL); -+ if (pid < 0) -+ return -1; -+ -+ len = ptrace(PTRACE_GET_RSEQ_CONFIGURATION, pid, sizeof(rseq), &rseq); -+ if (len != sizeof(rseq)) { -+ kdat.has_ptrace_get_rseq_conf = false; -+ pr_info("ptrace(PTRACE_GET_RSEQ_CONFIGURATION) is not supported\n"); -+ goto out; -+ } -+ -+ /* -+ * flags is always zero from the kernel side, if it will be changed -+ * we need to pay attention to that and, possibly, 
make changes on the CRIU side. -+ */ -+ if (rseq.flags != 0) { -+ kdat.has_ptrace_get_rseq_conf = false; -+ pr_err("ptrace(PTRACE_GET_RSEQ_CONFIGURATION): rseq.flags != 0\n"); -+ } else { -+ kdat.has_ptrace_get_rseq_conf = true; -+ } -+ -+out: -+ kill(pid, SIGKILL); -+ waitpid(pid, NULL, 0); -+ return 0; -+} -+ - #define KERNDAT_CACHE_FILE KDAT_RUNDIR "/criu.kdat" - #define KERNDAT_CACHE_FILE_TMP KDAT_RUNDIR "/.criu.kdat" - -@@ -1378,6 +1415,10 @@ int kerndat_init(void) - pr_err("kerndat_has_rseq failed when initializing kerndat.\n"); - ret = -1; - } -+ if (!ret && kerndat_has_ptrace_get_rseq_conf()) { -+ pr_err("kerndat_has_ptrace_get_rseq_conf failed when initializing kerndat.\n"); -+ ret = -1; -+ } - kerndat_lsm(); - kerndat_mmap_min_addr(); - kerndat_files_stat(); --- -2.30.0 - diff --git a/0006-rseq-initial-support.patch b/0006-rseq-initial-support.patch deleted file mode 100644 index 4c6898552bdd29bfe4bdc0259fe8eefcb9f531ad..0000000000000000000000000000000000000000 --- a/0006-rseq-initial-support.patch +++ /dev/null @@ -1,702 +0,0 @@ -From e444c089ebfb03fb2b6d69a40322d31ab33c0597 Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 14:52:35 +0800 -Subject: [PATCH 06/16] rseq: initial support TODO: 1. properly handle case - when the kernel has rseq() support but has no - ptrace(PTRACE_GET_RSEQ_CONFIGURATION) support and user processes haven't used - rseq(). 2. properly handle "transient" states, when CRIU comes during rseq - was executed. We need test for this case with some "heavy" rseq + we need to - properly handle RSEQ_CS_* flags. 
- -Fixes: #1696 - -Reported-by: Radostin Stoyanov -Suggested-by: Florian Weimer -Signed-off-by: Alexander Mikhalitsyn ---- - compel/include/uapi/ptrace.h | 16 +-- - criu/cr-dump.c | 99 ++++++++++++++++ - criu/cr-restore.c | 17 +++ - criu/include/linux/rseq.h | 144 +++++++++++++++++++++++ - criu/include/parasite.h | 7 ++ - criu/include/restorer.h | 7 ++ - criu/kerndat.c | 2 +- - criu/parasite-syscall.c | 11 ++ - criu/pie/parasite.c | 99 ++++++++++++++++ - criu/pie/restorer.c | 24 ++++ - images/Makefile | 1 + - images/core.proto | 2 + - images/rseq.proto | 9 ++ - 13 files changed, 429 insertions(+), 9 deletions(-) - create mode 100644 criu/include/linux/rseq.h - create mode 100644 images/rseq.proto - -diff --git a/compel/include/uapi/ptrace.h b/compel/include/uapi/ptrace.h -index bfe28c7..d807a92 100644 ---- a/compel/include/uapi/ptrace.h -+++ b/compel/include/uapi/ptrace.h -@@ -66,14 +66,14 @@ typedef struct { - } seccomp_metadata_t; - - #ifndef PTRACE_GET_RSEQ_CONFIGURATION --#define PTRACE_GET_RSEQ_CONFIGURATION 0x420f -- --struct ptrace_rseq_configuration { -- __u64 rseq_abi_pointer; -- __u32 rseq_abi_size; -- __u32 signature; -- __u32 flags; -- __u32 pad; -+#define PTRACE_GET_RSEQ_CONFIGURATION 0x420f -+ -+struct __ptrace_rseq_configuration { -+ uint64_t rseq_abi_pointer; -+ uint32_t rseq_abi_size; -+ uint32_t signature; -+ uint32_t flags; -+ uint32_t pad; - }; - #endif - -diff --git a/criu/cr-dump.c b/criu/cr-dump.c -index f07fe6e..91dd08a 100644 ---- a/criu/cr-dump.c -+++ b/criu/cr-dump.c -@@ -45,6 +45,7 @@ - #include "proc_parse.h" - #include "parasite.h" - #include "parasite-syscall.h" -+#include - #include "files.h" - #include "files-reg.h" - #include "shmem.h" -@@ -200,6 +201,25 @@ static int dump_sched_info(int pid, ThreadCoreEntry *tc) - return 0; - } - -+static int check_thread_rseq(pid_t tid, const struct parasite_check_rseq *ti_rseq, bool has_tc_rseq_entry) -+{ -+ if (!kdat.has_rseq || kdat.has_ptrace_get_rseq_conf) -+ return 0; -+ -+ pr_debug("%d has 
rseq_inited = %d\n", tid, ti_rseq->rseq_inited); -+ -+ /* -+ * We have no kdat.has_ptrace_get_rseq_conf and user -+ * process has rseq() used, let's fail dump. -+ */ -+ if (ti_rseq->rseq_inited) { -+ pr_err("%d has rseq but kernel lacks get_rseq_conf feature\n", tid); -+ return -1; -+ } -+ -+ return 0; -+} -+ - struct cr_imgset *glob_imgset; - - static int collect_fds(pid_t pid, struct parasite_drain_fd **dfds) -@@ -730,6 +750,17 @@ int dump_thread_core(int pid, CoreEntry *core, const struct parasite_dump_thread - if (!ret) - ret = seccomp_dump_thread(pid, tc); - -+ /* -+ * We are dumping rseq() in the dump_thread_rseq() function, -+ * *before* processes gets infected (because of ptrace requests -+ * API restriction). At this point, if the kernel lacks -+ * kdat.has_ptrace_get_rseq_conf support we have to ensure -+ * that dumpable processes haven't initialized rseq() or -+ * fail dump if rseq() was used. -+ */ -+ if (!ret) -+ ret = check_thread_rseq(pid, &ti->rseq, !!tc->rseq_entry); -+ - return ret; - } - -@@ -1016,6 +1047,68 @@ static int dump_task_signals(pid_t pid, struct pstree_item *item) - return 0; - } - -+static int dump_thread_rseq(pid_t tid, RseqEntry **rseqep) -+{ -+ struct __ptrace_rseq_configuration rseq; -+ RseqEntry *rseqe = NULL; -+ int ret; -+ -+ /* -+ * If we are here it means that rseq() syscall is supported, -+ * but ptrace(PTRACE_GET_RSEQ_CONFIGURATION) isn't supported, -+ * we can just fail dump here. But this is bad idea, IMHO. -+ * -+ * So, we will try to detect if victim process was used rseq(). -+ * See check_rseq() and check_thread_rseq() functions. 
-+ */ -+ if (!kdat.has_ptrace_get_rseq_conf) -+ return 0; -+ -+ ret = ptrace(PTRACE_GET_RSEQ_CONFIGURATION, tid, sizeof(rseq), &rseq); -+ if (ret != sizeof(rseq)) { -+ pr_perror("ptrace(PTRACE_GET_RSEQ_CONFIGURATION, %d) = %d", tid, ret); -+ return -1; -+ } -+ -+ if (rseq.flags != 0) { -+ pr_err("something wrong with ptrace(PTRACE_GET_RSEQ_CONFIGURATION, %d) flags = 0x%x\n", tid, -+ rseq.flags); -+ return -1; -+ } -+ -+ pr_err("Dump rseq of %d: ptr = 0x%lx sign = 0x%x\n", tid, (unsigned long)rseq.rseq_abi_pointer, rseq.signature); -+ -+ rseqe = xmalloc(sizeof(*rseqe)); -+ if (!rseqe) -+ return -1; -+ -+ rseq_entry__init(rseqe); -+ -+ rseqe->rseq_abi_pointer = rseq.rseq_abi_pointer; -+ rseqe->rseq_abi_size = rseq.rseq_abi_size; -+ rseqe->signature = rseq.signature; -+ -+ *rseqep = rseqe; -+ -+ return 0; -+} -+ -+static int dump_task_rseq(pid_t pid, struct pstree_item *item) -+{ -+ int i; -+ -+ /* if rseq() syscall isn't supported then nothing to dump */ -+ if (!kdat.has_rseq) -+ return 0; -+ -+ for (i = 0; i < item->nr_threads; i++) { -+ if (dump_thread_rseq(item->threads[i].real, &item->core[i]->thread_core->rseq_entry)) -+ return -1; -+ } -+ -+ return 0; -+} -+ - static struct proc_pid_stat pps_buf; - - static int dump_task_threads(struct parasite_ctl *parasite_ctl, const struct pstree_item *item) -@@ -1304,6 +1397,12 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) - goto err; - } - -+ ret = dump_task_rseq(pid, item); -+ if (ret) { -+ pr_err("Dump %d rseq failed %d\n", pid, ret); -+ goto err; -+ } -+ - parasite_ctl = parasite_infect_seized(pid, item, &vmas); - if (!parasite_ctl) { - pr_err("Can't infect (pid: %d) with parasite\n", pid); -diff --git a/criu/cr-restore.c b/criu/cr-restore.c -index 5b645c1..b2bd044 100644 ---- a/criu/cr-restore.c -+++ b/criu/cr-restore.c -@@ -2975,6 +2975,19 @@ static int prep_sched_info(struct rst_sched_param *sp, ThreadCoreEntry *tc) - return 0; - } - -+static int prep_rseq(struct rst_rseq_param 
*rseq, ThreadCoreEntry *tc) -+{ -+ /* compatibility with older CRIU versions */ -+ if (!tc->rseq_entry) -+ return 0; -+ -+ rseq->rseq_abi_pointer = tc->rseq_entry->rseq_abi_pointer; -+ rseq->rseq_abi_size = tc->rseq_entry->rseq_abi_size; -+ rseq->signature = tc->rseq_entry->signature; -+ -+ return 0; -+} -+ - static rlim_t decode_rlim(rlim_t ival) - { - return ival == -1 ? RLIM_INFINITY : ival; -@@ -3704,6 +3717,10 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns - thread_args[i].clear_tid_addr = CORE_THREAD_ARCH_INFO(tcore)->clear_tid_addr; - core_get_tls(tcore, &thread_args[i].tls); - -+ ret = prep_rseq(&thread_args[i].rseq, tcore->thread_core); -+ if (ret) -+ goto err; -+ - rst_reloc_creds(&thread_args[i], &creds_pos_next); - - thread_args[i].futex_rla = tcore->thread_core->futex_rla; -diff --git a/criu/include/linux/rseq.h b/criu/include/linux/rseq.h -new file mode 100644 -index 0000000..5c1706a ---- /dev/null -+++ b/criu/include/linux/rseq.h -@@ -0,0 +1,144 @@ -+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ -+#ifndef _UAPI_LINUX_RSEQ_H -+#define _UAPI_LINUX_RSEQ_H -+ -+/* -+ * linux/rseq.h -+ * -+ * Restartable sequences system call API -+ * -+ * Copyright (c) 2015-2018 Mathieu Desnoyers -+ */ -+ -+#include -+#include -+ -+enum rseq_cpu_id_state { -+ RSEQ_CPU_ID_UNINITIALIZED = -1, -+ RSEQ_CPU_ID_REGISTRATION_FAILED = -2, -+}; -+ -+enum rseq_flags { -+ RSEQ_FLAG_UNREGISTER = (1 << 0), -+}; -+ -+enum rseq_cs_flags_bit { -+ RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0, -+ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1, -+ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2, -+}; -+ -+enum rseq_cs_flags { -+ RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT), -+ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT), -+ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT), -+}; -+ -+/* -+ * struct rseq_cs is aligned on 4 * 8 bytes to 
ensure it is always -+ * contained within a single cache-line. It is usually declared as -+ * link-time constant data. -+ */ -+struct rseq_cs { -+ /* Version of this structure. */ -+ __u32 version; -+ /* enum rseq_cs_flags */ -+ __u32 flags; -+ __u64 start_ip; -+ /* Offset from start_ip. */ -+ __u64 post_commit_offset; -+ __u64 abort_ip; -+} __attribute__((aligned(4 * sizeof(__u64)))); -+ -+/* -+ * struct rseq is aligned on 4 * 8 bytes to ensure it is always -+ * contained within a single cache-line. -+ * -+ * A single struct rseq per thread is allowed. -+ */ -+struct rseq { -+ /* -+ * Restartable sequences cpu_id_start field. Updated by the -+ * kernel. Read by user-space with single-copy atomicity -+ * semantics. This field should only be read by the thread which -+ * registered this data structure. Aligned on 32-bit. Always -+ * contains a value in the range of possible CPUs, although the -+ * value may not be the actual current CPU (e.g. if rseq is not -+ * initialized). This CPU number value should always be compared -+ * against the value of the cpu_id field before performing a rseq -+ * commit or returning a value read from a data structure indexed -+ * using the cpu_id_start value. -+ */ -+ __u32 cpu_id_start; -+ /* -+ * Restartable sequences cpu_id field. Updated by the kernel. -+ * Read by user-space with single-copy atomicity semantics. This -+ * field should only be read by the thread which registered this -+ * data structure. Aligned on 32-bit. Values -+ * RSEQ_CPU_ID_UNINITIALIZED and RSEQ_CPU_ID_REGISTRATION_FAILED -+ * have a special semantic: the former means "rseq uninitialized", -+ * and latter means "rseq initialization failed". 
This value is -+ * meant to be read within rseq critical sections and compared -+ * with the cpu_id_start value previously read, before performing -+ * the commit instruction, or read and compared with the -+ * cpu_id_start value before returning a value loaded from a data -+ * structure indexed using the cpu_id_start value. -+ */ -+ __u32 cpu_id; -+ /* -+ * Restartable sequences rseq_cs field. -+ * -+ * Contains NULL when no critical section is active for the current -+ * thread, or holds a pointer to the currently active struct rseq_cs. -+ * -+ * Updated by user-space, which sets the address of the currently -+ * active rseq_cs at the beginning of assembly instruction sequence -+ * block, and set to NULL by the kernel when it restarts an assembly -+ * instruction sequence block, as well as when the kernel detects that -+ * it is preempting or delivering a signal outside of the range -+ * targeted by the rseq_cs. Also needs to be set to NULL by user-space -+ * before reclaiming memory that contains the targeted struct rseq_cs. -+ * -+ * Read and set by the kernel. Set by user-space with single-copy -+ * atomicity semantics. This field should only be updated by the -+ * thread which registered this data structure. Aligned on 64-bit. -+ */ -+ union { -+ __u64 ptr64; -+#ifdef __LP64__ -+ __u64 ptr; -+#else -+ struct { -+#if (defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN)) || defined(__BIG_ENDIAN) -+ __u32 padding; /* Initialized to zero. */ -+ __u32 ptr32; -+#else /* LITTLE */ -+ __u32 ptr32; -+ __u32 padding; /* Initialized to zero. */ -+#endif /* ENDIAN */ -+ } ptr; -+#endif -+ } rseq_cs; -+ -+ /* -+ * Restartable sequences flags field. -+ * -+ * This field should only be updated by the thread which -+ * registered this data structure. Read by the kernel. -+ * Mainly used for single-stepping through rseq critical sections -+ * with debuggers. 
-+ * -+ * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT -+ * Inhibit instruction sequence block restart on preemption -+ * for this thread. -+ * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL -+ * Inhibit instruction sequence block restart on signal -+ * delivery for this thread. -+ * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE -+ * Inhibit instruction sequence block restart on migration for -+ * this thread. -+ */ -+ __u32 flags; -+} __attribute__((aligned(4 * sizeof(__u64)))); -+ -+#endif /* _UAPI_LINUX_RSEQ_H */ -diff --git a/criu/include/parasite.h b/criu/include/parasite.h -index 8107aa4..5fde809 100644 ---- a/criu/include/parasite.h -+++ b/criu/include/parasite.h -@@ -164,10 +164,17 @@ struct parasite_dump_creds { - unsigned int groups[0]; - }; - -+struct parasite_check_rseq { -+ bool has_rseq; -+ bool has_ptrace_get_rseq_conf; /* no need to check if supported */ -+ bool rseq_inited; -+}; -+ - struct parasite_dump_thread { - unsigned int *tid_addr; - pid_t tid; - tls_t tls; -+ struct parasite_check_rseq rseq; - stack_t sas; - int pdeath_sig; - char comm[TASK_COMM_LEN]; -diff --git a/criu/include/restorer.h b/criu/include/restorer.h -index c2ef8f0..c29d869 100644 ---- a/criu/include/restorer.h -+++ b/criu/include/restorer.h -@@ -45,6 +45,12 @@ struct rst_sched_param { - int prio; - }; - -+struct rst_rseq_param { -+ u64 rseq_abi_pointer; -+ u32 rseq_abi_size; -+ u32 signature; -+}; -+ - struct restore_posix_timer { - struct str_posix_timer spt; - struct itimerspec val; -@@ -99,6 +105,7 @@ struct thread_restore_args { - struct task_restore_args *ta; - - tls_t tls; -+ struct rst_rseq_param rseq; - - siginfo_t *siginfo; - unsigned int siginfo_n; -diff --git a/criu/kerndat.c b/criu/kerndat.c -index 4841387..af7113a 100644 ---- a/criu/kerndat.c -+++ b/criu/kerndat.c -@@ -837,7 +837,7 @@ static int kerndat_has_ptrace_get_rseq_conf(void) - { - pid_t pid; - int len; -- struct ptrace_rseq_configuration rseq; -+ struct __ptrace_rseq_configuration rseq; - - pid = fork_and_ptrace_attach(NULL); - if 
(pid < 0) -diff --git a/criu/parasite-syscall.c b/criu/parasite-syscall.c -index 7175ade..ee4fa86 100644 ---- a/criu/parasite-syscall.c -+++ b/criu/parasite-syscall.c -@@ -132,6 +132,13 @@ static int alloc_groups_copy_creds(CredsEntry *ce, struct parasite_dump_creds *c - return ce->groups ? 0 : -ENOMEM; - } - -+static void init_parasite_rseq_arg(struct parasite_check_rseq *rseq) -+{ -+ rseq->has_rseq = kdat.has_rseq; -+ rseq->has_ptrace_get_rseq_conf = kdat.has_ptrace_get_rseq_conf; -+ rseq->rseq_inited = false; -+} -+ - int parasite_dump_thread_leader_seized(struct parasite_ctl *ctl, int pid, CoreEntry *core) - { - ThreadCoreEntry *tc = core->thread_core; -@@ -144,6 +151,8 @@ int parasite_dump_thread_leader_seized(struct parasite_ctl *ctl, int pid, CoreEn - pc = args->creds; - pc->cap_last_cap = kdat.last_cap; - -+ init_parasite_rseq_arg(&args->rseq); -+ - ret = compel_rpc_call_sync(PARASITE_CMD_DUMP_THREAD, ctl); - if (ret < 0) - return ret; -@@ -197,6 +206,8 @@ int parasite_dump_thread_seized(struct parasite_thread_ctl *tctl, struct parasit - - compel_arch_get_tls_thread(tctl, &args->tls); - -+ init_parasite_rseq_arg(&args->rseq); -+ - ret = compel_run_in_thread(tctl, PARASITE_CMD_DUMP_THREAD); - if (ret) { - pr_err("Can't init thread in parasite %d\n", pid); -diff --git a/criu/pie/parasite.c b/criu/pie/parasite.c -index bc0a33c..e49958b 100644 ---- a/criu/pie/parasite.c -+++ b/criu/pie/parasite.c -@@ -8,6 +8,8 @@ - #include - #include - -+#include "linux/rseq.h" -+ - #include "common/config.h" - #include "int.h" - #include "types.h" -@@ -167,6 +169,7 @@ static int dump_posix_timers(struct parasite_dump_posix_timers_args *args) - } - - static int dump_creds(struct parasite_dump_creds *args); -+static int check_rseq(struct parasite_check_rseq *rseq); - - static int dump_thread_common(struct parasite_dump_thread *ti) - { -@@ -197,6 +200,12 @@ static int dump_thread_common(struct parasite_dump_thread *ti) - goto out; - } - -+ ret = check_rseq(&ti->rseq); -+ if 
(ret) { -+ pr_err("Unable to check if rseq() is initialized: %d\n", ret); -+ goto out; -+ } -+ - ret = dump_creds(ti->creds); - out: - return ret; -@@ -313,6 +322,96 @@ grps_err: - return -1; - } - -+static int check_rseq(struct parasite_check_rseq *rseq) -+{ -+ int ret; -+ unsigned long rseq_abi_pointer; -+ unsigned long rseq_abi_size; -+ uint32_t rseq_signature; -+ void *addr; -+ -+ /* no need to do hacky check if we can get all info from ptrace() */ -+ if (!rseq->has_rseq || rseq->has_ptrace_get_rseq_conf) -+ return 0; -+ -+ /* -+ * We need to determine if victim process has rseq() -+ * initialized, but we have no *any* proper kernel interface -+ * supported at this point. -+ * Our plan: -+ * 1. We know that if we call rseq() syscall and process already -+ * has current->rseq filled, then we get: -+ * -EINVAL if current->rseq != rseq || rseq_len != sizeof(*rseq), -+ * -EPERM if current->rseq_sig != sig), -+ * -EBUSY if current->rseq == rseq && rseq_len == sizeof(*rseq) && -+ * current->rseq_sig != sig -+ * if current->rseq == NULL (rseq() wasn't used) then we go to: -+ * IS_ALIGNED(rseq ...) check, if we fail it we get -EINVAL and it -+ * will be hard to distinguish case when rseq() was initialized or not. -+ * Let's construct arguments payload -+ * with: -+ * 1. correct rseq_abi_size -+ * 2. aligned and correct rseq_abi_pointer -+ * And see what rseq() return to us. -+ * If ret value is: -+ * 0: it means that rseq *wasn't* used and we successfuly registered it, -+ * -EINVAL or : it means that rseq is already initialized, -+ * so we *have* to dump it. But as we have has_ptrace_get_rseq_conf = false, -+ * we should just fail dump as it's unsafe to skip rseq() dump for processes -+ * with rseq() initialized. 
-+ * -EPERM or -EBUSY: should not happen as we take a fresh memory area for rseq -+ */ -+ addr = (void *)sys_mmap(NULL, sizeof(struct rseq), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); -+ if (addr == MAP_FAILED) { -+ pr_err("mmap() failed for struct rseq ret = %lx\n", (unsigned long)addr); -+ return -1; -+ } -+ -+ memset(addr, 0, sizeof(struct rseq)); -+ -+ /* sys_mmap returns page aligned addresses */ -+ rseq_abi_pointer = (unsigned long)addr; -+ rseq_abi_size = (unsigned long)sizeof(struct rseq); -+ /* it's not so important to have unique signature for us, -+ * because rseq_abi_pointer is guaranteed to be unique -+ */ -+ rseq_signature = 0x12345612; -+ -+ pr_info("\ttrying sys_rseq(%lx, %lx, %x, %x)\n", rseq_abi_pointer, rseq_abi_size, 0, rseq_signature); -+ ret = sys_rseq((void *)rseq_abi_pointer, rseq_abi_size, 0, rseq_signature); -+ if (ret) { -+ if (ret == -EINVAL) { -+ pr_info("\trseq is initialized in the victim\n"); -+ rseq->rseq_inited = true; -+ -+ ret = 0; -+ } else { -+ pr_err("\tunexpected failure of sys_rseq(%lx, %lx, %x, %x) = %d\n", rseq_abi_pointer, -+ rseq_abi_size, 0, rseq_signature, ret); -+ -+ ret = -1; -+ } -+ } else { -+ ret = sys_rseq((void *)rseq_abi_pointer, sizeof(struct rseq), RSEQ_FLAG_UNREGISTER, rseq_signature); -+ if (ret) { -+ pr_err("\tfailed to unregister sys_rseq(%lx, %lx, %x, %x) = %d\n", rseq_abi_pointer, -+ rseq_abi_size, RSEQ_FLAG_UNREGISTER, rseq_signature, ret); -+ -+ ret = -1; -+ goto out; -+ } -+ -+ pr_info("\tsys_rseq succeed, let's unregister it back... 
ok Error\n"); -+ pr_info("\trseq is non-initialized in the victim Error\n"); -+ rseq->rseq_inited = false; -+ ret = 0; -+ } -+ -+out: -+ sys_munmap(addr, sizeof(struct rseq)); -+ return ret; -+} -+ - static int fill_fds_fown(int fd, struct fd_opts *p) - { - int flags, ret; -diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c -index fbc89fe..368b5a0 100644 ---- a/criu/pie/restorer.c -+++ b/criu/pie/restorer.c -@@ -459,6 +459,27 @@ static int restore_cpu_affinity(struct task_restore_args *args) - return 0; - } - -+static int restore_rseq(struct rst_rseq_param *rseq) -+{ -+ int ret; -+ -+ if (!rseq->rseq_abi_pointer) { -+ pr_debug("rseq: nothing to restore\n"); -+ return 0; -+ } -+ -+ pr_debug("rseq: rseq_abi_pointer = %lx signature = %x\n", (unsigned long)rseq->rseq_abi_pointer, rseq->signature); -+ -+ ret = sys_rseq(decode_pointer(rseq->rseq_abi_pointer), rseq->rseq_abi_size, 0, rseq->signature); -+ if (ret) { -+ pr_err("failed sys_rseq(%lx, %lx, %x, %x) = %d\n", (unsigned long)rseq->rseq_abi_pointer, -+ (unsigned long)rseq->rseq_abi_size, 0, rseq->signature, ret); -+ return -1; -+ } -+ -+ return 0; -+} -+ - static int restore_seccomp_filter(pid_t tid, struct thread_restore_args *args) - { - unsigned int flags = args->seccomp_force_tsync ? 
SECCOMP_FILTER_FLAG_TSYNC : 0; -@@ -583,6 +604,9 @@ static int restore_thread_common(struct thread_restore_args *args) - - restore_tls(&args->tls); - -+ if (restore_rseq(&args->rseq)) -+ return -1; -+ - return 0; - } - -diff --git a/images/Makefile b/images/Makefile -index 2eaeb7c..004e22e 100644 ---- a/images/Makefile -+++ b/images/Makefile -@@ -71,6 +71,7 @@ proto-obj-y += img-streamer.o - proto-obj-y += bpfmap-file.o - proto-obj-y += bpfmap-data.o - proto-obj-y += apparmor.o -+proto-obj-y += rseq.o - - CFLAGS += -iquote $(obj)/ - -diff --git a/images/core.proto b/images/core.proto -index 39e7f32..b66230e 100644 ---- a/images/core.proto -+++ b/images/core.proto -@@ -14,6 +14,7 @@ import "timer.proto"; - import "creds.proto"; - import "sa.proto"; - import "siginfo.proto"; -+import "rseq.proto"; - - import "opts.proto"; - -@@ -106,6 +107,7 @@ message thread_core_entry { - optional string comm = 13; - optional uint64 blk_sigset_extended = 14; - required thread_allowedcpus_entry allowed_cpus = 15; -+ optional rseq_entry rseq_entry = 16; - } - - message task_rlimits_entry { -diff --git a/images/rseq.proto b/images/rseq.proto -new file mode 100644 -index 0000000..be28004 ---- /dev/null -+++ b/images/rseq.proto -@@ -0,0 +1,9 @@ -+// SPDX-License-Identifier: MIT -+ -+syntax = "proto2"; -+ -+message rseq_entry { -+ required uint64 rseq_abi_pointer = 1; -+ required uint32 rseq_abi_size = 2; -+ required uint32 signature = 3; -+} --- -2.30.0 - diff --git a/0007-zdtm-add-simple-test-for-rseq-C-R.patch b/0007-zdtm-add-simple-test-for-rseq-C-R.patch deleted file mode 100644 index bb317ed499f4a7d9dbc17a12b4e3200c3eb574c5..0000000000000000000000000000000000000000 --- a/0007-zdtm-add-simple-test-for-rseq-C-R.patch +++ /dev/null @@ -1,217 +0,0 @@ -From 5005c08e32dc29dbf0b3a2a582e75d249c190d96 Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 14:54:28 +0800 -Subject: [PATCH 07/16] zdtm: add simple test for rseq C/R Signed-off-by: - Alexander Mikhalitsyn - ---- - 
test/zdtm/static/Makefile | 1 + - test/zdtm/static/rseq00.c | 174 +++++++++++++++++++++++ - test/zdtm/static/rseq00.desc | 1 + - 3 files changed, 176 insertions(+) - create mode 100644 test/zdtm/static/rseq00.c - create mode 100644 test/zdtm/static/rseq00.desc - -diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile -index 70123cf..563d947 100644 ---- a/test/zdtm/static/Makefile -+++ b/test/zdtm/static/Makefile -@@ -61,6 +61,7 @@ TST_NOFILE := \ - pthread02 \ - pthread_timers \ - pthread_timers_h \ -+ rseq00 \ - vdso00 \ - vdso01 \ - vdso02 \ -diff --git a/test/zdtm/static/rseq00.c b/test/zdtm/static/rseq00.c -new file mode 100644 -index 0000000..26f41a2 ---- /dev/null -+++ b/test/zdtm/static/rseq00.c -@@ -0,0 +1,174 @@ -+/* -+ * test for rseq() syscall -+ * See also https://www.efficios.com/blog/2019/02/08/linux-restartable-sequences/ -+ * https://github.com/torvalds/linux/commit/d7822b1e24f2df5df98c76f0e94a5416349ff759 -+ */ -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "zdtmtst.h" -+ -+#if defined(__x86_64__) -+ -+const char *test_doc = "Check that rseq() basic C/R works"; -+const char *test_author = "Alexander Mikhalitsyn "; -+/* parts of code borrowed from https://www.efficios.com/blog/2019/02/08/linux-restartable-sequences/ */ -+ -+/* some useful definitions from kernel uapi */ -+enum rseq_flags { -+ RSEQ_FLAG_UNREGISTER = (1 << 0), -+}; -+ -+struct rseq { -+ uint32_t cpu_id_start; -+ uint32_t cpu_id; -+ uint64_t rseq_cs; -+ uint32_t flags; -+} __attribute__((aligned(4 * sizeof(uint64_t)))); -+ -+#ifndef __NR_rseq -+#define __NR_rseq 334 -+#endif -+/* EOF */ -+ -+static __thread volatile struct rseq __rseq_abi; -+ -+#define RSEQ_SIG 0x53053053 -+ -+static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len, int flags, uint32_t sig) -+{ -+ return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig); -+} -+ -+static void 
register_thread(void) -+{ -+ int rc; -+ rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG); -+ if (rc) { -+ fail("Failed to register rseq"); -+ exit(1); -+ } -+} -+ -+static void unregister_thread(void) -+{ -+ int rc; -+ rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), RSEQ_FLAG_UNREGISTER, RSEQ_SIG); -+ if (rc) { -+ fail("Failed to unregister rseq"); -+ exit(1); -+ } -+} -+ -+static void check_thread(void) -+{ -+ int rc; -+ rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG); -+ if (!(rc && errno == EBUSY)) { -+ fail("Failed to check rseq %d", rc); -+ exit(1); -+ } -+} -+ -+#define RSEQ_ACCESS_ONCE(x) (*(__volatile__ __typeof__(x) *)&(x)) -+ -+static int rseq_addv(intptr_t *v, intptr_t count, int cpu) -+{ -+ /* clang-format off */ -+ __asm__ __volatile__ goto( -+ ".pushsection __rseq_table, \"aw\"\n\t" -+ ".balign 32\n\t" -+ "cs_obj:\n\t" -+ /* version, flags */ -+ ".long 0, 0\n\t" -+ /* start_ip, post_commit_ip, abort_ip */ -+ ".quad 1f, (2f-1f), 4f\n\t" -+ ".popsection\n\t" -+ "1:\n\t" -+ "leaq cs_obj(%%rip), %%rax\n\t" -+ "movq %%rax, %[rseq_cs]\n\t" -+ "cmpl %[cpu_id], %[current_cpu_id]\n\t" -+ "jnz 4f\n\t" -+ "addq %[count], %[v]\n\t" /* final store */ -+ "2:\n\t" -+ ".pushsection __rseq_failure, \"ax\"\n\t" -+ /* Disassembler-friendly signature: nopl (%rip). 
*/ -+ ".byte 0x0f, 0x1f, 0x05\n\t" -+ ".long 0x53053053\n\t" /* RSEQ_FLAGS */ -+ "4:\n\t" -+ "jmp abort\n\t" -+ ".popsection\n\t" -+ : /* gcc asm goto does not allow outputs */ -+ : [cpu_id] "r" (cpu), -+ [current_cpu_id] "m" (__rseq_abi.cpu_id), -+ [rseq_cs] "m" (__rseq_abi.rseq_cs), -+ /* final store input */ -+ [v] "m" (*v), -+ [count] "er" (count) -+ : "memory", "cc", "rax" -+ : abort -+ ); -+ /* clang-format on */ -+ -+ return 0; -+abort: -+ return -1; -+} -+ -+int main(int argc, char *argv[]) -+{ -+ int cpu, ret; -+ intptr_t *cpu_data; -+ long nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); -+ -+ test_init(argc, argv); -+ -+ cpu_data = calloc(nr_cpus, sizeof(*cpu_data)); -+ if (!cpu_data) { -+ fail("calloc"); -+ exit(EXIT_FAILURE); -+ } -+ -+ register_thread(); -+ -+ test_daemon(); -+ test_waitsig(); -+ -+ check_thread(); -+ -+ cpu = RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id_start); -+ ret = rseq_addv(&cpu_data[cpu], 2, cpu); -+ if (ret) -+ fail("Failed to increment per-cpu counter"); -+ else -+ test_msg("cpu_data[%d] == %ld\n", cpu, (long int)cpu_data[cpu]); -+ -+ if (cpu_data[cpu] == 2) -+ pass(); -+ else -+ fail(); -+ -+ return 0; -+} -+ -+#else -+ -+int main(int argc, char *argv[]) -+{ -+ test_init(argc, argv); -+ skip("Unsupported arch"); -+ return 0; -+} -+ -+#endif -\ No newline at end of file -diff --git a/test/zdtm/static/rseq00.desc b/test/zdtm/static/rseq00.desc -new file mode 100644 -index 0000000..0324fa3 ---- /dev/null -+++ b/test/zdtm/static/rseq00.desc -@@ -0,0 +1 @@ -+{'flavor': 'h', 'arch': 'x86_64', 'feature': 'get_rseq_conf'} --- -2.30.0 - diff --git a/0008-ci-add-Fedora-Rawhide-based-test-on-Cirrus.patch b/0008-ci-add-Fedora-Rawhide-based-test-on-Cirrus.patch deleted file mode 100644 index 2f6b6420e6d98d2ba9bbbe3842fa420e2d6be905..0000000000000000000000000000000000000000 --- a/0008-ci-add-Fedora-Rawhide-based-test-on-Cirrus.patch +++ /dev/null @@ -1,123 +0,0 @@ -From 56fad25776a652e143175a22676a1f909476c880 Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: 
Wed, 2 Mar 2022 14:57:16 +0800 -Subject: [PATCH 08/16] ci: add Fedora Rawhide based test on Cirrus We have - ability to use nested virtualization on Cirrus, and already have "Vagrant - Fedora based test (no VDSO)" test, let's do analogical for Fedora Rawhide to - get fresh kernel. - -Suggested-by: Adrian Reber -Signed-off-by: Alexander Mikhalitsyn ---- - .cirrus.yml | 21 +++++++++++++++++++++ - scripts/ci/Makefile | 7 +++++-- - scripts/ci/run-ci-tests.sh | 5 +++++ - scripts/ci/vagrant.sh | 21 +++++++++++++++++++++ - 4 files changed, 52 insertions(+), 2 deletions(-) - -diff --git a/.cirrus.yml b/.cirrus.yml -index 671178d..9716e58 100644 ---- a/.cirrus.yml -+++ b/.cirrus.yml -@@ -19,6 +19,27 @@ task: - build_script: | - make -C scripts/ci vagrant-fedora-no-vdso - -+task: -+ name: Vagrant Fedora Rawhide based test -+ environment: -+ HOME: "/root" -+ CIRRUS_WORKING_DIR: "/tmp/criu" -+ -+ compute_engine_instance: -+ image_project: cirrus-images -+ image: family/docker-kvm -+ platform: linux -+ cpu: 4 -+ memory: 16G -+ nested_virtualization: true -+ -+ setup_script: | -+ scripts/ci/apt-install make gcc pkg-config git perl-modules iproute2 kmod wget cpu-checker -+ sudo kvm-ok -+ ln -sf /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto -+ build_script: | -+ make -C scripts/ci vagrant-fedora-rawhide -+ - task: - name: CentOS 8 based test - environment: -diff --git a/scripts/ci/Makefile b/scripts/ci/Makefile -index 02b4d87..9c9264d 100644 ---- a/scripts/ci/Makefile -+++ b/scripts/ci/Makefile -@@ -41,7 +41,7 @@ export CONTAINER_TERMINAL - ifeq ($(UNAME),x86_64) - # On anything besides x86_64 Travis is running unprivileged LXD - # containers which do not support running docker with '--privileged'. 
-- CONTAINER_OPTS := --rm $(CONTAINER_TERMINAL) --privileged -v /lib/modules:/lib/modules --tmpfs /run -+ CONTAINER_OPTS := --rm $(CONTAINER_TERMINAL) --privileged --userns=host --cgroupns=host -v /lib/modules:/lib/modules --tmpfs /run - else - CONTAINER_OPTS := --rm -v /lib/modules:/lib/modules --tmpfs /run - endif -@@ -92,7 +92,10 @@ setup-vagrant: - vagrant-fedora-no-vdso: setup-vagrant - ./vagrant.sh fedora-no-vdso - --.PHONY: setup-vagrant vagrant-fedora-no-vdso -+vagrant-fedora-rawhide: setup-vagrant -+ ./vagrant.sh fedora-rawhide -+ -+.PHONY: setup-vagrant vagrant-fedora-no-vdso vagrant-fedora-rawhide - - %: - $(MAKE) -C ../build $@$(target-suffix) -diff --git a/scripts/ci/run-ci-tests.sh b/scripts/ci/run-ci-tests.sh -index 7c66e68..95b4ec7 100755 ---- a/scripts/ci/run-ci-tests.sh -+++ b/scripts/ci/run-ci-tests.sh -@@ -194,6 +194,11 @@ if [ "${STREAM_TEST}" = "1" ]; then - exit 0 - fi - -+# print some useful debug info -+cat /proc/self/status -+ls -la /proc/self/ns -+cat /proc/self/cgroup -+ - # shellcheck disable=SC2086 - ./test/zdtm.py run -a -p 2 --keep-going $ZDTM_OPTS - -diff --git a/scripts/ci/vagrant.sh b/scripts/ci/vagrant.sh -index 839b100..f961b8d 100755 ---- a/scripts/ci/vagrant.sh -+++ b/scripts/ci/vagrant.sh -@@ -58,4 +58,25 @@ fedora-no-vdso() { - ssh default 'cd /vagrant/criu/test; sudo ./zdtm.py run -t zdtm/transition/pidfd_store_sk --rpc --pre 2' - } - -+fedora-rawhide() { -+ #ssh default sudo grubby --update-kernel ALL --args="selinux=0 systemd.unified_cgroup_hierarchy=0" -+ ssh default sudo grubby --update-kernel ALL -+ # -+ # Workaround the problem: -+ # error running container: error from /usr/bin/crun creating container for [...]: sd-bus call: Transport endpoint is not connected -+ # Let's just use runc instead of crun -+ # see also https://github.com/kata-containers/tests/issues/4283 -+ # -+ ssh default 'sudo dnf remove -y crun || true' -+ ssh default sudo dnf install -y podman runc -+ vagrant reload -+ #ssh default sudo setenforce 0 
-+ ssh default cat /proc/cmdline -+ ssh default ls -la /proc/self/ns -+ ssh default sudo cat /proc/self/status -+ ssh default sudo cat /proc/self/cgroup -+ #ssh default sudo capsh --print -+ ssh default 'cd /vagrant; tar xf criu.tar; cd criu; sudo -E make -C scripts/ci fedora-rawhide CONTAINER_RUNTIME=podman BUILD_OPTIONS="--security-opt seccomp=unconfined"' -+} -+ - $1 --- -2.30.0 - diff --git a/0009-include-add-thread_pointer.h-from-Glibc.patch b/0009-include-add-thread_pointer.h-from-Glibc.patch deleted file mode 100644 index 51513496c42bd3b37918e71b05d328814955ee21..0000000000000000000000000000000000000000 --- a/0009-include-add-thread_pointer.h-from-Glibc.patch +++ /dev/null @@ -1,244 +0,0 @@ -From 99da2f789ca92aa52eeca07b97aee2cbd3d60fca Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 15:00:07 +0800 -Subject: [PATCH 09/16] include: add thread_pointer.h from Glibc Implementation - was taken from the Glibc. - -https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=8dbeb0561eeb876f557ac9eef5721912ec074ea5 -https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=cb976fba4c51ede7bf8cee5035888527c308dfbc - -Signed-off-by: Alexander Mikhalitsyn ---- - .../arch/aarch64/include/asm/thread_pointer.h | 27 ++++++++++++++ - .../arch/arm/include/asm/thread_pointer.h | 27 ++++++++++++++ - .../arch/mips/include/asm/thread_pointer.h | 27 ++++++++++++++ - .../arch/ppc64/include/asm/thread_pointer.h | 33 +++++++++++++++++ - .../arch/s390/include/asm/thread_pointer.h | 27 ++++++++++++++ - .../arch/x86/include/asm/thread_pointer.h | 37 +++++++++++++++++++ - 6 files changed, 178 insertions(+) - create mode 100644 criu/arch/aarch64/include/asm/thread_pointer.h - create mode 100644 criu/arch/arm/include/asm/thread_pointer.h - create mode 100644 criu/arch/mips/include/asm/thread_pointer.h - create mode 100644 criu/arch/ppc64/include/asm/thread_pointer.h - create mode 100644 criu/arch/s390/include/asm/thread_pointer.h - create mode 100644 
criu/arch/x86/include/asm/thread_pointer.h - -diff --git a/criu/arch/aarch64/include/asm/thread_pointer.h b/criu/arch/aarch64/include/asm/thread_pointer.h -new file mode 100644 -index 0000000..f7e0706 ---- /dev/null -+++ b/criu/arch/aarch64/include/asm/thread_pointer.h -@@ -0,0 +1,27 @@ -+/* __thread_pointer definition. Generic version. -+ Copyright (C) 2021 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#ifndef _SYS_THREAD_POINTER_H -+#define _SYS_THREAD_POINTER_H -+ -+static inline void *__criu_thread_pointer(void) -+{ -+ return __builtin_thread_pointer(); -+} -+ -+#endif /* _SYS_THREAD_POINTER_H */ -diff --git a/criu/arch/arm/include/asm/thread_pointer.h b/criu/arch/arm/include/asm/thread_pointer.h -new file mode 100644 -index 0000000..f7e0706 ---- /dev/null -+++ b/criu/arch/arm/include/asm/thread_pointer.h -@@ -0,0 +1,27 @@ -+/* __thread_pointer definition. Generic version. -+ Copyright (C) 2021 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. 
-+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#ifndef _SYS_THREAD_POINTER_H -+#define _SYS_THREAD_POINTER_H -+ -+static inline void *__criu_thread_pointer(void) -+{ -+ return __builtin_thread_pointer(); -+} -+ -+#endif /* _SYS_THREAD_POINTER_H */ -diff --git a/criu/arch/mips/include/asm/thread_pointer.h b/criu/arch/mips/include/asm/thread_pointer.h -new file mode 100644 -index 0000000..f7e0706 ---- /dev/null -+++ b/criu/arch/mips/include/asm/thread_pointer.h -@@ -0,0 +1,27 @@ -+/* __thread_pointer definition. Generic version. -+ Copyright (C) 2021 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#ifndef _SYS_THREAD_POINTER_H -+#define _SYS_THREAD_POINTER_H -+ -+static inline void *__criu_thread_pointer(void) -+{ -+ return __builtin_thread_pointer(); -+} -+ -+#endif /* _SYS_THREAD_POINTER_H */ -diff --git a/criu/arch/ppc64/include/asm/thread_pointer.h b/criu/arch/ppc64/include/asm/thread_pointer.h -new file mode 100644 -index 0000000..304516f ---- /dev/null -+++ b/criu/arch/ppc64/include/asm/thread_pointer.h -@@ -0,0 +1,33 @@ -+/* __thread_pointer definition. powerpc version. -+ Copyright (C) 2021 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#ifndef _SYS_THREAD_POINTER_H -+#define _SYS_THREAD_POINTER_H -+ -+#ifdef __powerpc64__ -+register void *__thread_register asm("r13"); -+#else -+register void *__thread_register asm("r2"); -+#endif -+ -+static inline void *__criu_thread_pointer(void) -+{ -+ return __thread_register; -+} -+ -+#endif /* _SYS_THREAD_POINTER_H */ -\ No newline at end of file -diff --git a/criu/arch/s390/include/asm/thread_pointer.h b/criu/arch/s390/include/asm/thread_pointer.h -new file mode 100644 -index 0000000..f7e0706 ---- /dev/null -+++ b/criu/arch/s390/include/asm/thread_pointer.h -@@ -0,0 +1,27 @@ -+/* __thread_pointer definition. Generic version. -+ Copyright (C) 2021 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. 
-+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#ifndef _SYS_THREAD_POINTER_H -+#define _SYS_THREAD_POINTER_H -+ -+static inline void *__criu_thread_pointer(void) -+{ -+ return __builtin_thread_pointer(); -+} -+ -+#endif /* _SYS_THREAD_POINTER_H */ -diff --git a/criu/arch/x86/include/asm/thread_pointer.h b/criu/arch/x86/include/asm/thread_pointer.h -new file mode 100644 -index 0000000..08603ae ---- /dev/null -+++ b/criu/arch/x86/include/asm/thread_pointer.h -@@ -0,0 +1,37 @@ -+/* __thread_pointer definition. x86 version. -+ Copyright (C) 2021 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . 
*/ -+ -+#ifndef _SYS_THREAD_POINTER_H -+#define _SYS_THREAD_POINTER_H -+ -+static inline void *__criu_thread_pointer(void) -+{ -+#if __GNUC_PREREQ(11, 1) -+ return __builtin_thread_pointer(); -+#else -+ void *__result; -+#ifdef __x86_64__ -+ __asm__("mov %%fs:0, %0" : "=r"(__result)); -+#else -+ __asm__("mov %%gs:0, %0" : "=r"(__result)); -+#endif -+ return __result; -+#endif /* !GCC 11 */ -+} -+ -+#endif /* _SYS_THREAD_POINTER_H */ -\ No newline at end of file --- -2.30.0 - diff --git a/0010-clone-noasan-unregister-rseq-at-the-thread-start-for.patch b/0010-clone-noasan-unregister-rseq-at-the-thread-start-for.patch deleted file mode 100644 index a8e8e995795c5e1940e84a1424df1cb3c707f7a7..0000000000000000000000000000000000000000 --- a/0010-clone-noasan-unregister-rseq-at-the-thread-start-for.patch +++ /dev/null @@ -1,102 +0,0 @@ -From d43ad9913c19afa6d80cb8124015d47361152db8 Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 15:00:43 +0800 -Subject: [PATCH 10/16] clone-noasan: unregister rseq at the thread start for - new glibc Fresh glibc does rseq registration by default during - start_thread(). [ see - https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=95e114a0919d844d8fe07839cb6538b7f5ee920e - ] - -This cause process crashes during memory restore procedure, because -memory which corresponds to the struct rseq will be overwritten. 
- -See also -("nptl: Add public rseq symbols and ") -https://sourceware.org/git?p=glibc.git;a=commit;h=c901c3e764d7c7079f006b4e21e877d5036eb4f5 -("nptl: Add for defining __thread_pointer") -https://sourceware.org/git?p=glibc.git;a=commit;h=8dbeb0561eeb876f557ac9eef5721912ec074ea5 - -Signed-off-by: Alexander Mikhalitsyn ---- - criu/clone-noasan.c | 42 +++++++++++++++++++++++++++++++-- - 1 file changed, 40 insertions(+), 2 deletions(-) - -diff --git a/criu/clone-noasan.c b/criu/clone-noasan.c -index d657ea2..5f8dd1b 100644 ---- a/criu/clone-noasan.c -+++ b/criu/clone-noasan.c -@@ -2,6 +2,13 @@ - #include - #include - -+#ifdef __has_include -+#if __has_include ("sys/rseq.h") -+#include -+#include "asm/thread_pointer.h" -+#endif -+#endif -+ - #include - - #include "sched.h" -@@ -34,16 +41,45 @@ - * ... wait for process to finish ... - * unlock_last_pid - */ -+ -+#if defined(RSEQ_SIG) -+static inline void unregister_glibc_rseq(void) -+{ -+ /* unregister rseq */ -+ syscall(__NR_rseq, (void *)((char *)__criu_thread_pointer() + __rseq_offset), __rseq_size, 1, RSEQ_SIG); -+} -+#else -+static inline void unregister_glibc_rseq(void) -+{ -+} -+#endif -+ -+struct call_fn_args { -+ int (*fn)(void *); -+ void *arg; -+}; -+ -+int call_fn(void *arg) -+{ -+ struct call_fn_args *cargs = arg; -+ unregister_glibc_rseq(); -+ return cargs->fn(cargs->arg); -+} -+ - int clone_noasan(int (*fn)(void *), int flags, void *arg) - { - void *stack_ptr = (void *)round_down((unsigned long)&stack_ptr - 1024, 16); -+ struct call_fn_args a = { -+ .fn = fn, -+ .arg = arg, -+ }; - - BUG_ON((flags & CLONE_VM) && !(flags & CLONE_VFORK)); - /* - * Reserve some bytes for clone() internal needs - * and use as stack the address above this area. 
- */ -- return clone(fn, stack_ptr, flags, arg); -+ return clone(call_fn, stack_ptr, flags, (void *)&a); - } - - int clone3_with_pid_noasan(int (*fn)(void *), void *arg, int flags, int exit_signal, pid_t pid) -@@ -78,7 +114,9 @@ int clone3_with_pid_noasan(int (*fn)(void *), void *arg, int flags, int exit_sig - c_args.set_tid = ptr_to_u64(&pid); - c_args.set_tid_size = 1; - pid = syscall(__NR_clone3, &c_args, sizeof(c_args)); -- if (pid == 0) -+ if (pid == 0) { -+ unregister_glibc_rseq(); - exit(fn(arg)); -+ } - return pid; - } --- -2.30.0 - diff --git a/0011-zdtm-static-rseq00-fix-rseq-test-when-linking-with-a.patch b/0011-zdtm-static-rseq00-fix-rseq-test-when-linking-with-a.patch deleted file mode 100644 index e5745acef268a8f0b2677e54b27cd03bbacfe0b3..0000000000000000000000000000000000000000 --- a/0011-zdtm-static-rseq00-fix-rseq-test-when-linking-with-a.patch +++ /dev/null @@ -1,158 +0,0 @@ -From 4f4d5acc34046954aea9e8ea10b5f71ff5f0fbd5 Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 15:01:34 +0800 -Subject: [PATCH 11/16] zdtm/static/rseq00: fix rseq test when linking with a - fresh Glibc Fresh Glibc does rseq() register by default. We need to - unregister rseq before registering our own. 
- -Signed-off-by: Alexander Mikhalitsyn ---- - test/zdtm/static/rseq00.c | 76 ++++++++++++++++++++------- - 1 file changed, 58 insertions(+), 18 deletions(-) - -diff --git a/test/zdtm/static/rseq00.c b/test/zdtm/static/rseq00.c -index 26f41a2..87053b8 100644 ---- a/test/zdtm/static/rseq00.c -+++ b/test/zdtm/static/rseq00.c -@@ -19,13 +19,48 @@ - - #include "zdtmtst.h" - --#if defined(__x86_64__) -+#ifdef __has_include -+#if __has_include("sys/rseq.h") -+#include -+#endif -+#endif -+ -+#if defined(__i386__) || defined(__x86_64__) -+ -+#if defined(RSEQ_SIG) -+static inline void *__criu_thread_pointer(void) -+{ -+#if __GNUC_PREREQ(11, 1) -+ return __builtin_thread_pointer(); -+#else -+ void *__result; -+#ifdef __x86_64__ -+ __asm__("mov %%fs:0, %0" : "=r"(__result)); -+#else -+ __asm__("mov %%gs:0, %0" : "=r"(__result)); -+#endif -+ return __result; -+#endif /* !GCC 11 */ -+} -+ -+static inline void unregister_glibc_rseq(void) -+{ -+ /* unregister rseq */ -+ syscall(__NR_rseq, (void *)((char *)__criu_thread_pointer() + __rseq_offset), __rseq_size, 1, RSEQ_SIG); -+} -+#else -+static inline void unregister_glibc_rseq(void) -+{ -+} -+#endif - - const char *test_doc = "Check that rseq() basic C/R works"; - const char *test_author = "Alexander Mikhalitsyn "; - /* parts of code borrowed from https://www.efficios.com/blog/2019/02/08/linux-restartable-sequences/ */ - - /* some useful definitions from kernel uapi */ -+#ifndef RSEQ_SIG -+ - enum rseq_flags { - RSEQ_FLAG_UNREGISTER = (1 << 0), - }; -@@ -37,14 +72,21 @@ struct rseq { - uint32_t flags; - } __attribute__((aligned(4 * sizeof(uint64_t)))); - -+#define RSEQ_SIG 0x53053053 -+ -+#endif -+ - #ifndef __NR_rseq - #define __NR_rseq 334 - #endif - /* EOF */ - --static __thread volatile struct rseq __rseq_abi; -+#define RSEQ_TLS_ALLOC 0 - --#define RSEQ_SIG 0x53053053 -+static volatile struct rseq *rseq_ptr; -+#if RSEQ_TLS_ALLOC -+static __thread volatile struct rseq __rseq_abi; -+#endif - - static int sys_rseq(volatile 
struct rseq *rseq_abi, uint32_t rseq_len, int flags, uint32_t sig) - { -@@ -54,27 +96,18 @@ static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len, int flags - static void register_thread(void) - { - int rc; -- rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG); -+ unregister_glibc_rseq(); -+ rc = sys_rseq(rseq_ptr, sizeof(struct rseq), 0, RSEQ_SIG); - if (rc) { - fail("Failed to register rseq"); - exit(1); - } - } - --static void unregister_thread(void) --{ -- int rc; -- rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), RSEQ_FLAG_UNREGISTER, RSEQ_SIG); -- if (rc) { -- fail("Failed to unregister rseq"); -- exit(1); -- } --} -- - static void check_thread(void) - { - int rc; -- rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG); -+ rc = sys_rseq(rseq_ptr, sizeof(struct rseq), 0, RSEQ_SIG); - if (!(rc && errno == EBUSY)) { - fail("Failed to check rseq %d", rc); - exit(1); -@@ -111,8 +144,8 @@ static int rseq_addv(intptr_t *v, intptr_t count, int cpu) - ".popsection\n\t" - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), -- [current_cpu_id] "m" (__rseq_abi.cpu_id), -- [rseq_cs] "m" (__rseq_abi.rseq_cs), -+ [current_cpu_id] "m" (rseq_ptr->cpu_id), -+ [rseq_cs] "m" (rseq_ptr->rseq_cs), - /* final store input */ - [v] "m" (*v), - [count] "er" (count) -@@ -132,6 +165,13 @@ int main(int argc, char *argv[]) - intptr_t *cpu_data; - long nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - -+#if RSEQ_TLS_ALLOC -+ rseq_ptr = &__rseq_abi; -+#else -+ //rseq_ptr = malloc(sizeof(struct rseq)); -+ rseq_ptr = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, 0, 0); -+#endif -+ - test_init(argc, argv); - - cpu_data = calloc(nr_cpus, sizeof(*cpu_data)); -@@ -147,7 +187,7 @@ int main(int argc, char *argv[]) - - check_thread(); - -- cpu = RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id_start); -+ cpu = RSEQ_ACCESS_ONCE(rseq_ptr->cpu_id_start); - ret = rseq_addv(&cpu_data[cpu], 2, cpu); - if (ret) - fail("Failed to increment per-cpu 
counter"); --- -2.30.0 - diff --git a/0012-compel-add-helpers-to-get-set-instruction-pointer.patch b/0012-compel-add-helpers-to-get-set-instruction-pointer.patch deleted file mode 100644 index 33acd47dde4265510ced29559200ada1b26505b1..0000000000000000000000000000000000000000 --- a/0012-compel-add-helpers-to-get-set-instruction-pointer.patch +++ /dev/null @@ -1,265 +0,0 @@ -From 06cb51057ce1cc31b79c6321273dfa0b4cb7f980 Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 15:02:08 +0800 -Subject: [PATCH 12/16] compel: add helpers to get/set instruction pointer - Signed-off-by: Alexander Mikhalitsyn - ---- - .../src/lib/include/uapi/asm/infect-types.h | 9 +++++---- - .../src/lib/include/uapi/asm/infect-types.h | 9 +++++---- - .../src/lib/include/uapi/asm/infect-types.h | 9 +++++---- - .../src/lib/include/uapi/asm/infect-types.h | 9 +++++---- - .../src/lib/include/uapi/asm/infect-types.h | 7 ++++--- - .../src/lib/include/uapi/asm/infect-types.h | 9 +++++---- - compel/include/uapi/infect.h | 6 ++++++ - compel/src/lib/infect.c | 20 +++++++++++++++++++ - .../criu/arch/aarch64/include/asm/types.h | 2 ++ - criu/arch/arm/include/asm/types.h | 2 ++ - .../criu/arch/mips/include/asm/types.h | 2 ++ - .../criu/arch/ppc64/include/asm/types.h | 2 ++ - .../criu/arch/s390/include/asm/types.h | 2 ++ - criu/arch/x86/include/asm/types.h | 2 ++ - 14 files changed, 67 insertions(+), 23 deletions(-) - -diff --git a/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h -index f91e73d..9d4ce7e 100644 ---- a/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h -+++ b/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h -@@ -23,10 +23,11 @@ typedef struct user_fpsimd_state user_fpregs_struct_t; - #define compel_arch_get_tls_task(ctl, tls) - #define compel_arch_get_tls_thread(tctl, tls) - --#define REG_RES(r) ((uint64_t)(r).regs[0]) --#define REG_IP(r) ((uint64_t)(r).pc) --#define REG_SP(r) 
((uint64_t)((r).sp)) --#define REG_SYSCALL_NR(r) ((uint64_t)(r).regs[8]) -+#define REG_RES(r) ((uint64_t)(r).regs[0]) -+#define REG_IP(r) ((uint64_t)(r).pc) -+#define SET_REG_IP(r, val) ((r).pc = (val)) -+#define REG_SP(r) ((uint64_t)((r).sp)) -+#define REG_SYSCALL_NR(r) ((uint64_t)(r).regs[8]) - - #define user_regs_native(pregs) true - -diff --git a/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h b/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h -index 159b6a9..8d32825 100644 ---- a/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h -+++ b/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h -@@ -56,10 +56,11 @@ struct user_vfp_exc { - unsigned long fpinst2; - }; - --#define REG_RES(regs) ((regs).ARM_r0) --#define REG_IP(regs) ((regs).ARM_pc) --#define REG_SP(regs) ((regs).ARM_sp) --#define REG_SYSCALL_NR(regs) ((regs).ARM_r7) -+#define REG_RES(regs) ((regs).ARM_r0) -+#define REG_IP(regs) ((regs).ARM_pc) -+#define SET_REG_IP(regs, val) ((regs).ARM_pc = (val)) -+#define REG_SP(regs) ((regs).ARM_sp) -+#define REG_SYSCALL_NR(regs) ((regs).ARM_r7) - - #define user_regs_native(pregs) true - -diff --git a/compel/arch/mips/src/lib/include/uapi/asm/infect-types.h b/compel/arch/mips/src/lib/include/uapi/asm/infect-types.h -index 70b3f85..481566a 100644 ---- a/compel/arch/mips/src/lib/include/uapi/asm/infect-types.h -+++ b/compel/arch/mips/src/lib/include/uapi/asm/infect-types.h -@@ -56,10 +56,11 @@ static inline bool user_regs_native(user_regs_struct_t *pregs) - #define compel_arch_get_tls_task(ctl, tls) - #define compel_arch_get_tls_thread(tctl, tls) - --#define REG_RES(regs) ((regs).MIPS_v0) --#define REG_IP(regs) ((regs).cp0_epc) --#define REG_SP(regs) ((regs).MIPS_sp) --#define REG_SYSCALL_NR(regs) ((regs).MIPS_v0) -+#define REG_RES(regs) ((regs).MIPS_v0) -+#define REG_IP(regs) ((regs).cp0_epc) -+#define SET_REG_IP(regs, val) ((regs).cp0_epc = (val)) -+#define REG_SP(regs) ((regs).MIPS_sp) -+#define REG_SYSCALL_NR(regs) ((regs).MIPS_v0) - - 
//#define __NR(syscall, compat) ((compat) ? __NR32_##syscall : __NR_##syscall) - #define __NR(syscall, compat) __NR_##syscall -diff --git a/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h -index fe6192e..bf2cc95 100644 ---- a/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h -+++ b/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h -@@ -72,10 +72,11 @@ typedef struct { - } tm; - } user_fpregs_struct_t; - --#define REG_RES(regs) ((uint64_t)(regs).gpr[3]) --#define REG_IP(regs) ((uint64_t)(regs).nip) --#define REG_SP(regs) ((uint64_t)(regs).gpr[1]) --#define REG_SYSCALL_NR(regs) ((uint64_t)(regs).gpr[0]) -+#define REG_RES(regs) ((uint64_t)(regs).gpr[3]) -+#define REG_IP(regs) ((uint64_t)(regs).nip) -+#define SET_REG_IP(regs, val) ((regs).nip = (val)) -+#define REG_SP(regs) ((uint64_t)(regs).gpr[1]) -+#define REG_SYSCALL_NR(regs) ((uint64_t)(regs).gpr[0]) - - #define user_regs_native(pregs) true - -diff --git a/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h b/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h -index 896d70e..87283bc 100644 ---- a/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h -+++ b/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h -@@ -62,9 +62,10 @@ typedef struct { - uint32_t system_call; - } user_regs_struct_t; - --#define REG_RES(r) ((uint64_t)(r).prstatus.gprs[2]) --#define REG_IP(r) ((uint64_t)(r).prstatus.psw.addr) --#define REG_SP(r) ((uint64_t)(r).prstatus.gprs[15]) -+#define REG_RES(r) ((uint64_t)(r).prstatus.gprs[2]) -+#define REG_IP(r) ((uint64_t)(r).prstatus.psw.addr) -+#define SET_REG_IP(r, val) ((r).prstatus.psw.addr = (val)) -+#define REG_SP(r) ((uint64_t)(r).prstatus.gprs[15]) - /* - * We assume that REG_SYSCALL_NR() is only used for pie code where we - * always use svc 0 with opcode in %r1. 
-diff --git a/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h b/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h -index 34b3ad0..b35504f 100644 ---- a/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h -+++ b/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h -@@ -127,10 +127,11 @@ typedef struct { - - typedef struct xsave_struct user_fpregs_struct_t; - --#define REG_RES(regs) get_user_reg(®s, ax) --#define REG_IP(regs) get_user_reg(®s, ip) --#define REG_SP(regs) get_user_reg(®s, sp) --#define REG_SYSCALL_NR(regs) get_user_reg(®s, orig_ax) -+#define REG_RES(regs) get_user_reg(®s, ax) -+#define REG_IP(regs) get_user_reg(®s, ip) -+#define SET_REG_IP(regs, val) set_user_reg(®s, ip, val) -+#define REG_SP(regs) get_user_reg(®s, sp) -+#define REG_SYSCALL_NR(regs) get_user_reg(®s, orig_ax) - - #define __NR(syscall, compat) ((compat) ? __NR32_##syscall : __NR_##syscall) - -diff --git a/compel/include/uapi/infect.h b/compel/include/uapi/infect.h -index c3d2ee6..389878e 100644 ---- a/compel/include/uapi/infect.h -+++ b/compel/include/uapi/infect.h -@@ -168,4 +168,10 @@ extern unsigned long compel_task_size(void); - extern uint64_t compel_get_leader_sp(struct parasite_ctl *ctl); - extern uint64_t compel_get_thread_sp(struct parasite_thread_ctl *tctl); - -+extern uint64_t compel_get_leader_ip(struct parasite_ctl *ctl); -+extern uint64_t compel_get_thread_ip(struct parasite_thread_ctl *tctl); -+ -+void compel_set_leader_ip(struct parasite_ctl *ctl, uint64_t v); -+void compel_set_thread_ip(struct parasite_thread_ctl *tctl, uint64_t v); -+ - #endif -diff --git a/compel/src/lib/infect.c b/compel/src/lib/infect.c -index 0fb9e71..6a13cc1 100644 ---- a/compel/src/lib/infect.c -+++ b/compel/src/lib/infect.c -@@ -1686,3 +1686,23 @@ uint64_t compel_get_thread_sp(struct parasite_thread_ctl *tctl) - { - return REG_SP(tctl->th.regs); - } -+ -+uint64_t compel_get_leader_ip(struct parasite_ctl *ctl) -+{ -+ return REG_IP(ctl->orig.regs); -+} -+ -+uint64_t 
compel_get_thread_ip(struct parasite_thread_ctl *tctl) -+{ -+ return REG_IP(tctl->th.regs); -+} -+ -+void compel_set_leader_ip(struct parasite_ctl *ctl, uint64_t v) -+{ -+ SET_REG_IP(ctl->orig.regs, v); -+} -+ -+void compel_set_thread_ip(struct parasite_thread_ctl *tctl, uint64_t v) -+{ -+ SET_REG_IP(tctl->th.regs, v); -+} -diff --git a/criu/arch/aarch64/include/asm/types.h b/criu/arch/aarch64/include/asm/types.h -index c860af1..363c1ca 100644 ---- a/criu/arch/aarch64/include/asm/types.h -+++ b/criu/arch/aarch64/include/asm/types.h -@@ -22,6 +22,8 @@ typedef UserAarch64RegsEntry UserRegsEntry; - - #define TI_SP(core) ((core)->ti_aarch64->gpregs->sp) - -+#define TI_IP(core) ((core)->ti_aarch64->gpregs->pc) -+ - static inline void *decode_pointer(uint64_t v) - { - return (void *)v; -diff --git a/criu/arch/arm/include/asm/types.h b/criu/arch/arm/include/asm/types.h -index cfcb8a1..93d2dc2 100644 ---- a/criu/arch/arm/include/asm/types.h -+++ b/criu/arch/arm/include/asm/types.h -@@ -21,6 +21,8 @@ typedef UserArmRegsEntry UserRegsEntry; - - #define TI_SP(core) ((core)->ti_arm->gpregs->sp) - -+#define TI_IP(core) ((core)->ti_arm->gpregs->ip) -+ - static inline void *decode_pointer(u64 v) - { - return (void *)(u32)v; -diff --git a/criu/arch/mips/include/asm/types.h b/criu/arch/mips/include/asm/types.h -index 237471f..2c75b6a 100644 ---- a/criu/arch/mips/include/asm/types.h -+++ b/criu/arch/mips/include/asm/types.h -@@ -18,6 +18,8 @@ - - #define CORE_THREAD_ARCH_INFO(core) core->ti_mips - -+#define TI_IP(core) ((core)->ti_mips->gpregs->cp0_epc) -+ - typedef UserMipsRegsEntry UserRegsEntry; - - static inline u64 encode_pointer(void *p) -diff --git a/criu/arch/ppc64/include/asm/types.h b/criu/arch/ppc64/include/asm/types.h -index fedeff2..d60aadd 100644 ---- a/criu/arch/ppc64/include/asm/types.h -+++ b/criu/arch/ppc64/include/asm/types.h -@@ -19,6 +19,8 @@ typedef UserPpc64RegsEntry UserRegsEntry; - - #define CORE_THREAD_ARCH_INFO(core) core->ti_ppc64 - -+#define TI_IP(core) 
((core)->ti_ppc64->gpregs->nip) -+ - static inline void *decode_pointer(uint64_t v) - { - return (void *)v; -diff --git a/criu/arch/s390/include/asm/types.h b/criu/arch/s390/include/asm/types.h -index 7522cf2..abf12de 100644 ---- a/criu/arch/s390/include/asm/types.h -+++ b/criu/arch/s390/include/asm/types.h -@@ -19,6 +19,8 @@ typedef UserS390RegsEntry UserRegsEntry; - - #define CORE_THREAD_ARCH_INFO(core) core->ti_s390 - -+#define TI_IP(core) ((core)->ti_s390->gpregs->psw_addr) -+ - static inline u64 encode_pointer(void *p) - { - return (u64)p; -diff --git a/criu/arch/x86/include/asm/types.h b/criu/arch/x86/include/asm/types.h -index a0a8ed9..8919d0a 100644 ---- a/criu/arch/x86/include/asm/types.h -+++ b/criu/arch/x86/include/asm/types.h -@@ -28,6 +28,8 @@ static inline int core_is_compat(CoreEntry *c) - - #define CORE_THREAD_ARCH_INFO(core) core->thread_info - -+#define TI_IP(core) ((core)->thread_info->gpregs->ip) -+ - typedef UserX86RegsEntry UserRegsEntry; - - static inline u64 encode_pointer(void *p) --- -2.30.0 - diff --git a/0013-cr-dump-fixup-thread-IP-when-inside-rseq-cs.patch b/0013-cr-dump-fixup-thread-IP-when-inside-rseq-cs.patch deleted file mode 100644 index cd8ef176ddd77e4a13fee010fd690983e0973c71..0000000000000000000000000000000000000000 --- a/0013-cr-dump-fixup-thread-IP-when-inside-rseq-cs.patch +++ /dev/null @@ -1,248 +0,0 @@ -From 33abfc12b973560b3d98afdbac7554b8c0542c3d Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 15:04:54 +0800 -Subject: [PATCH 13/16] cr-dump: fixup thread IP when inside rseq cs - Signed-off-by: Alexander Mikhalitsyn - ---- - criu/cr-dump.c | 155 +++++++++++++++++++++++++++- - criu/include/parasite.h | 2 + - criu/include/pstree.h | 1 + - 3 files changed, 154 insertions(+), 4 deletions(-) - -diff --git a/criu/cr-dump.c b/criu/cr-dump.c -index 91dd08a..a3f8973 100644 ---- a/criu/cr-dump.c -+++ b/criu/cr-dump.c -@@ -1047,11 +1047,58 @@ static int dump_task_signals(pid_t pid, struct pstree_item *item) - return 0; 
- } - --static int dump_thread_rseq(pid_t tid, RseqEntry **rseqep) -+static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseq, struct rseq_cs *rseq_cs) -+{ -+ int ret; -+ uint64_t addr; -+ -+ /* rseq is not registered */ -+ if (!rseq->rseq_abi_pointer) -+ return 0; -+ -+ /* -+ * We need to cover the case when victim process was inside rseq critical section -+ * at the moment when CRIU comes and seized it. We need to determine the borders -+ * of rseq critical section at first. To achieve that we need to access thread -+ * memory and read pointer to struct rseq_cs. -+ * -+ * We have two ways to access thread memory: from the parasite and using ptrace(). -+ * But it this case we can't use parasite, because if victim process returns to the -+ * execution, on the kernel side __rseq_handle_notify_resume hook will be called, -+ * then rseq_ip_fixup() -> clear_rseq_cs() and user space memory with struct rseq -+ * will be cleared. So, let's use ptrace(PTRACE_PEEKDATA). -+ */ -+ ret = ptrace_peek_area(tid, &addr, decode_pointer(rseq->rseq_abi_pointer + offsetof(struct rseq, rseq_cs)), -+ sizeof(uint64_t)); -+ if (ret) { -+ pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq_cs addr\n", tid, (unsigned long)&addr, -+ (unsigned long)(rseq->rseq_abi_pointer + offsetof(struct rseq, rseq_cs)), sizeof(uint64_t)); -+ return -1; -+ } -+ -+ /* (struct rseq)->rseq_cs is NULL */ -+ if (!addr) -+ return 0; -+ -+ ret = ptrace_peek_area(tid, rseq_cs, decode_pointer(addr), sizeof(struct rseq_cs)); -+ if (ret) { -+ pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq_cs struct\n", tid, -+ (unsigned long)rseq_cs, (unsigned long)addr, sizeof(struct rseq_cs)); -+ return -1; -+ } -+ -+ return 0; -+} -+ -+static int dump_thread_rseq(struct pstree_item *item, int i) - { - struct __ptrace_rseq_configuration rseq; - RseqEntry *rseqe = NULL; - int ret; -+ CoreEntry *core = item->core[i]; -+ RseqEntry **rseqep = &core->thread_core->rseq_entry; -+ struct rseq_cs 
*rseq_cs = &dmpi(item)->thread_rseq_cs[i]; -+ pid_t tid = item->threads[i].real; - - /* - * If we are here it means that rseq() syscall is supported, -@@ -1076,7 +1123,8 @@ static int dump_thread_rseq(pid_t tid, RseqEntry **rseqep) - return -1; - } - -- pr_err("Dump rseq of %d: ptr = 0x%lx sign = 0x%x\n", tid, (unsigned long)rseq.rseq_abi_pointer, rseq.signature); -+ pr_info("Dump rseq of %d: ptr = 0x%lx sign = 0x%x\n", tid, (unsigned long)rseq.rseq_abi_pointer, -+ rseq.signature); - - rseqe = xmalloc(sizeof(*rseqe)); - if (!rseqe) -@@ -1088,25 +1136,118 @@ static int dump_thread_rseq(pid_t tid, RseqEntry **rseqep) - rseqe->rseq_abi_size = rseq.rseq_abi_size; - rseqe->signature = rseq.signature; - -+ if (read_rseq_cs(tid, &rseq, rseq_cs)) -+ goto err; -+ -+ /* save rseq entry to the image */ - *rseqep = rseqe; - - return 0; -+ -+err: -+ xfree(rseqe); -+ return -1; - } - - static int dump_task_rseq(pid_t pid, struct pstree_item *item) - { - int i; -+ struct rseq_cs *thread_rseq_cs; - - /* if rseq() syscall isn't supported then nothing to dump */ - if (!kdat.has_rseq) - return 0; - -+ thread_rseq_cs = xzalloc(sizeof(*thread_rseq_cs) * item->nr_threads); -+ if (!thread_rseq_cs) -+ return -1; -+ -+ dmpi(item)->thread_rseq_cs = thread_rseq_cs; -+ - for (i = 0; i < item->nr_threads; i++) { -- if (dump_thread_rseq(item->threads[i].real, &item->core[i]->thread_core->rseq_entry)) -- return -1; -+ if (dump_thread_rseq(item, i)) -+ goto free_rseq; - } - - return 0; -+ -+free_rseq: -+ xfree(thread_rseq_cs); -+ dmpi(item)->thread_rseq_cs = NULL; -+ return -1; -+} -+ -+static bool task_in_rseq(struct rseq_cs *rseq_cs, uint64_t addr) -+{ -+ return addr >= rseq_cs->start_ip && addr < rseq_cs->start_ip + rseq_cs->post_commit_offset; -+} -+ -+static int fixup_thread_rseq(struct pstree_item *item, int i) -+{ -+ CoreEntry *core = item->core[i]; -+ struct rseq_cs *rseq_cs = &dmpi(item)->thread_rseq_cs[i]; -+ pid_t tid = item->threads[i].real; -+ -+ /* (struct rseq)->rseq_cs is NULL */ 
-+ if (!rseq_cs->start_ip) -+ return 0; -+ -+ pr_info("fixup_thread_rseq for %d: rseq_cs start_ip = %llx abort_ip = %llx post_commit_offset = %llx flags = %x version = %x; IP = %lx\n", -+ tid, rseq_cs->start_ip, rseq_cs->abort_ip, rseq_cs->post_commit_offset, rseq_cs->flags, -+ rseq_cs->version, (unsigned long)TI_IP(core)); -+ -+ if (rseq_cs->version != 0) { -+ pr_err("unsupported RSEQ ABI version = %d\n", rseq_cs->version); -+ return -1; -+ } -+ -+ if (task_in_rseq(rseq_cs, TI_IP(core))) { -+ struct pid *tid = &item->threads[i]; -+ -+ pr_info("The %d task is in rseq critical section. IP will be set to rseq abort handler addr\n", -+ tid->real); -+ -+ /* -+ * We need to fixup task instruction pointer from -+ * the original one (which lays inside rseq critical section) -+ * to rseq abort handler address. -+ * -+ * It's worth to mention that we need to fixup IP in CoreEntry -+ * (used when full dump/restore is performed) and also in -+ * the parasite regs storage (used if --leave-running option is used, -+ * or if dump error occured and process execution is resumed). 
-+ */ -+ TI_IP(core) = rseq_cs->abort_ip; -+ -+ if (item->pid->real == tid->real) { -+ compel_set_leader_ip(dmpi(item)->parasite_ctl, rseq_cs->abort_ip); -+ } else { -+ compel_set_thread_ip(dmpi(item)->thread_ctls[i], rseq_cs->abort_ip); -+ } -+ } -+ -+ return 0; -+} -+ -+static int fixup_task_rseq(pid_t pid, struct pstree_item *item) -+{ -+ int ret = 0; -+ int i; -+ -+ if (!kdat.has_ptrace_get_rseq_conf) -+ return 0; -+ -+ for (i = 0; i < item->nr_threads; i++) { -+ if (fixup_thread_rseq(item, i)) { -+ ret = -1; -+ goto exit; -+ } -+ } -+ -+exit: -+ xfree(dmpi(item)->thread_rseq_cs); -+ dmpi(item)->thread_rseq_cs = NULL; -+ return ret; - } - - static struct proc_pid_stat pps_buf; -@@ -1409,6 +1550,12 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) - goto err; - } - -+ ret = fixup_task_rseq(pid, item); -+ if (ret) { -+ pr_err("Fixup rseq for %d failed %d\n", pid, ret); -+ goto err; -+ } -+ - if (fault_injected(FI_DUMP_EARLY)) { - pr_info("fault: CRIU sudden detach\n"); - kill(getpid(), SIGKILL); -diff --git a/criu/include/parasite.h b/criu/include/parasite.h -index 5fde809..d2a0688 100644 ---- a/criu/include/parasite.h -+++ b/criu/include/parasite.h -@@ -10,6 +10,8 @@ - #include - #include - -+#include "linux/rseq.h" -+ - #include "image.h" - #include "util-pie.h" - #include "common/lock.h" -diff --git a/criu/include/pstree.h b/criu/include/pstree.h -index c5b0fa7..458e5f9 100644 ---- a/criu/include/pstree.h -+++ b/criu/include/pstree.h -@@ -63,6 +63,7 @@ struct dmp_info { - struct parasite_ctl *parasite_ctl; - struct parasite_thread_ctl **thread_ctls; - uint64_t *thread_sp; -+ struct rseq_cs *thread_rseq_cs; - - /* - * Although we don't support dumping different struct creds in general, --- -2.30.0 - diff --git a/0014-zdtm-add-rseq-transition-test-for-amd64.patch b/0014-zdtm-add-rseq-transition-test-for-amd64.patch deleted file mode 100644 index d1379bd2dcbf7eff26bc8079a43e4a135e948c2a..0000000000000000000000000000000000000000 --- 
a/0014-zdtm-add-rseq-transition-test-for-amd64.patch +++ /dev/null @@ -1,250 +0,0 @@ -From f76aa4ade354649e3291b5e7274c368740b05417 Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 15:05:34 +0800 -Subject: [PATCH 14/16] zdtm: add rseq transition test for amd64 Signed-off-by: - Alexander Mikhalitsyn - ---- - test/zdtm/transition/Makefile | 1 + - test/zdtm/transition/rseq01.c | 208 +++++++++++++++++++ - test/zdtm/transition/rseq01.desc | 1 + - 3 files changed, 210 insertions(+) - create mode 100644 test/zdtm/transition/rseq01.c - create mode 100644 test/zdtm/transition/rseq01.desc - -diff --git a/test/zdtm/transition/Makefile b/test/zdtm/transition/Makefile -index 9388157..fae4e27 100644 ---- a/test/zdtm/transition/Makefile -+++ b/test/zdtm/transition/Makefile -@@ -23,6 +23,7 @@ TST_NOFILE = \ - lazy-thp \ - pid_reuse \ - pidfd_store_sk \ -+ rseq01 \ - - - TST_FILE = \ -diff --git a/test/zdtm/transition/rseq01.c b/test/zdtm/transition/rseq01.c -new file mode 100644 -index 0000000..5fac5a6 ---- /dev/null -+++ b/test/zdtm/transition/rseq01.c -@@ -0,0 +1,208 @@ -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "zdtmtst.h" -+ -+#ifdef __has_include -+# if __has_include ("sys/rseq.h") -+# include -+# endif -+#endif -+ -+#if defined(__x86_64__) -+ -+#if defined(__x86_64__) && defined(RSEQ_SIG) -+static inline void *thread_pointer(void) -+{ -+ void *result; -+ asm("mov %%fs:0, %0" : "=r"(result)); -+ return result; -+} -+ -+static inline void unregister_old_rseq(void) -+{ -+ /* unregister rseq */ -+ syscall(__NR_rseq, (void *)((char *)thread_pointer() + __rseq_offset), __rseq_size, 1, RSEQ_SIG); -+} -+#else -+static inline void unregister_old_rseq(void) -+{ -+} -+#endif -+ -+const char *test_doc = "rseq() transition test"; -+const char *test_author = "Alexander Mikhalitsyn "; -+ -+/* parts of code borrowed from 
https://www.efficios.com/blog/2019/02/08/linux-restartable-sequences/ */ -+ -+/* some useful definitions from kernel uapi */ -+#ifndef RSEQ_SIG -+ -+enum rseq_flags { -+ RSEQ_FLAG_UNREGISTER = (1 << 0), -+}; -+ -+struct rseq { -+ uint32_t cpu_id_start; -+ uint32_t cpu_id; -+ uint64_t rseq_cs; -+ uint32_t flags; -+} __attribute__((aligned(4 * sizeof(uint64_t)))); -+ -+#define RSEQ_SIG 0x53053053 -+ -+#endif -+ -+#ifndef __NR_rseq -+#define __NR_rseq 334 -+#endif -+/* EOF */ -+ -+static volatile struct rseq *rseq_ptr; -+static __thread volatile struct rseq __rseq_abi; -+ -+static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len, int flags, uint32_t sig) -+{ -+ return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig); -+} -+ -+static void register_thread(void) -+{ -+ int rc; -+ unregister_old_rseq(); -+ rc = sys_rseq(rseq_ptr, sizeof(struct rseq), 0, RSEQ_SIG); -+ if (rc) { -+ fail("Failed to register rseq"); -+ exit(1); -+ } -+} -+ -+static void check_thread(void) -+{ -+ int rc; -+ rc = sys_rseq(rseq_ptr, sizeof(struct rseq), 0, RSEQ_SIG); -+ if (!(rc && errno == EBUSY)) { -+ fail("Failed to check rseq %d", rc); -+ exit(1); -+ } -+} -+ -+#define RSEQ_ACCESS_ONCE(x) (*(__volatile__ __typeof__(x) *)&(x)) -+ -+static int rseq_addv(intptr_t *v, intptr_t count, int cpu) -+{ -+ double a = 10000000000000000.0; -+ double b = -1; -+ /*test_msg("enter %f %f\n", a, b);*/ -+ -+ /* clang-format off */ -+ __asm__ __volatile__ goto( -+ ".pushsection __rseq_table, \"aw\"\n\t" -+ ".balign 32\n\t" -+ "cs_obj:\n\t" -+ /* version, flags */ -+ ".long 0, 0\n\t" -+ /* start_ip, post_commit_offset, abort_ip */ -+ ".quad 1f, (2f-1f), 4f\n\t" -+ ".popsection\n\t" -+ "1:\n\t" -+ "leaq cs_obj(%%rip), %%rax\n\t" -+ "movq %%rax, %[rseq_cs]\n\t" -+ "cmpl %[cpu_id], %[current_cpu_id]\n\t" -+ "jnz 4f\n\t" -+ "addq %[count], %[v]\n\t" /* final store */ -+ "mov $10000000, %%rcx\n\t" -+ "fldl %[x]\n\t" /* we have st clobbered */ -+ "5:\n\t" -+ "fsqrt\n\t" /* heavy instruction */ -+ "dec 
%%rcx\n\t" -+ "jnz 5b\n\t" -+ "fstpl %[y]\n\t" -+ "2:\n\t" -+ ".pushsection __rseq_failure, \"ax\"\n\t" -+ /* Disassembler-friendly signature: nopl (%rip). */ -+ ".byte 0x0f, 0xb9, 0x3d\n\t" -+ ".long 0x53053053\n\t" /* RSEQ_FLAGS */ -+ "4:\n\t" -+ /*"fstpl %[y]\n\t"*/ -+ "jmp %l[abort]\n\t" -+ /*"jmp 1b\n\t"*/ -+ ".popsection\n\t" -+ : /* gcc asm goto does not allow outputs */ -+ : [cpu_id] "r" (cpu), -+ [current_cpu_id] "m" (rseq_ptr->cpu_id), -+ [rseq_cs] "m" (rseq_ptr->rseq_cs), -+ /* final store input */ -+ [v] "m" (*v), -+ [count] "er" (count), -+ [x] "m" (a), -+ [y] "m" (b) -+ : "memory", "cc", "rax", "rcx", "st" -+ : abort -+ ); -+ /* clang-format on */ -+ /*test_msg("exit %f %f\n", a, b);*/ -+ return 0; -+abort: -+ /*test_msg("abort %f %f\n", a, b);*/ -+ return -1; -+} -+ -+int main(int argc, char *argv[]) -+{ -+ int cpu = 0; -+ int ret; -+ intptr_t *cpu_data; -+ long nr_cpus; -+ -+ rseq_ptr = &__rseq_abi; -+ memset((void *)rseq_ptr, 0, sizeof(struct rseq)); -+ -+ test_init(argc, argv); -+ nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); -+ -+ cpu_data = calloc(nr_cpus, sizeof(*cpu_data)); -+ if (!cpu_data) { -+ fail("calloc"); -+ exit(EXIT_FAILURE); -+ } -+ register_thread(); -+ -+ test_daemon(); -+ -+ while (test_go()) { -+ cpu = RSEQ_ACCESS_ONCE(rseq_ptr->cpu_id_start); -+ ret = rseq_addv(&cpu_data[cpu], 2, cpu); -+ if (ret) -+ fail("Failed to increment per-cpu counter"); -+ } -+ -+ test_waitsig(); -+ -+ check_thread(); -+ pass(); -+ -+ return 0; -+} -+ -+#else -+ -+int main(int argc, char *argv[]) -+{ -+ test_init(argc, argv); -+ skip("Unsupported arch"); -+ return 0; -+} -+ -+#endif -diff --git a/test/zdtm/transition/rseq01.desc b/test/zdtm/transition/rseq01.desc -new file mode 100644 -index 0000000..0324fa3 ---- /dev/null -+++ b/test/zdtm/transition/rseq01.desc -@@ -0,0 +1 @@ -+{'flavor': 'h', 'arch': 'x86_64', 'feature': 'get_rseq_conf'} --- -2.30.0 - diff --git a/0015-cr-dump-handle-rseq-flags-field.patch b/0015-cr-dump-handle-rseq-flags-field.patch 
deleted file mode 100644 index d54477411aa7b36382bcc340eece9441cc69abed..0000000000000000000000000000000000000000 --- a/0015-cr-dump-handle-rseq-flags-field.patch +++ /dev/null @@ -1,330 +0,0 @@ -From deac94521c373c13add63eaf88118187ea3c2cb2 Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 15:09:44 +0800 -Subject: [PATCH 15/16] cr-dump: handle rseq flags field Userspace may - configure rseq critical section by def - -Signed-off-by: Alexander Mikhalitsyn ---- - criu/cr-dump.c | 86 +++++++++++++++++++------------ - criu/cr-restore.c | 63 ++++++++++++++++++++++ - criu/include/pstree.h | 1 + - images/rseq.proto | 1 + - 4 files changed, 119 insertions(+), 32 deletions(-) - -diff --git a/criu/cr-dump.c b/criu/cr-dump.c -index a3f8973..79387fb 100644 ---- a/criu/cr-dump.c -+++ b/criu/cr-dump.c -@@ -1047,13 +1047,13 @@ static int dump_task_signals(pid_t pid, struct pstree_item *item) - return 0; - } - --static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseq, struct rseq_cs *rseq_cs) -+static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseqc, -+ struct rseq_cs *rseq_cs, struct rseq *rseq) - { - int ret; -- uint64_t addr; - - /* rseq is not registered */ -- if (!rseq->rseq_abi_pointer) -+ if (!rseqc->rseq_abi_pointer) - return 0; - - /* -@@ -1068,22 +1068,21 @@ static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseq, str - * then rseq_ip_fixup() -> clear_rseq_cs() and user space memory with struct rseq - * will be cleared. So, let's use ptrace(PTRACE_PEEKDATA). 
- */ -- ret = ptrace_peek_area(tid, &addr, decode_pointer(rseq->rseq_abi_pointer + offsetof(struct rseq, rseq_cs)), -- sizeof(uint64_t)); -+ ret = ptrace_peek_area(tid, rseq, decode_pointer(rseqc->rseq_abi_pointer), -+ sizeof(struct rseq)); - if (ret) { -- pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq_cs addr\n", tid, (unsigned long)&addr, -- (unsigned long)(rseq->rseq_abi_pointer + offsetof(struct rseq, rseq_cs)), sizeof(uint64_t)); -+ pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq struct\n", tid, (unsigned long)rseq, -+ (unsigned long)(rseqc->rseq_abi_pointer), sizeof(uint64_t)); - return -1; - } - -- /* (struct rseq)->rseq_cs is NULL */ -- if (!addr) -+ if (!rseq->rseq_cs.ptr64) - return 0; - -- ret = ptrace_peek_area(tid, rseq_cs, decode_pointer(addr), sizeof(struct rseq_cs)); -+ ret = ptrace_peek_area(tid, rseq_cs, decode_pointer(rseq->rseq_cs.ptr64), sizeof(struct rseq_cs)); - if (ret) { - pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq_cs struct\n", tid, -- (unsigned long)rseq_cs, (unsigned long)addr, sizeof(struct rseq_cs)); -+ (unsigned long)rseq_cs, (unsigned long)rseq->rseq_cs.ptr64, sizeof(struct rseq_cs)); - return -1; - } - -@@ -1092,11 +1091,12 @@ static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseq, str - - static int dump_thread_rseq(struct pstree_item *item, int i) - { -- struct __ptrace_rseq_configuration rseq; -+ struct __ptrace_rseq_configuration rseqc; - RseqEntry *rseqe = NULL; - int ret; - CoreEntry *core = item->core[i]; - RseqEntry **rseqep = &core->thread_core->rseq_entry; -+ struct rseq rseq; - struct rseq_cs *rseq_cs = &dmpi(item)->thread_rseq_cs[i]; - pid_t tid = item->threads[i].real; - -@@ -1111,20 +1111,20 @@ static int dump_thread_rseq(struct pstree_item *item, int i) - if (!kdat.has_ptrace_get_rseq_conf) - return 0; - -- ret = ptrace(PTRACE_GET_RSEQ_CONFIGURATION, tid, sizeof(rseq), &rseq); -- if (ret != sizeof(rseq)) { -+ ret = 
ptrace(PTRACE_GET_RSEQ_CONFIGURATION, tid, sizeof(rseqc), &rseqc); -+ if (ret != sizeof(rseqc)) { - pr_perror("ptrace(PTRACE_GET_RSEQ_CONFIGURATION, %d) = %d", tid, ret); - return -1; - } - -- if (rseq.flags != 0) { -+ if (rseqc.flags != 0) { - pr_err("something wrong with ptrace(PTRACE_GET_RSEQ_CONFIGURATION, %d) flags = 0x%x\n", tid, -- rseq.flags); -+ rseqc.flags); - return -1; - } - -- pr_info("Dump rseq of %d: ptr = 0x%lx sign = 0x%x\n", tid, (unsigned long)rseq.rseq_abi_pointer, -- rseq.signature); -+ pr_info("Dump rseq of %d: ptr = 0x%lx sign = 0x%x\n", tid, (unsigned long)rseqc.rseq_abi_pointer, -+ rseqc.signature); - - rseqe = xmalloc(sizeof(*rseqe)); - if (!rseqe) -@@ -1132,13 +1132,22 @@ static int dump_thread_rseq(struct pstree_item *item, int i) - - rseq_entry__init(rseqe); - -- rseqe->rseq_abi_pointer = rseq.rseq_abi_pointer; -- rseqe->rseq_abi_size = rseq.rseq_abi_size; -- rseqe->signature = rseq.signature; -+ rseqe->rseq_abi_pointer = rseqc.rseq_abi_pointer; -+ rseqe->rseq_abi_size = rseqc.rseq_abi_size; -+ rseqe->signature = rseqc.signature; - -- if (read_rseq_cs(tid, &rseq, rseq_cs)) -+ if (read_rseq_cs(tid, &rseqc, rseq_cs, &rseq)) - goto err; - -+ rseqe->has_rseq_cs_pointer = true; -+ rseqe->rseq_cs_pointer = rseq.rseq_cs.ptr64; -+ pr_err("cs pointer %lx\n", rseqe->rseq_cs_pointer); -+ /* we won't save rseq_cs to the image (only pointer), -+ * so let's combine flags from both struct rseq and struct rseq_cs -+ * (kernel does the same when interpreting RSEQ_CS_FLAG_*) -+ */ -+ rseq_cs->flags |= rseq.flags; -+ - /* save rseq entry to the image */ - *rseqep = rseqe; - -@@ -1188,11 +1197,11 @@ static int fixup_thread_rseq(struct pstree_item *item, int i) - struct rseq_cs *rseq_cs = &dmpi(item)->thread_rseq_cs[i]; - pid_t tid = item->threads[i].real; - -- /* (struct rseq)->rseq_cs is NULL */ -+ /* equivalent to (struct rseq)->rseq_cs is NULL */ - if (!rseq_cs->start_ip) - return 0; - -- pr_info("fixup_thread_rseq for %d: rseq_cs start_ip = %llx 
abort_ip = %llx post_commit_offset = %llx flags = %x version = %x; IP = %lx\n", -+ pr_debug("fixup_thread_rseq for %d: rseq_cs start_ip = %llx abort_ip = %llx post_commit_offset = %llx flags = %x version = %x; IP = %lx\n", - tid, rseq_cs->start_ip, rseq_cs->abort_ip, rseq_cs->post_commit_offset, rseq_cs->flags, - rseq_cs->version, (unsigned long)TI_IP(core)); - -@@ -1204,25 +1213,38 @@ static int fixup_thread_rseq(struct pstree_item *item, int i) - if (task_in_rseq(rseq_cs, TI_IP(core))) { - struct pid *tid = &item->threads[i]; - -- pr_info("The %d task is in rseq critical section. IP will be set to rseq abort handler addr\n", -- tid->real); -- - /* - * We need to fixup task instruction pointer from - * the original one (which lays inside rseq critical section) -- * to rseq abort handler address. -+ * to rseq abort handler address. But we need to look on rseq_cs->flags -+ * (please refer to struct rseq -> flags field description). -+ * Naive idea of flags support may be like... let's change instruction pointer (IP) -+ * to rseq_cs->abort_ip if !(rseq_cs->flags & RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL). -+ * But unfortunately, it doesn't work properly, because the kernel does -+ * clean up of rseq_cs field in the struct rseq (modifies userspace memory). -+ * So, we need to preserve original value of (struct rseq)->rseq_cs field in the -+ * image and restore it's value before releasing threads. - * - * It's worth to mention that we need to fixup IP in CoreEntry - * (used when full dump/restore is performed) and also in - * the parasite regs storage (used if --leave-running option is used, - * or if dump error occured and process execution is resumed). - */ -- TI_IP(core) = rseq_cs->abort_ip; - -- if (item->pid->real == tid->real) { -- compel_set_leader_ip(dmpi(item)->parasite_ctl, rseq_cs->abort_ip); -+ if (rseq_cs->flags & RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL) { -+ pr_err("The %d task is in rseq critical section.!!! 
IP will be set to rseq abort handler addr\n", -+ tid->real); - } else { -- compel_set_thread_ip(dmpi(item)->thread_ctls[i], rseq_cs->abort_ip); -+ pr_warn("The %d task is in rseq critical section. IP will be set to rseq abort handler addr\n", -+ tid->real); -+ -+ TI_IP(core) = rseq_cs->abort_ip; -+ -+ if (item->pid->real == tid->real) { -+ compel_set_leader_ip(dmpi(item)->parasite_ctl, rseq_cs->abort_ip); -+ } else { -+ compel_set_thread_ip(dmpi(item)->thread_ctls[i], rseq_cs->abort_ip); -+ } - } - } - -diff --git a/criu/cr-restore.c b/criu/cr-restore.c -index b2bd044..864140f 100644 ---- a/criu/cr-restore.c -+++ b/criu/cr-restore.c -@@ -23,6 +23,7 @@ - #include "common/compiler.h" - - #include "linux/mount.h" -+#include "linux/rseq.h" - - #include "clone-noasan.h" - #include "cr_options.h" -@@ -779,6 +780,7 @@ static int open_cores(int pid, CoreEntry *leader_core) - { - int i, tpid; - CoreEntry **cores = NULL; -+ //RseqEntry *rseqs; - - cores = xmalloc(sizeof(*cores) * current->nr_threads); - if (!cores) -@@ -812,6 +814,19 @@ static int open_cores(int pid, CoreEntry *leader_core) - } - } - -+ -+ pr_err("item %lx\n", (uint64_t)current); -+ -+ for (i = 0; i < current->nr_threads; i++) { -+ ThreadCoreEntry *tc = cores[i]->thread_core; -+ -+ /* compatibility with older CRIU versions */ -+ if (!tc->rseq_entry) -+ continue; -+ -+ current->rseqe[i] = *tc->rseq_entry; -+ } -+ - return 0; - err: - xfree(cores); -@@ -868,8 +883,15 @@ static int restore_one_alive_task(int pid, CoreEntry *core) - { - unsigned args_len; - struct task_restore_args *ta; -+ RseqEntry *rseqs; - pr_info("Restoring resources\n"); - -+ rseqs = shmalloc(sizeof(*rseqs) * current->nr_threads); -+ if (!rseqs) -+ return -1; -+ -+ current->rseqe = rseqs; -+ - rst_mem_switch_to_private(); - - args_len = round_up(sizeof(*ta) + sizeof(struct thread_restore_args) * current->nr_threads, page_size()); -@@ -1966,6 +1988,44 @@ static int attach_to_tasks(bool root_seized) - return 0; - } - -+static int 
restore_rseq_cs(void) -+{ -+ struct pstree_item *item; -+ -+ for_each_pstree_item(item) { -+ int i; -+ -+ if (!task_alive(item)) -+ continue; -+ -+ if (item->nr_threads == 1) { -+ item->threads[0].real = item->pid->real; -+ } else { -+ if (parse_threads(item->pid->real, &item->threads, &item->nr_threads)) -+ return -1; -+ } -+ -+ for (i = 0; i < item->nr_threads; i++) { -+ pid_t pid = item->threads[i].real; -+ -+ if (!item->rseqe[i].rseq_cs_pointer || !item->rseqe[i].rseq_abi_pointer) { -+ pr_err("item %lx rseqe %lx\n", (uint64_t)item, (uint64_t)item->rseqe); -+ pr_err("nothing to do with cs_pointer\n"); -+ continue; -+ } -+ -+ pr_err("restoring cs ... %lx \n", item->rseqe[i].rseq_cs_pointer); -+ -+ if (ptrace_poke_area(pid, &item->rseqe[i].rseq_cs_pointer, (void *)(item->rseqe[i].rseq_abi_pointer + offsetof(struct rseq, rseq_cs)), sizeof(uint64_t))) { -+ pr_err("Can't restore memfd args (pid: %d)\n", pid); -+ return -1; -+ } -+ } -+ } -+ -+ return 0; -+} -+ - static int catch_tasks(bool root_seized, enum trace_flags *flag) - { - struct pstree_item *item; -@@ -2400,6 +2460,9 @@ skip_ns_bouncing: - if (restore_freezer_state()) - pr_err("Unable to restore freezer state\n"); - -+ /* just before releasing threads we have to restore rseq_cs */ -+ restore_rseq_cs(); -+ - /* Detaches from processes and they continue run through sigreturn. 
*/ - if (finalize_restore_detach()) - goto out_kill_network_unlocked; -diff --git a/criu/include/pstree.h b/criu/include/pstree.h -index 458e5f9..97bef11 100644 ---- a/criu/include/pstree.h -+++ b/criu/include/pstree.h -@@ -25,6 +25,7 @@ struct pstree_item { - int nr_threads; /* number of threads */ - struct pid *threads; /* array of threads */ - CoreEntry **core; -+ RseqEntry *rseqe; - TaskKobjIdsEntry *ids; - union { - futex_t task_st; -diff --git a/images/rseq.proto b/images/rseq.proto -index be28004..45cb847 100644 ---- a/images/rseq.proto -+++ b/images/rseq.proto -@@ -6,4 +6,5 @@ message rseq_entry { - required uint64 rseq_abi_pointer = 1; - required uint32 rseq_abi_size = 2; - required uint32 signature = 3; -+ optional uint64 rseq_cs_pointer = 4; - } --- -2.30.0 - diff --git a/0016-zdtm-add-rseq02-transition-test-with-NO_RESTART-CS-f.patch b/0016-zdtm-add-rseq02-transition-test-with-NO_RESTART-CS-f.patch deleted file mode 100644 index 73038a7dc5a9970c43b6a11d933a5ae52b11a152..0000000000000000000000000000000000000000 --- a/0016-zdtm-add-rseq02-transition-test-with-NO_RESTART-CS-f.patch +++ /dev/null @@ -1,177 +0,0 @@ -From bb8295ae4f1224db2236fdd3134912e093ed20d9 Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 15:10:24 +0800 -Subject: [PATCH 16/16] zdtm: add rseq02 transition test with NO_RESTART CS - flag Signed-off-by: Alexander Mikhalitsyn - - ---- - test/zdtm/transition/Makefile | 2 + - test/zdtm/transition/rseq01.c | 61 +++++++++++++++++++- - test/zdtm/transition/rseq02.c | 1 + - test/zdtm/transition/rseq02.desc | 1 + - 4 files changed, 63 insertions(+), 2 deletions(-) - create mode 120000 test/zdtm/transition/rseq02.c - create mode 120000 test/zdtm/transition/rseq02.desc - -diff --git a/test/zdtm/transition/Makefile b/test/zdtm/transition/Makefile -index fae4e27..378a4fc 100644 ---- a/test/zdtm/transition/Makefile -+++ b/test/zdtm/transition/Makefile -@@ -24,6 +24,7 @@ TST_NOFILE = \ - pid_reuse \ - pidfd_store_sk \ - rseq01 \ -+ rseq02 \ - 
- - TST_FILE = \ -@@ -82,6 +83,7 @@ ptrace: LDFLAGS += -pthread - fork2: CFLAGS += -D FORK2 - thread-bomb.o: CFLAGS += -pthread - thread-bomb: LDFLAGS += -pthread -+rseq02: CFLAGS += -D NOABORT - - %: %.sh - cp $< $@ -diff --git a/test/zdtm/transition/rseq01.c b/test/zdtm/transition/rseq01.c -index 5fac5a6..25e1d61 100644 ---- a/test/zdtm/transition/rseq01.c -+++ b/test/zdtm/transition/rseq01.c -@@ -53,6 +53,18 @@ enum rseq_flags { - RSEQ_FLAG_UNREGISTER = (1 << 0), - }; - -+enum rseq_cs_flags_bit { -+ RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0, -+ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1, -+ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2, -+}; -+ -+enum rseq_cs_flags { -+ RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT), -+ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT), -+ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT), -+}; -+ - struct rseq { - uint32_t cpu_id_start; - uint32_t cpu_id; -@@ -104,6 +116,7 @@ static int rseq_addv(intptr_t *v, intptr_t count, int cpu) - { - double a = 10000000000000000.0; - double b = -1; -+ uint64_t rseq_cs1, rseq_cs2; - /*test_msg("enter %f %f\n", a, b);*/ - - /* clang-format off */ -@@ -129,6 +142,9 @@ static int rseq_addv(intptr_t *v, intptr_t count, int cpu) - "dec %%rcx\n\t" - "jnz 5b\n\t" - "fstpl %[y]\n\t" -+ "movq %%rax, %[rseq_cs_check2]\n\t" -+ "movq %[rseq_cs], %%rax\n\t" -+ "movq %%rax, %[rseq_cs_check1]\n\t" - "2:\n\t" - ".pushsection __rseq_failure, \"ax\"\n\t" - /* Disassembler-friendly signature: nopl (%rip). 
*/ -@@ -143,6 +159,8 @@ static int rseq_addv(intptr_t *v, intptr_t count, int cpu) - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_ptr->cpu_id), - [rseq_cs] "m" (rseq_ptr->rseq_cs), -+ [rseq_cs_check1] "m" (rseq_cs1), -+ [rseq_cs_check2] "m" (rseq_cs2), - /* final store input */ - [v] "m" (*v), - [count] "er" (count), -@@ -153,8 +171,20 @@ static int rseq_addv(intptr_t *v, intptr_t count, int cpu) - ); - /* clang-format on */ - /*test_msg("exit %f %f\n", a, b);*/ -+ test_msg("%lx %lx\n", rseq_cs1, rseq_cs2); -+ if (rseq_cs1 != rseq_cs2) { -+ /* -+ * It means that we finished critical section -+ * *normally* (haven't jumped to abort) but the kernel had cleaned up -+ * rseq_ptr->rseq_cs before we left critical section -+ * and CRIU wasn't restored it correctly. -+ * That's a bug picture. -+ */ -+ return -1; -+ } - return 0; - abort: -+ test_msg("%lx %lx\n", rseq_cs1, rseq_cs2); - /*test_msg("abort %f %f\n", a, b);*/ - return -1; - } -@@ -177,21 +207,48 @@ int main(int argc, char *argv[]) - fail("calloc"); - exit(EXIT_FAILURE); - } -+ - register_thread(); - -+ /* -+ * We want to test that RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL -+ * is handled properly by CRIU, but that flag can be used -+ * only with all another flags set. 
-+ * Please, refer to -+ * https://github.com/torvalds/linux/blob/master/kernel/rseq.c#L192 -+ */ -+#ifdef NOABORT -+ rseq_ptr->flags = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT | -+ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL | -+ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE; -+#endif -+ - test_daemon(); - - while (test_go()) { - cpu = RSEQ_ACCESS_ONCE(rseq_ptr->cpu_id_start); - ret = rseq_addv(&cpu_data[cpu], 2, cpu); -- if (ret) -+#ifndef NOABORT -+ /* just ignore abort */ -+ ret = 0; -+#else -+ if (ret) { - fail("Failed to increment per-cpu counter"); -+ break; -+ } else { -+ //test_msg("cpu_data[%d] == %ld\n", cpu, (long int)cpu_data[cpu]); -+ } -+#endif - } - - test_waitsig(); - - check_thread(); -- pass(); -+ -+ if (ret) -+ fail(); -+ else -+ pass(); - - return 0; - } -diff --git a/test/zdtm/transition/rseq02.c b/test/zdtm/transition/rseq02.c -new file mode 120000 -index 0000000..d564917 ---- /dev/null -+++ b/test/zdtm/transition/rseq02.c -@@ -0,0 +1 @@ -+rseq01.c -\ No newline at end of file -diff --git a/test/zdtm/transition/rseq02.desc b/test/zdtm/transition/rseq02.desc -new file mode 120000 -index 0000000..b888f0d ---- /dev/null -+++ b/test/zdtm/transition/rseq02.desc -@@ -0,0 +1 @@ -+rseq01.desc -\ No newline at end of file --- -2.30.0 - diff --git a/criu.spec b/criu.spec index 941a786d3a5b4a75dd9b0597488b138f3ac2e26d..ff4fe1fdb88ed60c5c28b99ef90accdc29e39115 100644 --- a/criu.spec +++ b/criu.spec @@ -1,6 +1,6 @@ Name: criu Version: 3.16.1 -Release: 7 +Release: 9 Provides: crtools = %{version}-%{release} Obsoletes: crtools <= 1.0-2 Summary: A tool of Checkpoint/Restore in User-space @@ -17,21 +17,7 @@ Obsoletes: %{name}-libs < %{version}-%{release} Patch1: 0001-criu-dump-and-restore-cpu-affinity-of-each-thread.patch Patch2: 0002-mm-add-pin-memory-method-for-criu.patch -Patch3: 0002-compel-add-rseq-syscall-into-compel-std-plugin-sysca.patch -Patch4: 0003-kerndat-check-for-rseq-syscall-support.patch -Patch5: 0004-util-move-fork_and_ptrace_attach-helper-from-cr-chec.patch -Patch6: 
0005-cr-check-Add-ptrace-rseq-conf-dump-feature.patch -Patch7: 0006-rseq-initial-support.patch -Patch8: 0007-zdtm-add-simple-test-for-rseq-C-R.patch -Patch9: 0008-ci-add-Fedora-Rawhide-based-test-on-Cirrus.patch -Patch10: 0009-include-add-thread_pointer.h-from-Glibc.patch -Patch11: 0010-clone-noasan-unregister-rseq-at-the-thread-start-for.patch -Patch12: 0011-zdtm-static-rseq00-fix-rseq-test-when-linking-with-a.patch -Patch13: 0012-compel-add-helpers-to-get-set-instruction-pointer.patch -Patch14: 0013-cr-dump-fixup-thread-IP-when-inside-rseq-cs.patch -Patch15: 0014-zdtm-add-rseq-transition-test-for-amd64.patch -Patch16: 0015-cr-dump-handle-rseq-flags-field.patch -Patch17: 0016-zdtm-add-rseq02-transition-test-with-NO_RESTART-CS-f.patch +Patch3: revert-fix-BUG-at-criu-pstree.c-452.patch Patch1000: 1000-backport-page-pipe-fix-limiting-a-pipe-size.patch Patch1001: 1001-backport-tty-fix-the-null-pointer-of-get_tty_driver.patch @@ -119,6 +105,12 @@ chmod 0755 %{buildroot}/run/%{name}/ %doc %{_mandir}/man1/{compel.1*,crit.1*,criu-ns.1*} %changelog +* Mon Jun 19 2023 hewenliang <314264452@qq.com> - 3.16.1-9 +- revert: fix BUG at criu/pstree.c:452 + +* Mon Jun 19 2023 hewenliang <314264452@qq.com> - 3.16.1-8 +- revert "rseq c/r support" + * Tue Nov 22 2022 Hewenliang - 3.16.1-7 - fix the null pointer of get_tty_driver. - criu files Dont cache fd ids for device files. diff --git a/revert-fix-BUG-at-criu-pstree.c-452.patch b/revert-fix-BUG-at-criu-pstree.c-452.patch new file mode 100644 index 0000000000000000000000000000000000000000..077e35bbde795817100d0c552c9d18da4b56f00c --- /dev/null +++ b/revert-fix-BUG-at-criu-pstree.c-452.patch @@ -0,0 +1,90 @@ +Subject: [PATCH 1/1] revert: fix BUG at criu/pstree.c:452 + +Not every process dumped by criu is a session leader, and the +enhanced verification is troublesome because it causes many problems in +some test cases. Therefore, revert this bugfix.
+ +If the bugfix is reapplied, the process must be started with `setsid`, +using `stdbuf -oL` to redirect standard output at the same time. + +For details of this bug, see #1332. + +Revert "pstree: don't change sid/gid-s if current sid/gid is the same" +This reverts commit 90e03b1a1142ca40fb78de9eb04944ab51d06eeb. + +Revert "pstree: check for pid collision before switching to new sid/gid" +This reverts commit 7e6a1a7011b404fbf0108b062bda118e9a696b60. +--- + criu/pstree.c | 37 ++++++++++--------------------------- + 1 file changed, 10 insertions(+), 27 deletions(-) + +diff --git a/criu/pstree.c b/criu/pstree.c +index d5080e515..bf09c761c 100644 +--- a/criu/pstree.c ++++ b/criu/pstree.c +@@ -340,7 +340,6 @@ static int prepare_pstree_for_shell_job(pid_t pid) + pid_t current_gid = getpgid(pid); + + struct pstree_item *pi; +- struct pid *tmp; + + pid_t old_sid; + pid_t old_gid; +@@ -348,7 +347,6 @@ static int prepare_pstree_for_shell_job(pid_t pid) + if (!opts.shell_job) + return 0; + +- /* root_item is a session leader */ + if (root_item->sid == vpid(root_item)) + return 0; + +@@ -370,37 +368,22 @@ static int prepare_pstree_for_shell_job(pid_t pid) + */ + + old_sid = root_item->sid; +- if (old_sid != current_sid) { +- pr_info("Migrating process tree (SID %d->%d)\n", old_sid, current_sid); + +- tmp = pstree_pid_by_virt(current_sid); +- if (tmp) { +- pr_err("Current sid %d intersects with pid (%d) in images\n", current_sid, tmp->state); +- return -1; +- } ++ pr_info("Migrating process tree (SID %d->%d)\n", ++ old_sid, current_sid); + +- for_each_pstree_item(pi) { +- if (pi->sid == old_sid) +- pi->sid = current_sid; +- } +- +- if (lookup_create_item(current_sid) == NULL) +- return -1; ++ for_each_pstree_item(pi) { ++ if (pi->sid == old_sid) ++ pi->sid = current_sid; + } + +- /* root_item is a group leader */ +- if (root_item->pgid == vpid(root_item)) +- return 0; +- + old_gid = root_item->pgid; +- if (old_gid != current_gid) { +- pr_info("Migrating process tree (GID
%d->%d)\n", old_gid, current_gid); +- +- tmp = pstree_pid_by_virt(current_gid); +- if (tmp) { +- pr_err("Current gid %d intersects with pid (%d) in images\n", current_gid, tmp->state); ++ if (old_gid != vpid(root_item)) { ++ if (lookup_create_item(current_sid) == NULL) + return -1; +- } ++ ++ pr_info("Migrating process tree (GID %d->%d)\n", ++ old_gid, current_gid); + + for_each_pstree_item(pi) { + if (pi->pgid == old_gid) +-- +2.35.1 \ No newline at end of file