diff --git a/0001-criu-dump-and-restore-cpu-affinity-of-each-thread.patch b/0001-criu-dump-and-restore-cpu-affinity-of-each-thread.patch index 5ad6a5c1f3c631b59d658136d7ef96ed1556fdf8..7ccdd8c1c8aaa4dc7f782a84ce0b3884047b5f77 100644 --- a/0001-criu-dump-and-restore-cpu-affinity-of-each-thread.patch +++ b/0001-criu-dump-and-restore-cpu-affinity-of-each-thread.patch @@ -1,7 +1,7 @@ -From 746a5dd20bb688e1d830e216059e1de7e59186a3 Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Tue, 13 Apr 2021 10:39:45 +0800 -Subject: [PATCH 01/72] criu: dump and restore cpu affinity of each thread +From f700bf90e9339d41132a62c24dde9b1c0fa8a9ee Mon Sep 17 00:00:00 2001 +From: snoweay +Date: Thu, 25 Apr 2024 17:36:32 +0800 +Subject: [PATCH] criu: dump and restore cpu affinity of each thread Criu should dump and restore threads' or processes' cpu affinity. @@ -38,40 +38,43 @@ Signed-off-by: Sang Yan create mode 100644 test/zdtm/static/cpu-affinity0.desc diff --git a/compel/arch/arm/plugins/std/syscalls/syscall.def b/compel/arch/arm/plugins/std/syscalls/syscall.def -index e6508ed..1b877d1 100644 +index 7489ee0..9403fc2 100644 --- a/compel/arch/arm/plugins/std/syscalls/syscall.def +++ b/compel/arch/arm/plugins/std/syscalls/syscall.def -@@ -116,5 +116,6 @@ fsopen 430 430 (char *fsname, unsigned int flags) +@@ -118,6 +118,7 @@ fsopen 430 430 (char *fsname, unsigned int flags) fsconfig 431 431 (int fd, unsigned int cmd, const char *key, const char *value, int aux) fsmount 432 432 (int fd, unsigned int flags, unsigned int attr_flags) clone3 435 435 (struct clone_args *uargs, size_t size) +sched_setaffinity 122 241 (int fd, size_t cpusetsize, const cpu_set_t *mask) pidfd_open 434 434 (pid_t pid, unsigned int flags) + openat2 437 437 (int dirfd, char *pathname, struct open_how *how, size_t size) pidfd_getfd 438 438 (int pidfd, int targetfd, unsigned int flags) diff --git a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl -index 1bb626b..dd79187 100644 +index 4c9b75c..43ecc40 100644 --- a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl +++ b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl -@@ -112,5 +112,6 @@ __NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) +@@ -114,6 +114,7 @@ __NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) __NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) __NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) __NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) +__NR_sched_setaffinity 222 sys_sched_setaffinity (int fd, size_t cpusetsize, const cpu_set_t *mask) __NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags) + __NR_openat2 437 sys_openat2 (int dirfd, char *pathname, struct open_how *how, size_t size) __NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags) diff --git a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl -index 7178bf4..282adaf 100644 +index af7d550..ef72a2f 100644 --- a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl +++ b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl -@@ -112,5 +112,6 @@ __NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) +@@ -114,6 +114,7 @@ __NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) __NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) __NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) __NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) +__NR_sched_setaffinity 239 sys_sched_setaffinity (int fd, size_t cpusetsize, const cpu_set_t *mask) __NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags) + __NR_openat2 437 sys_openat2 (int dirfd, char *pathname, struct open_how *how, size_t size) __NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags) diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl -index 7e456cd..3fe3194 100644 +index ab36a5c..6c6522a 100644 --- a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl +++ b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl @@ -63,6 +63,7 @@ __NR_mincore 218 sys_mincore (void *addr, unsigned long size, unsigned char * @@ -83,7 +86,7 @@ index 7e456cd..3fe3194 100644 __NR_get_thread_area 244 sys_get_thread_area (user_desc_t *info) __NR_io_setup 245 sys_io_setup (unsigned nr_reqs, aio_context_t *ctx32p) diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl -index 2dfcc6e..c1d119d 100644 +index 57681b7..4746bd2 100644 --- a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl +++ b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl @@ -73,6 +73,7 @@ __NR_mount 165 sys_mount (char *dev_nmae, char *dir_name, char *type, unsign @@ -95,22 +98,22 @@ index 2dfcc6e..c1d119d 100644 __NR_io_setup 206 sys_io_setup (unsigned nr_events, aio_context_t *ctx) __NR_io_getevents 208 sys_io_getevents (aio_context_t ctx, long min_nr, long nr, struct io_event *evs, struct timespec *tmo) diff --git a/criu/config.c b/criu/config.c -index 91fb0b6..71f99c9 100644 +index 1322a49..f3f4666 100644 --- a/criu/config.c +++ b/criu/config.c -@@ -695,6 +695,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, - { "cgroup-yard", required_argument, 0, 1096 }, +@@ -698,6 +698,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, { "pre-dump-mode", required_argument, 0, 1097 }, { "file-validation", required_argument, 0, 1098 }, + BOOL_OPT("skip-file-rwx-check", &opts.skip_file_rwx_check), + BOOL_OPT("with-cpu-affinity", &opts.with_cpu_affinity), { "lsm-mount-context", required_argument, 0, 1099 }, { "network-lock", required_argument, 0, 1100 }, - {}, + BOOL_OPT("mntns-compat-mode", &opts.mntns_compat_mode), diff --git a/criu/cr-dump.c b/criu/cr-dump.c -index 940f622..f07fe6e 100644 +index ee5974a..eb540b0 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c -@@ -139,6 +139,7 @@ static int dump_sched_info(int pid, ThreadCoreEntry *tc) +@@ -140,6 +140,7 @@ static int dump_sched_info(int pid, ThreadCoreEntry *tc) { int ret; struct sched_param sp; @@ -118,7 +121,7 @@ index 940f622..f07fe6e 100644 BUILD_BUG_ON(SCHED_OTHER != 0); /* default in proto message */ -@@ -183,6 +184,18 @@ static int dump_sched_info(int pid, ThreadCoreEntry *tc) +@@ -184,6 +185,18 @@ static int dump_sched_info(int pid, ThreadCoreEntry *tc) pr_info("\tdumping %d nice for %d\n", ret, pid); tc->has_sched_nice = true; tc->sched_nice = ret; @@ -138,10 +141,10 @@ index 940f622..f07fe6e 100644 return 0; } diff --git a/criu/cr-restore.c b/criu/cr-restore.c -index 9d2d957..5b645c1 100644 +index 2700497..9855a3d 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c -@@ -118,6 +118,7 @@ static int prepare_restorer_blob(void); +@@ -120,6 +120,7 @@ static int prepare_restorer_blob(void); static int prepare_rlimits(int pid, struct task_restore_args *, CoreEntry *core); static int prepare_posix_timers(int pid, struct task_restore_args *ta, CoreEntry *core); static int prepare_signals(int pid, struct task_restore_args *, CoreEntry *core); @@ -149,7 +152,7 @@ index 9d2d957..5b645c1 100644 /* * Architectures can overwrite this function to restore registers that are not -@@ -899,6 +900,9 @@ static int restore_one_alive_task(int pid, CoreEntry *core) +@@ -921,6 +922,9 @@ static int restore_one_alive_task(int pid, CoreEntry *core) if (prepare_signals(pid, ta, core)) return -1; @@ -159,7 +162,7 @@ index 9d2d957..5b645c1 100644 if (prepare_posix_timers(pid, ta, core)) return -1; -@@ -3153,6 +3157,24 @@ out: +@@ -3298,6 +3302,24 @@ out: return ret; } @@ -184,7 +187,7 @@ index 9d2d957..5b645c1 100644 extern void __gcov_flush(void) __attribute__((weak)); void __gcov_flush(void) { -@@ -3603,6 +3625,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns +@@ -3762,6 +3784,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns RST_MEM_FIXUP_PPTR(task_args->timerfd); RST_MEM_FIXUP_PPTR(task_args->posix_timers); RST_MEM_FIXUP_PPTR(task_args->siginfo); @@ -193,33 +196,33 @@ index 9d2d957..5b645c1 100644 RST_MEM_FIXUP_PPTR(task_args->helpers); RST_MEM_FIXUP_PPTR(task_args->zombies); diff --git a/criu/crtools.c b/criu/crtools.c -index 6a75cd1..b5a36b9 100644 +index 94657f4..a8df89d 100644 --- a/criu/crtools.c +++ b/criu/crtools.c -@@ -445,6 +445,8 @@ usage: +@@ -509,6 +509,8 @@ usage: " --file-validation METHOD\n" " pass the validation method to be used; argument\n" " can be 'filesize' or 'buildid' (default).\n" + " --with-cpu-affinity Allow to restore cpu affinity. Only for hosts with\n" + " same cpu quantity.\n" - "\n" - "Check options:\n" - " Without options, \"criu check\" checks availability of absolutely required\n" + " --skip-file-rwx-check\n" + " Skip checking file permissions\n" + " (r/w/x for u/g/o) on restore.\n" diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h -index a34f8db..3b50e59 100644 +index 60cf943..db7484a 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h -@@ -188,6 +188,8 @@ struct cr_options { +@@ -212,6 +212,8 @@ struct cr_options { /* This stores which method to use for file validation. */ int file_validation_method; + /* restore cpu affinity */ + int with_cpu_affinity; - }; - extern struct cr_options opts; + /* Shows the mode criu is running at the moment: dump/pre-dump/restore/... */ + enum criu_mode mode; diff --git a/criu/include/restorer.h b/criu/include/restorer.h -index 934d60c..c2ef8f0 100644 +index f398d8d..668aa65 100644 --- a/criu/include/restorer.h +++ b/criu/include/restorer.h @@ -1,6 +1,7 @@ @@ -230,7 +233,7 @@ index 934d60c..c2ef8f0 100644 #include #include #include -@@ -162,6 +163,8 @@ struct task_restore_args { +@@ -170,6 +171,8 @@ struct task_restore_args { siginfo_t *siginfo; unsigned int siginfo_n; @@ -240,10 +243,10 @@ index 934d60c..c2ef8f0 100644 unsigned int tcp_socks_n; diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c -index 4304691..fbc89fe 100644 +index 0297165..d8e7d41 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c -@@ -425,6 +425,40 @@ static int restore_signals(siginfo_t *ptr, int nr, bool group) +@@ -477,6 +477,40 @@ static int restore_signals(siginfo_t *ptr, int nr, bool group) return 0; } @@ -281,10 +284,10 @@ index 4304691..fbc89fe 100644 + return 0; +} + - static int restore_seccomp_filter(pid_t tid, struct thread_restore_args *args) + static int restore_rseq(struct rst_rseq_param *rseq) { - unsigned int flags = args->seccomp_force_tsync ? SECCOMP_FILTER_FLAG_TSYNC : 0; -@@ -1856,6 +1890,10 @@ long __export_restore_task(struct task_restore_args *args) + int ret; +@@ -2139,6 +2173,10 @@ __visible long __export_restore_task(struct task_restore_args *args) if (ret) goto core_restore_end; @@ -296,7 +299,7 @@ index 4304691..fbc89fe 100644 rst_tcp_socks_all(args); diff --git a/criu/pstree.c b/criu/pstree.c -index d5080e5..778c884 100644 +index 8c44e71..efc52c0 100644 --- a/criu/pstree.c +++ b/criu/pstree.c @@ -58,11 +58,13 @@ CoreEntry *core_entry_alloc(int th, int tsk) @@ -326,10 +329,10 @@ index d5080e5..778c884 100644 xfree(core); core = NULL; diff --git a/images/core.proto b/images/core.proto -index b713119..39e7f32 100644 +index 5b07b5c..e4ca389 100644 --- a/images/core.proto +++ b/images/core.proto -@@ -83,6 +83,10 @@ message thread_sas_entry { +@@ -90,6 +90,10 @@ message thread_sas_entry { required uint32 ss_flags = 3; } @@ -340,19 +343,19 @@ index b713119..39e7f32 100644 message thread_core_entry { required uint64 futex_rla = 1; required uint32 futex_rla_len = 2; -@@ -101,6 +105,7 @@ message thread_core_entry { - - optional string comm = 13; +@@ -110,6 +114,7 @@ message thread_core_entry { optional uint64 blk_sigset_extended = 14; -+ required thread_allowedcpus_entry allowed_cpus = 15; + optional rseq_entry rseq_entry = 15; + optional uint32 cg_set = 16; ++ required thread_allowedcpus_entry allowed_cpus = 17; } message task_rlimits_entry { diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile -index c9e6589..70123cf 100644 +index 07d3bc6..c25a46f 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile -@@ -246,6 +246,7 @@ TST_NOFILE := \ +@@ -266,6 +266,7 @@ TST_NOFILE := \ timens_nested \ timens_for_kids \ zombie_leader \ @@ -416,5 +419,5 @@ index 0000000..0d0b8ae @@ -0,0 +1 @@ +{'dopts': '', 'ropts': '--with-cpu-affinity', 'flags': 'reqrst '} -- -2.34.1 +2.33.0 diff --git a/0002-compel-add-rseq-syscall-into-compel-std-plugin-sysca.patch b/0002-compel-add-rseq-syscall-into-compel-std-plugin-sysca.patch deleted file mode 100644 index 5f72eb9d75770b6fa03f1b502ce26cf413e6a9c6..0000000000000000000000000000000000000000 --- a/0002-compel-add-rseq-syscall-into-compel-std-plugin-sysca.patch +++ /dev/null @@ -1,74 +0,0 @@ -From dc6dbe893f7a8b644b655a56e4a0edfb854c577f Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 13:28:51 +0800 -Subject: [PATCH 02/72] compel: add rseq syscall into compel std plugin syscall - tables Add rseq syscall numbers for: arm/aarch64, mips64, ppc64le, s390, - x86_64/x86 - -Signed-off-by: Alexander Mikhalitsyn ---- - compel/arch/arm/plugins/std/syscalls/syscall.def | 1 + - compel/arch/mips/plugins/std/syscalls/syscall_64.tbl | 1 + - compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl | 1 + - compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl | 1 + - compel/arch/x86/plugins/std/syscalls/syscall_32.tbl | 1 + - compel/arch/x86/plugins/std/syscalls/syscall_64.tbl | 1 + - 6 files changed, 6 insertions(+) - -diff --git a/compel/arch/arm/plugins/std/syscalls/syscall.def b/compel/arch/arm/plugins/std/syscalls/syscall.def -index 1b877d1..bb78cbb 100644 ---- a/compel/arch/arm/plugins/std/syscalls/syscall.def -+++ b/compel/arch/arm/plugins/std/syscalls/syscall.def -@@ -119,3 +119,4 @@ clone3 435 435 (struct clone_args *uargs, size_t size) - sched_setaffinity 122 241 (int fd, size_t cpusetsize, const cpu_set_t *mask) - pidfd_open 434 434 (pid_t pid, unsigned int flags) - pidfd_getfd 438 438 (int pidfd, int targetfd, unsigned int flags) -+rseq 293 398 (void *rseq, uint32_t rseq_len, int flags, uint32_t sig) -diff --git a/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl b/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl -index 7a6db19..95dc7d3 100644 ---- a/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl -+++ b/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl -@@ -115,3 +115,4 @@ __NR_fsmount 5432 sys_fsmount (int fd, unsigned int flags, unsigned int attr - __NR_clone3 5435 sys_clone3 (struct clone_args *uargs, size_t size) - __NR_pidfd_open 5434 sys_pidfd_open (pid_t pid, unsigned int flags) - __NR_pidfd_getfd 5438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags) -+__NR_rseq 5327 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig) -diff --git a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl -index dd79187..ad0d94f 100644 ---- a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl -+++ b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl -@@ -115,3 +115,4 @@ __NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) - __NR_sched_setaffinity 222 sys_sched_setaffinity (int fd, size_t cpusetsize, const cpu_set_t *mask) - __NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags) - __NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags) -+__NR_rseq 387 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig) -diff --git a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl -index 282adaf..916b697 100644 ---- a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl -+++ b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl -@@ -115,3 +115,4 @@ __NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) - __NR_sched_setaffinity 239 sys_sched_setaffinity (int fd, size_t cpusetsize, const cpu_set_t *mask) - __NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags) - __NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags) -+__NR_rseq 383 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig) -diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl -index 3fe3194..90f23d5 100644 ---- a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl -+++ b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl -@@ -103,3 +103,4 @@ __NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_f - __NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) - __NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags) - __NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags) -+__NR_rseq 386 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig) -diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl -index c1d119d..323fab1 100644 ---- a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl -+++ b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl -@@ -114,3 +114,4 @@ __NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_ - __NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) - __NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags) - __NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags) -+__NR_rseq 334 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig) --- -2.34.1 - diff --git a/0020-mm-add-pin-memory-method-for-criu.patch b/0002-mm-add-pin-memory-method-for-criu.patch similarity index 45% rename from 0020-mm-add-pin-memory-method-for-criu.patch rename to 0002-mm-add-pin-memory-method-for-criu.patch index 6f150ce103fa57f49e6013f073db3f0057821963..f53018ee3cdc4eff08bac925b38c9ba7a9dfb948 100644 --- a/0020-mm-add-pin-memory-method-for-criu.patch +++ b/0002-mm-add-pin-memory-method-for-criu.patch @@ -1,42 +1,23 @@ -From 3858f7e228b15d0e1ce553f530fda4da9aa4efab Mon Sep 17 00:00:00 2001 -From: Jingxian He -Date: Fri, 23 Apr 2021 21:22:08 +0800 -Subject: [PATCH 20/72] mm: add pin memory method for criu +From dc9ba08388bfb3aa28225d9cd5a4f779c10e23a9 Mon Sep 17 00:00:00 2001 +From: anatasluo +Date: Sat, 26 Feb 2022 02:48:25 +0000 +Subject: [PATCH 2/2] mm: add pin memory method for criu -Add pin memory method for criu to improve memory recover +Add pin memory for criu to improve memory recover speed and avoid user private data saving to files. -Signed-off-by: Jingxian He -Signed-off-by: fu.lin +Signed-off-by: anatasluo --- - criu/Makefile.crtools | 1 + - criu/config.c | 1 + - criu/cr-dump.c | 9 +++ - criu/cr-restore.c | 2 + - criu/crtools.c | 1 + - criu/include/cr_options.h | 1 + - criu/include/pin-mem.h | 49 +++++++++++++ - criu/include/restorer.h | 1 + - criu/mem.c | 16 +++++ - criu/pie/restorer.c | 26 ++++++- - criu/pin-mem.c | 146 ++++++++++++++++++++++++++++++++++++++ - criu/seize.c | 6 ++ - 12 files changed, 258 insertions(+), 1 deletion(-) - create mode 100644 criu/include/pin-mem.h - create mode 100644 criu/pin-mem.c + criu/config.c | 1 + + criu/cr-restore.c | 5 ++ + criu/crtools.c | 1 + + criu/include/cr_options.h | 1 + + criu/include/restorer.h | 28 ++++++++++++ + criu/mem.c | 96 +++++++++++++++++++++++++++++++++++++++ + criu/pie/restorer.c | 25 +++++++++- + criu/seize.c | 1 + + 8 files changed, 157 insertions(+), 1 deletion(-) -diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools -index 50a2fa9..98c4135 100644 ---- a/criu/Makefile.crtools -+++ b/criu/Makefile.crtools -@@ -90,6 +90,7 @@ obj-y += servicefd.o - obj-y += pie-util-vdso.o - obj-y += vdso.o - obj-y += timens.o -+obj-y += pin-mem.o - obj-$(CONFIG_HAS_LIBBPF) += bpfmap.o - obj-$(CONFIG_COMPAT) += pie-util-vdso-elf32.o - CFLAGS_pie-util-vdso-elf32.o += -DCONFIG_VDSO_32 diff --git a/criu/config.c b/criu/config.c index 71f99c9..53a5cfd 100644 --- a/criu/config.c @@ -49,55 +30,31 @@ index 71f99c9..53a5cfd 100644 { "lsm-mount-context", required_argument, 0, 1099 }, { "network-lock", required_argument, 0, 1100 }, {}, -diff --git a/criu/cr-dump.c b/criu/cr-dump.c -index 79387fb..5fac9ce 100644 ---- a/criu/cr-dump.c -+++ b/criu/cr-dump.c -@@ -86,6 +86,7 @@ - #include "pidfd-store.h" - #include "apparmor.h" - #include "asm/dump.h" -+#include "pin-mem.h" - - /* - * Architectures can overwrite this function to restore register sets that -@@ -2058,6 +2059,14 @@ static int cr_dump_finish(int ret) - close_service_fd(CR_PROC_FD_OFF); - close_image_dir(); - -+ if (ret == 0 && opts.pin_memory) { -+ pr_info("start restore_task_special_pages\n"); -+ restore_task_special_pages(0); -+ } else if (ret != 0 && opts.pin_memory) { -+ pr_info("clear pin mem info\n"); -+ clear_pin_mem(0); -+ } -+ - if (ret) { - pr_err("Dumping FAILED.\n"); - } else { diff --git a/criu/cr-restore.c b/criu/cr-restore.c -index 864140f..5514c29 100644 +index 5b645c1..6d6e63f 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c -@@ -3885,6 +3885,8 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns +@@ -3805,6 +3805,11 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns task_args, task_args->t->pid, task_args->nr_threads, task_args->clone_restore_fn, task_args->thread_args); -+ task_args->pin_memory = opts.pin_memory; ++ if (opts.pin_memory) ++ task_args->pin_memory = true; ++ else ++ task_args->pin_memory = false; + /* * An indirect call to task_restore, note it never returns * and restoring core is extremely destructive. diff --git a/criu/crtools.c b/criu/crtools.c -index b5a36b9..1b90481 100644 +index b5a36b9..0cd4d11 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -447,6 +447,7 @@ usage: " can be 'filesize' or 'buildid' (default).\n" " --with-cpu-affinity Allow to restore cpu affinity. Only for hosts with\n" " same cpu quantity.\n" -+ " --pin-memory Use pin memory method for checkpoint and restore.\n" ++ " --pin-memory Use pin memory method for checkpoint and restore.\n" "\n" "Check options:\n" " Without options, \"criu check\" checks availability of absolutely required\n" @@ -113,66 +70,11 @@ index 3b50e59..61898fd 100644 }; extern struct cr_options opts; -diff --git a/criu/include/pin-mem.h b/criu/include/pin-mem.h -new file mode 100644 -index 0000000..7e53b12 ---- /dev/null -+++ b/criu/include/pin-mem.h -@@ -0,0 +1,49 @@ -+#ifndef __CRIU_PIN_MEM_H__ -+#define __CRIU_PIN_MEM_H__ -+ -+#include -+ -+#include "vma.pb-c.h" -+ -+#if __has_include("linux/pin_memory.h") -+# include -+#else -+ -+#define PIN_MEM_MAGIC 0x59 -+#define _SET_PIN_MEM_AREA 1 -+#define _CLEAR_PIN_MEM_AREA 2 -+#define _REMAP_PIN_MEM_AREA 3 -+#define _DUMP_SEPCIAL_PAGES 6 -+#define _RETORE_SEPCIAL_PAGES 7 -+ -+#define SET_PIN_MEM_AREA _IOW(PIN_MEM_MAGIC, _SET_PIN_MEM_AREA, struct pin_mem_area_set) -+#define CLEAR_PIN_MEM_AREA _IOW(PIN_MEM_MAGIC, _CLEAR_PIN_MEM_AREA, int) -+#define REMAP_PIN_MEM_AREA _IOW(PIN_MEM_MAGIC, _REMAP_PIN_MEM_AREA, int) -+#define DUMP_SPECIAL_PAGES _IOW(PIN_MEM_MAGIC, _DUMP_SEPCIAL_PAGES, int) -+#define RETORE_SPECIAL_PAGES _IOW(PIN_MEM_MAGIC, _RETORE_SEPCIAL_PAGES, int) -+ -+#define MAX_PIN_MEM_AREA_NUM 16 -+ -+struct _pin_mem_area { -+ unsigned long virt_start; -+ unsigned long virt_end; -+}; -+ -+struct pin_mem_area_set { -+ unsigned int pid; -+ unsigned int area_num; -+ struct _pin_mem_area mem_area[MAX_PIN_MEM_AREA_NUM]; -+}; -+ -+#endif /* __has_include("linux/pin_memory.h") */ -+ -+#define PIN_MEM_FILE "/dev/pinmem" -+#define ONCE_PIN_MEM_SIZE_LIMIT (32 * 1024 * 1024) -+ -+bool should_pin_vmae(VmaEntry *vmae); -+int pin_vmae(VmaEntry *vmae, struct pstree_item *item); -+int dump_task_special_pages(int pid); -+int restore_task_special_pages(int pid); -+int clear_pin_mem(int pid); -+ -+#endif /* __CRIU_PIN_MEM_H__ */ diff --git a/criu/include/restorer.h b/criu/include/restorer.h -index c29d869..e0bdc04 100644 +index c2ef8f0..c5dcf94 100644 --- a/criu/include/restorer.h +++ b/criu/include/restorer.h -@@ -232,6 +232,7 @@ struct task_restore_args { +@@ -225,6 +225,7 @@ struct task_restore_args { int lsm_type; int child_subreaper; bool has_clone3_set_tid; @@ -180,113 +82,46 @@ index c29d869..e0bdc04 100644 } __aligned(64); /* -diff --git a/criu/mem.c b/criu/mem.c -index ca74bfb..07efdbe 100644 ---- a/criu/mem.c -+++ b/criu/mem.c -@@ -31,6 +31,7 @@ - #include "prctl.h" - #include "compel/infect-util.h" - #include "pidfd-store.h" -+#include "pin-mem.h" - - #include "protobuf.h" - #include "images/pagemap.pb-c.h" -@@ -500,6 +501,17 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, struct parasit - goto out_xfer; - } +@@ -316,4 +317,31 @@ enum { + #define __r_sym(name) restorer_sym##name + #define restorer_sym(rblob, name) (void *)(rblob + __r_sym(name)) -+ if (opts.pin_memory) { -+ /* pin memory before dump pages */ -+ list_for_each_entry(vma_area, &vma_area_list->h, list) { -+ if (should_pin_vmae(vma_area->e) -+ && pin_vmae(vma_area->e, item) != 0) { -+ exit_code = -1; -+ goto out_xfer; -+ } -+ } -+ } ++#define PIN_MEM_FILE "/dev/pinmem" ++#define PIN_MEM_MAGIC 0x59 ++#define _SET_PIN_MEM_AREA 1 ++#define _CLEAR_PIN_MEM_AREA 2 ++#define _REMAP_PIN_MEM_AREA 3 ++#define _DUMP_SEPCIAL_PAGES 6 ++#define _RETORE_SEPCIAL_PAGES 7 ++#define SET_PIN_MEM_AREA _IOW(PIN_MEM_MAGIC, _SET_PIN_MEM_AREA, struct pin_mem_area_set) ++#define CLEAR_PIN_MEM_AREA _IOW(PIN_MEM_MAGIC, _CLEAR_PIN_MEM_AREA, int) ++#define REMAP_PIN_MEM_AREA _IOW(PIN_MEM_MAGIC, _REMAP_PIN_MEM_AREA, int) ++#define DUMP_SEPCIAL_PAGES _IOW(PIN_MEM_MAGIC, _DUMP_SEPCIAL_PAGES, int) ++#define RETORE_SEPCIAL_PAGES _IOW(PIN_MEM_MAGIC, _RETORE_SEPCIAL_PAGES, int) ++ ++#define ONCE_PIN_MEM_SIZE_LIMIT 32 * 1024 * 1024 ++#define MAX_PIN_MEM_AREA_NUM 16 ++ ++struct pin_mem_area { ++ unsigned long virt_start; ++ unsigned long virt_end; ++}; + - /* - * Step 1 -- generate the pagemap - */ -@@ -509,6 +521,10 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, struct parasit - parent_predump_mode = mdc->parent_ie->pre_dump_mode; - - list_for_each_entry(vma_area, &vma_area_list->h, list) { -+ if (opts.pin_memory && should_pin_vmae(vma_area->e)) { -+ continue; -+ } ++struct pin_mem_area_set { ++ unsigned int pid; ++ unsigned int area_num; ++ struct pin_mem_area mem_area[MAX_PIN_MEM_AREA_NUM]; ++}; + - ret = generate_vma_iovs(item, vma_area, pp, &xfer, args, ctl, &pmc, has_parent, mdc->pre_dump, - parent_predump_mode); - if (ret < 0) -diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c -index 368b5a0..db01ba5 100644 ---- a/criu/pie/restorer.c -+++ b/criu/pie/restorer.c -@@ -49,6 +49,7 @@ - - #include "shmem.h" - #include "restorer.h" -+#include "pin-mem.h" - - #ifndef PR_SET_PDEATHSIG - #define PR_SET_PDEATHSIG 1 -@@ -1408,6 +1409,24 @@ int cleanup_current_inotify_events(struct task_restore_args *task_args) - return 0; + #endif /* __CR_RESTORER_H__ */ +diff --git a/criu/mem.c b/criu/mem.c +index ca74bfb..e95c8de 100644 +--- a/criu/mem.c ++++ b/criu/mem.c +@@ -432,6 +432,85 @@ again: + return ret; } -+int remap_vmas(int pid) -+{ -+ int fd, ret = 0; -+ -+ fd = sys_open(PIN_MEM_FILE, O_RDWR, 0); -+ if (fd == -1) { -+ pr_err("open file: %s fail.\n", PIN_MEM_FILE); -+ return -1;; -+ } -+ -+ ret = sys_ioctl(fd, REMAP_PIN_MEM_AREA, (unsigned long) &pid); -+ if (ret < 0) -+ pr_err("remap pin mem fail for pid: %d\n", pid); -+ sys_close(fd); -+ return ret; -+} -+ -+ - /* - * The main routine to restore task via sigreturn. - * This one is very special, we never return there -@@ -1577,7 +1596,12 @@ long __export_restore_task(struct task_restore_args *args) - goto core_restore_end; - } - } -- -+ if (args->pin_memory) { -+ if (remap_vmas(my_pid) < 0) { -+ pr_err("Remap vmas fail\n"); -+ goto core_restore_end; -+ } -+ } - /* - * Now read the contents (if any) - */ -diff --git a/criu/pin-mem.c b/criu/pin-mem.c -new file mode 100644 -index 0000000..b18db97 ---- /dev/null -+++ b/criu/pin-mem.c -@@ -0,0 +1,146 @@ -+#include -+#include -+#include -+ -+#include "pstree.h" -+#include "mem.h" -+#include "vma.h" -+#include "pin-mem.h" -+ +bool should_pin_vmae(VmaEntry *vmae) +{ + /* @@ -314,19 +149,18 @@ index 0000000..b18db97 +} + +static int pin_one_pmas(int fd, unsigned long start, -+ unsigned long *pend, struct pstree_item *item) ++ unsigned long *pend, struct pstree_item *item) +{ + int ret; + unsigned int index = 0; + unsigned long end; + unsigned long next = start; + struct pin_mem_area_set pmas; -+ struct _pin_mem_area *pma; ++ struct pin_mem_area *pma; + + end = *pend; + while (start < end) { -+ next = (start + ONCE_PIN_MEM_SIZE_LIMIT > end) -+ ? end : (start + ONCE_PIN_MEM_SIZE_LIMIT); ++ next = (start + ONCE_PIN_MEM_SIZE_LIMIT > end) ? end : (start + ONCE_PIN_MEM_SIZE_LIMIT); + pma = &(pmas.mem_area[index]); + pma->virt_start = start; + pma->virt_end = next; @@ -335,18 +169,16 @@ index 0000000..b18db97 + if (index >= MAX_PIN_MEM_AREA_NUM) + break; + } -+ + *pend = next; + pmas.area_num = index; + pmas.pid = vpid(item); -+ + ret = ioctl(fd, SET_PIN_MEM_AREA, &pmas); + if (ret < 0) + pr_err("pin mem fail, errno: %s\n", strerror(errno)); + return ret; +} + -+int pin_vmae(VmaEntry *vmae, struct pstree_item *item) ++static int pin_vmae(VmaEntry *vmae, struct pstree_item *item) +{ + int fd; + int ret = 0; @@ -357,7 +189,6 @@ index 0000000..b18db97 + pr_err("open file: %s fail.\n", PIN_MEM_FILE); + return -1; + } -+ + start = vmae->start; + while (start < vmae->end) { + end = vmae->end; @@ -370,84 +201,95 @@ index 0000000..b18db97 + return ret; +} + -+int dump_task_special_pages(int pid) -+{ -+ int fd, ret; -+ -+ fd = open(PIN_MEM_FILE, O_RDWR, 0); -+ if (fd < 0) { -+ pr_warn("error open file: %s\n", PIN_MEM_FILE); -+ return -1; + static int __parasite_dump_pages_seized(struct pstree_item *item, struct parasite_dump_pages_args *args, + struct vm_area_list *vma_area_list, struct mem_dump_ctl *mdc, + struct parasite_ctl *ctl) +@@ -500,6 +579,19 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, struct parasit + goto out_xfer; + } + ++ if (opts.pin_memory) { ++ /* pin memory before dump pages */ ++ list_for_each_entry(vma_area, &vma_area_list->h, list) { ++ if (should_pin_vmae(vma_area->e)) { ++ ret = pin_vmae(vma_area->e, item); ++ if (ret) { ++ exit_code = -1; ++ goto out_xfer; ++ } ++ } ++ } + } + -+ ret = ioctl(fd, DUMP_SPECIAL_PAGES, (unsigned long) &pid); -+ if (ret < 0) -+ pr_warn("No need DUMP_SPECIAL_PAGES for %d\n", pid); -+ -+ close(fd); -+ return ret; -+} + /* + * Step 1 -- generate the pagemap + */ +@@ -509,6 +601,10 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, struct parasit + parent_predump_mode = mdc->parent_ie->pre_dump_mode; + + list_for_each_entry(vma_area, &vma_area_list->h, list) { ++ if (opts.pin_memory && should_pin_vmae(vma_area->e)) { ++ continue; ++ } + -+int restore_task_special_pages(int pid) + ret = generate_vma_iovs(item, vma_area, pp, &xfer, args, ctl, &pmc, has_parent, mdc->pre_dump, + parent_predump_mode); + if (ret < 0) +diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c +index fbc89fe..d04f8f1 100644 +--- a/criu/pie/restorer.c ++++ b/criu/pie/restorer.c +@@ -1384,6 +1384,24 @@ int cleanup_current_inotify_events(struct task_restore_args *task_args) + return 0; + } + ++int remap_vmas(int pid) +{ -+ int fd, ret; ++ int fd, ret = 0; + -+ fd = open(PIN_MEM_FILE, O_RDWR, 0); -+ if (fd < 0) { -+ pr_warn("error open file: %s\n", PIN_MEM_FILE); -+ return -1; ++ fd = sys_open(PIN_MEM_FILE, O_RDWR, 0); ++ if (fd == -1) { ++ pr_err("open file: %s fail.\n", PIN_MEM_FILE); ++ return -1;; + } + -+ ret = ioctl(fd, RETORE_SPECIAL_PAGES, (unsigned long) &pid); ++ ret = sys_ioctl(fd, REMAP_PIN_MEM_AREA, (unsigned long) &pid); + if (ret < 0) -+ pr_warn("No need RETORE_SPECIAL_PAGES for %d\n", pid); -+ -+ close(fd); ++ pr_err("remap pin mem fail for pid: %d\n", pid); ++ sys_close(fd); + return ret; +} + -+int clear_pin_mem(int pid) -+{ -+ int fd, ret; + -+ fd = open(PIN_MEM_FILE, O_RDWR, 0); -+ if (fd < 0) { -+ pr_warn("error open file: %s\n", PIN_MEM_FILE); -+ return -1; -+ } -+ -+ ret = ioctl(fd, CLEAR_PIN_MEM_AREA, (unsigned long) &pid); -+ if (ret < 0) { -+ pr_warn("clear pin mem fail: %d\n", pid); + /* + * The main routine to restore task via sigreturn. + * This one is very special, we never return there +@@ -1553,7 +1571,12 @@ long __export_restore_task(struct task_restore_args *args) + goto core_restore_end; + } + } +- ++ if (args->pin_memory) { ++ if (remap_vmas(my_pid) < 0) { ++ pr_err("Remap vmas fail\n"); ++ goto core_restore_end; ++ } + } -+ -+ close(fd); -+ return ret; -+} + /* + * Now read the contents (if any) + */ diff --git a/criu/seize.c b/criu/seize.c -index 95bf9ef..8a35c3c 100644 +index 95bf9ef..c11ecab 100644 --- a/criu/seize.c +++ b/criu/seize.c @@ -23,6 +23,7 @@ #include "string.h" #include "xmalloc.h" #include "util.h" -+#include "pin-mem.h" ++#include "mem.h" #define NR_ATTEMPTS 5 -@@ -640,6 +641,11 @@ static void unseize_task_and_threads(const struct pstree_item *item, int st) - if (item->pid->state == TASK_DEAD) - return; - -+ if (opts.pin_memory) { -+ for (i = 0; i < item->nr_threads; i++) -+ dump_task_special_pages(item->threads[i].real); -+ } -+ - /* - * The st is the state we want to switch tasks into, - * the item->state is the state task was in when we seized one. -- -2.34.1 +2.25.1 diff --git a/0003-kerndat-check-for-rseq-syscall-support-Signed-off-by.patch b/0003-kerndat-check-for-rseq-syscall-support-Signed-off-by.patch deleted file mode 100644 index 4a6ebc14b4fb3810936057f24305558c7e21f088..0000000000000000000000000000000000000000 --- a/0003-kerndat-check-for-rseq-syscall-support-Signed-off-by.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 35053ab4bb8fe09818da9421a053e2e13c7ad817 Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 13:34:10 +0800 -Subject: [PATCH 03/72] kerndat: check for rseq syscall support Signed-off-by: - Alexander Mikhalitsyn - ---- - criu/include/kerndat.h | 1 + - criu/kerndat.c | 18 ++++++++++++++++++ - 2 files changed, 19 insertions(+) - -diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h -index 80bad7f..44a6976 100644 ---- a/criu/include/kerndat.h -+++ b/criu/include/kerndat.h -@@ -74,6 +74,7 @@ struct kerndat_s { - bool has_pidfd_getfd; - bool has_nspid; - bool has_nftables_concat; -+ bool has_rseq; - }; - - extern struct kerndat_s kdat; -diff --git a/criu/kerndat.c b/criu/kerndat.c -index 0e88ba4..f5a4490 100644 ---- a/criu/kerndat.c -+++ b/criu/kerndat.c -@@ -816,6 +816,20 @@ static int kerndat_x86_has_ptrace_fpu_xsave_bug(void) - return 0; - } - -+static int kerndat_has_rseq(void) -+{ -+ if (syscall(__NR_rseq, NULL, 0, 0, 0) != -1) { -+ pr_err("rseq should fail\n"); -+ return -1; -+ } -+ if (errno == ENOSYS) -+ pr_info("rseq syscall isn't supported\n"); -+ else -+ kdat.has_rseq = true; -+ -+ return 0; -+} -+ - #define KERNDAT_CACHE_FILE KDAT_RUNDIR "/criu.kdat" - #define KERNDAT_CACHE_FILE_TMP KDAT_RUNDIR "/.criu.kdat" - -@@ -1360,6 +1374,10 @@ int kerndat_init(void) - ret = -1; - } - -+ if (!ret && kerndat_has_rseq()) { -+ pr_err("kerndat_has_rseq failed when initializing kerndat.\n"); -+ ret = -1; -+ } - kerndat_lsm(); - kerndat_mmap_min_addr(); - kerndat_files_stat(); --- -2.34.1 - diff --git a/0004-util-move-fork_and_ptrace_attach-helper-from-cr-chec.patch b/0004-util-move-fork_and_ptrace_attach-helper-from-cr-chec.patch deleted file mode 100644 index ff73bad2149f2b605a7e7d311b445d00601be4d8..0000000000000000000000000000000000000000 --- a/0004-util-move-fork_and_ptrace_attach-helper-from-cr-chec.patch +++ /dev/null @@ -1,161 +0,0 @@ -From 30381c725f7c6738bd0df0f822aace1e66065b65 Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 13:35:53 +0800 -Subject: [PATCH 04/72] util: move fork_and_ptrace_attach helper from cr-check - Signed-off-by: Alexander Mikhalitsyn - ---- - criu/cr-check.c | 55 ------------------------------------------- - criu/include/util.h | 1 + - criu/util.c | 57 +++++++++++++++++++++++++++++++++++++++++++++ - 3 files changed, 58 insertions(+), 55 deletions(-) - -diff --git a/criu/cr-check.c b/criu/cr-check.c -index 3575fb3..d41ef8f 100644 ---- a/criu/cr-check.c -+++ b/criu/cr-check.c -@@ -537,61 +537,6 @@ static int check_sigqueuinfo(void) - return 0; - } - --static pid_t fork_and_ptrace_attach(int (*child_setup)(void)) --{ -- pid_t pid; -- int sk_pair[2], sk; -- char c = 0; -- -- if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair)) { -- pr_perror("socketpair"); -- return -1; -- } -- -- pid = fork(); -- if (pid < 0) { -- pr_perror("fork"); -- return -1; -- } else if (pid == 0) { -- sk = sk_pair[1]; -- close(sk_pair[0]); -- -- if (child_setup && child_setup() != 0) -- exit(1); -- -- if (write(sk, &c, 1) != 1) { -- pr_perror("write"); -- exit(1); -- } -- -- while (1) -- sleep(1000); -- exit(1); -- } -- -- sk = sk_pair[0]; -- close(sk_pair[1]); -- -- if (read(sk, &c, 1) != 1) { -- close(sk); -- kill(pid, SIGKILL); -- pr_perror("read"); -- return -1; -- } -- -- close(sk); -- -- if (ptrace(PTRACE_ATTACH, pid, NULL, NULL) == -1) { -- pr_perror("Unable to ptrace the child"); -- kill(pid, SIGKILL); -- return -1; -- } -- -- waitpid(pid, NULL, 0); -- -- return pid; --} -- - static int check_ptrace_peeksiginfo(void) - { - struct ptrace_peeksiginfo_args arg; -diff --git a/criu/include/util.h b/criu/include/util.h -index a2dac22..1c0b3c7 100644 ---- a/criu/include/util.h -+++ b/criu/include/util.h -@@ -166,6 +166,7 @@ extern int is_anon_link_type(char *link, char *type); - - extern int cr_system(int in, int out, int err, char *cmd, char *const argv[], unsigned flags); - extern int cr_system_userns(int in, int out, int err, char *cmd, char *const argv[], unsigned flags, int userns_pid); -+extern pid_t fork_and_ptrace_attach(int (*child_setup)(void)); - extern int cr_daemon(int nochdir, int noclose, int close_fd); - extern int status_ready(void); - extern int is_root_user(void); -diff --git a/criu/util.c b/criu/util.c -index 06124c2..e682161 100644 ---- a/criu/util.c -+++ b/criu/util.c -@@ -654,6 +654,63 @@ out: - return ret; - } - -+pid_t fork_and_ptrace_attach(int (*child_setup)(void)) -+{ -+ pid_t pid; -+ int sk_pair[2], sk; -+ char c = 0; -+ -+ if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair)) { -+ pr_perror("socketpair"); -+ return -1; -+ } -+ -+ pid = fork(); -+ if (pid < 0) { -+ pr_perror("fork"); -+ return -1; -+ } else if (pid == 0) { -+ sk = sk_pair[1]; -+ close(sk_pair[0]); -+ -+ if (child_setup && child_setup() != 0) -+ exit(1); -+ -+ if (write(sk, &c, 1) != 1) { -+ pr_perror("write"); -+ exit(1); -+ } -+ -+ while (1) -+ sleep(1000); -+ exit(1); -+ } -+ -+ sk = sk_pair[0]; -+ close(sk_pair[1]); -+ -+ if (read(sk, &c, 1) != 1) { -+ close(sk); -+ kill(pid, SIGKILL); -+ waitpid(pid, NULL, 0); -+ pr_perror("read"); -+ return -1; -+ } -+ -+ close(sk); -+ -+ if (ptrace(PTRACE_ATTACH, pid, NULL, NULL) == -1) { -+ pr_perror("Unable to ptrace the child"); -+ kill(pid, SIGKILL); -+ waitpid(pid, NULL, 0); -+ return -1; -+ } -+ -+ waitpid(pid, NULL, 0); -+ -+ return pid; -+} -+ - int status_ready(void) - { - char c = 0; --- -2.34.1 - diff --git a/0005-cr-check-Add-ptrace-rseq-conf-dump-feature-Add-get_r.patch b/0005-cr-check-Add-ptrace-rseq-conf-dump-feature-Add-get_r.patch deleted file mode 100644 index 0375b6d64a590918783b53e64bfcc49fbbb00711..0000000000000000000000000000000000000000 --- a/0005-cr-check-Add-ptrace-rseq-conf-dump-feature-Add-get_r.patch +++ /dev/null @@ -1,162 +0,0 @@ -From f84bab6b29146ef7fb9867af0324efb90596e12c Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 15:30:18 +0800 -Subject: [PATCH 05/72] cr-check: Add ptrace rseq conf dump feature Add - "get_rseq_conf" feature corresponding to the - ptrace(PTRACE_GET_RSEQ_CONFIGURATION) support. - -Signed-off-by: Alexander Mikhalitsyn ---- - compel/include/uapi/ptrace.h | 12 +++++++++++ - criu/cr-check.c | 11 ++++++++++ - criu/include/kerndat.h | 1 + - criu/kerndat.c | 41 ++++++++++++++++++++++++++++++++++++ - 4 files changed, 65 insertions(+) - -diff --git a/compel/include/uapi/ptrace.h b/compel/include/uapi/ptrace.h -index c5291d2..bfe28c7 100644 ---- a/compel/include/uapi/ptrace.h -+++ b/compel/include/uapi/ptrace.h -@@ -65,6 +65,18 @@ typedef struct { - uint64_t flags; /* Output: filter's flags */ - } seccomp_metadata_t; - -+#ifndef PTRACE_GET_RSEQ_CONFIGURATION -+#define PTRACE_GET_RSEQ_CONFIGURATION 0x420f -+ -+struct ptrace_rseq_configuration { -+ __u64 rseq_abi_pointer; -+ __u32 rseq_abi_size; -+ __u32 signature; -+ __u32 flags; -+ __u32 pad; -+}; -+#endif -+ - #ifdef PTRACE_EVENT_STOP - #if PTRACE_EVENT_STOP == 7 /* Bad value from Linux 3.1-3.3, fixed in 3.4 */ - #undef PTRACE_EVENT_STOP -diff --git a/criu/cr-check.c b/criu/cr-check.c -index d41ef8f..ba87511 100644 ---- a/criu/cr-check.c -+++ b/criu/cr-check.c -@@ -794,6 +794,15 @@ static int check_ptrace_dump_seccomp_filters(void) - return ret; - } - -+static int check_ptrace_get_rseq_conf(void) -+{ -+ if (!kdat.has_ptrace_get_rseq_conf) { -+ pr_warn("ptrace(PTRACE_GET_RSEQ_CONFIGURATION) isn't supported. C/R of processes which are using rseq() won't work.\n"); -+ return -1; -+ } -+ return 0; -+} -+ - static int check_mem_dirty_track(void) - { - if (!kdat.has_dirty_track) { -@@ -1435,6 +1444,7 @@ int cr_check(void) - ret |= check_ns_pid(); - ret |= check_apparmor_stacking(); - ret |= check_network_lock_nftables(); -+ ret |= check_ptrace_get_rseq_conf(); - } - - /* -@@ -1547,6 +1557,7 @@ static struct feature_list feature_list[] = { - { "ns_pid", check_ns_pid }, - { "apparmor_stacking", check_apparmor_stacking }, - { "network_lock_nftables", check_network_lock_nftables }, -+ { "get_rseq_conf", check_ptrace_get_rseq_conf }, - { NULL, NULL }, - }; - -diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h -index 44a6976..05abeda 100644 ---- a/criu/include/kerndat.h -+++ b/criu/include/kerndat.h -@@ -75,6 +75,7 @@ struct kerndat_s { - bool has_nspid; - bool has_nftables_concat; - bool has_rseq; -+ bool has_ptrace_get_rseq_conf; - }; - - extern struct kerndat_s kdat; -diff --git a/criu/kerndat.c b/criu/kerndat.c -index f5a4490..4841387 100644 ---- a/criu/kerndat.c -+++ b/criu/kerndat.c -@@ -4,6 +4,8 @@ - #include - #include - #include -+#include -+#include - #include - #include - #include -@@ -36,6 +38,7 @@ - #include "sockets.h" - #include "net.h" - #include "tun.h" -+#include - #include - #include "netfilter.h" - #include "fsnotify.h" -@@ -830,6 +833,40 @@ static int kerndat_has_rseq(void) - return 0; - } - -+static int kerndat_has_ptrace_get_rseq_conf(void) -+{ -+ pid_t pid; -+ int len; -+ struct ptrace_rseq_configuration rseq; -+ -+ pid = fork_and_ptrace_attach(NULL); -+ if (pid < 0) -+ return -1; -+ -+ len = ptrace(PTRACE_GET_RSEQ_CONFIGURATION, pid, sizeof(rseq), &rseq); -+ if (len != sizeof(rseq)) { -+ kdat.has_ptrace_get_rseq_conf = false; -+ pr_info("ptrace(PTRACE_GET_RSEQ_CONFIGURATION) is not supported\n"); -+ goto out; -+ } -+ -+ /* -+ * flags is always zero from the kernel side, if it will be changed -+ * we need to pay attention to that and, possibly, make changes on the CRIU side. -+ */ -+ if (rseq.flags != 0) { -+ kdat.has_ptrace_get_rseq_conf = false; -+ pr_err("ptrace(PTRACE_GET_RSEQ_CONFIGURATION): rseq.flags != 0\n"); -+ } else { -+ kdat.has_ptrace_get_rseq_conf = true; -+ } -+ -+out: -+ kill(pid, SIGKILL); -+ waitpid(pid, NULL, 0); -+ return 0; -+} -+ - #define KERNDAT_CACHE_FILE KDAT_RUNDIR "/criu.kdat" - #define KERNDAT_CACHE_FILE_TMP KDAT_RUNDIR "/.criu.kdat" - -@@ -1378,6 +1415,10 @@ int kerndat_init(void) - pr_err("kerndat_has_rseq failed when initializing kerndat.\n"); - ret = -1; - } -+ if (!ret && kerndat_has_ptrace_get_rseq_conf()) { -+ pr_err("kerndat_has_ptrace_get_rseq_conf failed when initializing kerndat.\n"); -+ ret = -1; -+ } - kerndat_lsm(); - kerndat_mmap_min_addr(); - kerndat_files_stat(); --- -2.34.1 - diff --git a/0006-rseq-initial-support-TODO-1.-properly-handle-case-wh.patch b/0006-rseq-initial-support-TODO-1.-properly-handle-case-wh.patch deleted file mode 100644 index 4c19ba500ec6832dc604b524acef4234feadbcd9..0000000000000000000000000000000000000000 --- a/0006-rseq-initial-support-TODO-1.-properly-handle-case-wh.patch +++ /dev/null @@ -1,702 +0,0 @@ -From c905adf3aaa116984e28a51700c53917f3651e3b Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 14:52:35 +0800 -Subject: [PATCH 06/72] rseq: initial support TODO: 1. properly handle case - when the kernel has rseq() support but has no - ptrace(PTRACE_GET_RSEQ_CONFIGURATION) support and user processes haven't used - rseq(). 2. properly handle "transient" states, when CRIU comes during rseq - was executed. We need test for this case with some "heavy" rseq + we need to - properly handle RSEQ_CS_* flags. - -Fixes: #1696 - -Reported-by: Radostin Stoyanov -Suggested-by: Florian Weimer -Signed-off-by: Alexander Mikhalitsyn ---- - compel/include/uapi/ptrace.h | 16 ++-- - criu/cr-dump.c | 99 ++++++++++++++++++++++++ - criu/cr-restore.c | 17 +++++ - criu/include/linux/rseq.h | 144 +++++++++++++++++++++++++++++++++++ - criu/include/parasite.h | 7 ++ - criu/include/restorer.h | 7 ++ - criu/kerndat.c | 2 +- - criu/parasite-syscall.c | 11 +++ - criu/pie/parasite.c | 99 ++++++++++++++++++++++++ - criu/pie/restorer.c | 24 ++++++ - images/Makefile | 1 + - images/core.proto | 2 + - images/rseq.proto | 9 +++ - 13 files changed, 429 insertions(+), 9 deletions(-) - create mode 100644 criu/include/linux/rseq.h - create mode 100644 images/rseq.proto - -diff --git a/compel/include/uapi/ptrace.h b/compel/include/uapi/ptrace.h -index bfe28c7..d807a92 100644 ---- a/compel/include/uapi/ptrace.h -+++ b/compel/include/uapi/ptrace.h -@@ -66,14 +66,14 @@ typedef struct { - } seccomp_metadata_t; - - #ifndef PTRACE_GET_RSEQ_CONFIGURATION --#define PTRACE_GET_RSEQ_CONFIGURATION 0x420f -- --struct ptrace_rseq_configuration { -- __u64 rseq_abi_pointer; -- __u32 rseq_abi_size; -- __u32 signature; -- __u32 flags; -- __u32 pad; -+#define PTRACE_GET_RSEQ_CONFIGURATION 0x420f -+ -+struct __ptrace_rseq_configuration { -+ uint64_t rseq_abi_pointer; -+ uint32_t rseq_abi_size; -+ uint32_t signature; -+ uint32_t flags; -+ uint32_t pad; - }; - #endif - -diff --git a/criu/cr-dump.c b/criu/cr-dump.c -index f07fe6e..91dd08a 100644 ---- a/criu/cr-dump.c -+++ b/criu/cr-dump.c -@@ -45,6 +45,7 @@ - #include "proc_parse.h" - #include "parasite.h" - #include "parasite-syscall.h" -+#include - #include "files.h" - #include "files-reg.h" - #include "shmem.h" -@@ -200,6 +201,25 @@ static int dump_sched_info(int pid, ThreadCoreEntry *tc) - return 0; - } - -+static int check_thread_rseq(pid_t tid, const struct parasite_check_rseq *ti_rseq, bool has_tc_rseq_entry) -+{ -+ if (!kdat.has_rseq || kdat.has_ptrace_get_rseq_conf) -+ return 0; -+ -+ pr_debug("%d has rseq_inited = %d\n", tid, ti_rseq->rseq_inited); -+ -+ /* -+ * We have no kdat.has_ptrace_get_rseq_conf and user -+ * process has rseq() used, let's fail dump. -+ */ -+ if (ti_rseq->rseq_inited) { -+ pr_err("%d has rseq but kernel lacks get_rseq_conf feature\n", tid); -+ return -1; -+ } -+ -+ return 0; -+} -+ - struct cr_imgset *glob_imgset; - - static int collect_fds(pid_t pid, struct parasite_drain_fd **dfds) -@@ -730,6 +750,17 @@ int dump_thread_core(int pid, CoreEntry *core, const struct parasite_dump_thread - if (!ret) - ret = seccomp_dump_thread(pid, tc); - -+ /* -+ * We are dumping rseq() in the dump_thread_rseq() function, -+ * *before* processes gets infected (because of ptrace requests -+ * API restriction). At this point, if the kernel lacks -+ * kdat.has_ptrace_get_rseq_conf support we have to ensure -+ * that dumpable processes haven't initialized rseq() or -+ * fail dump if rseq() was used. -+ */ -+ if (!ret) -+ ret = check_thread_rseq(pid, &ti->rseq, !!tc->rseq_entry); -+ - return ret; - } - -@@ -1016,6 +1047,68 @@ static int dump_task_signals(pid_t pid, struct pstree_item *item) - return 0; - } - -+static int dump_thread_rseq(pid_t tid, RseqEntry **rseqep) -+{ -+ struct __ptrace_rseq_configuration rseq; -+ RseqEntry *rseqe = NULL; -+ int ret; -+ -+ /* -+ * If we are here it means that rseq() syscall is supported, -+ * but ptrace(PTRACE_GET_RSEQ_CONFIGURATION) isn't supported, -+ * we can just fail dump here. But this is bad idea, IMHO. -+ * -+ * So, we will try to detect if victim process was used rseq(). -+ * See check_rseq() and check_thread_rseq() functions. -+ */ -+ if (!kdat.has_ptrace_get_rseq_conf) -+ return 0; -+ -+ ret = ptrace(PTRACE_GET_RSEQ_CONFIGURATION, tid, sizeof(rseq), &rseq); -+ if (ret != sizeof(rseq)) { -+ pr_perror("ptrace(PTRACE_GET_RSEQ_CONFIGURATION, %d) = %d", tid, ret); -+ return -1; -+ } -+ -+ if (rseq.flags != 0) { -+ pr_err("something wrong with ptrace(PTRACE_GET_RSEQ_CONFIGURATION, %d) flags = 0x%x\n", tid, -+ rseq.flags); -+ return -1; -+ } -+ -+ pr_err("Dump rseq of %d: ptr = 0x%lx sign = 0x%x\n", tid, (unsigned long)rseq.rseq_abi_pointer, rseq.signature); -+ -+ rseqe = xmalloc(sizeof(*rseqe)); -+ if (!rseqe) -+ return -1; -+ -+ rseq_entry__init(rseqe); -+ -+ rseqe->rseq_abi_pointer = rseq.rseq_abi_pointer; -+ rseqe->rseq_abi_size = rseq.rseq_abi_size; -+ rseqe->signature = rseq.signature; -+ -+ *rseqep = rseqe; -+ -+ return 0; -+} -+ -+static int dump_task_rseq(pid_t pid, struct pstree_item *item) -+{ -+ int i; -+ -+ /* if rseq() syscall isn't supported then nothing to dump */ -+ if (!kdat.has_rseq) -+ return 0; -+ -+ for (i = 0; i < item->nr_threads; i++) { -+ if (dump_thread_rseq(item->threads[i].real, &item->core[i]->thread_core->rseq_entry)) -+ return -1; -+ } -+ -+ return 0; -+} -+ - static struct proc_pid_stat pps_buf; - - static int dump_task_threads(struct parasite_ctl *parasite_ctl, const struct pstree_item *item) -@@ -1304,6 +1397,12 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) - goto err; - } - -+ ret = dump_task_rseq(pid, item); -+ if (ret) { -+ pr_err("Dump %d rseq failed %d\n", pid, ret); -+ goto err; -+ } -+ - parasite_ctl = parasite_infect_seized(pid, item, &vmas); - if (!parasite_ctl) { - pr_err("Can't infect (pid: %d) with parasite\n", pid); -diff --git a/criu/cr-restore.c b/criu/cr-restore.c -index 5b645c1..b2bd044 100644 ---- a/criu/cr-restore.c -+++ b/criu/cr-restore.c -@@ -2975,6 +2975,19 @@ static int prep_sched_info(struct rst_sched_param *sp, ThreadCoreEntry *tc) - return 0; - } - -+static int prep_rseq(struct rst_rseq_param *rseq, ThreadCoreEntry *tc) -+{ -+ /* compatibility with older CRIU versions */ -+ if (!tc->rseq_entry) -+ return 0; -+ -+ rseq->rseq_abi_pointer = tc->rseq_entry->rseq_abi_pointer; -+ rseq->rseq_abi_size = tc->rseq_entry->rseq_abi_size; -+ rseq->signature = tc->rseq_entry->signature; -+ -+ return 0; -+} -+ - static rlim_t decode_rlim(rlim_t ival) - { - return ival == -1 ? RLIM_INFINITY : ival; -@@ -3704,6 +3717,10 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns - thread_args[i].clear_tid_addr = CORE_THREAD_ARCH_INFO(tcore)->clear_tid_addr; - core_get_tls(tcore, &thread_args[i].tls); - -+ ret = prep_rseq(&thread_args[i].rseq, tcore->thread_core); -+ if (ret) -+ goto err; -+ - rst_reloc_creds(&thread_args[i], &creds_pos_next); - - thread_args[i].futex_rla = tcore->thread_core->futex_rla; -diff --git a/criu/include/linux/rseq.h b/criu/include/linux/rseq.h -new file mode 100644 -index 0000000..5c1706a ---- /dev/null -+++ b/criu/include/linux/rseq.h -@@ -0,0 +1,144 @@ -+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ -+#ifndef _UAPI_LINUX_RSEQ_H -+#define _UAPI_LINUX_RSEQ_H -+ -+/* -+ * linux/rseq.h -+ * -+ * Restartable sequences system call API -+ * -+ * Copyright (c) 2015-2018 Mathieu Desnoyers -+ */ -+ -+#include -+#include -+ -+enum rseq_cpu_id_state { -+ RSEQ_CPU_ID_UNINITIALIZED = -1, -+ RSEQ_CPU_ID_REGISTRATION_FAILED = -2, -+}; -+ -+enum rseq_flags { -+ RSEQ_FLAG_UNREGISTER = (1 << 0), -+}; -+ -+enum rseq_cs_flags_bit { -+ RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0, -+ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1, -+ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2, -+}; -+ -+enum rseq_cs_flags { -+ RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT), -+ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT), -+ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT), -+}; -+ -+/* -+ * struct rseq_cs is aligned on 4 * 8 bytes to ensure it is always -+ * contained within a single cache-line. It is usually declared as -+ * link-time constant data. -+ */ -+struct rseq_cs { -+ /* Version of this structure. */ -+ __u32 version; -+ /* enum rseq_cs_flags */ -+ __u32 flags; -+ __u64 start_ip; -+ /* Offset from start_ip. */ -+ __u64 post_commit_offset; -+ __u64 abort_ip; -+} __attribute__((aligned(4 * sizeof(__u64)))); -+ -+/* -+ * struct rseq is aligned on 4 * 8 bytes to ensure it is always -+ * contained within a single cache-line. -+ * -+ * A single struct rseq per thread is allowed. -+ */ -+struct rseq { -+ /* -+ * Restartable sequences cpu_id_start field. Updated by the -+ * kernel. Read by user-space with single-copy atomicity -+ * semantics. This field should only be read by the thread which -+ * registered this data structure. Aligned on 32-bit. Always -+ * contains a value in the range of possible CPUs, although the -+ * value may not be the actual current CPU (e.g. if rseq is not -+ * initialized). This CPU number value should always be compared -+ * against the value of the cpu_id field before performing a rseq -+ * commit or returning a value read from a data structure indexed -+ * using the cpu_id_start value. -+ */ -+ __u32 cpu_id_start; -+ /* -+ * Restartable sequences cpu_id field. Updated by the kernel. -+ * Read by user-space with single-copy atomicity semantics. This -+ * field should only be read by the thread which registered this -+ * data structure. Aligned on 32-bit. Values -+ * RSEQ_CPU_ID_UNINITIALIZED and RSEQ_CPU_ID_REGISTRATION_FAILED -+ * have a special semantic: the former means "rseq uninitialized", -+ * and latter means "rseq initialization failed". This value is -+ * meant to be read within rseq critical sections and compared -+ * with the cpu_id_start value previously read, before performing -+ * the commit instruction, or read and compared with the -+ * cpu_id_start value before returning a value loaded from a data -+ * structure indexed using the cpu_id_start value. -+ */ -+ __u32 cpu_id; -+ /* -+ * Restartable sequences rseq_cs field. -+ * -+ * Contains NULL when no critical section is active for the current -+ * thread, or holds a pointer to the currently active struct rseq_cs. -+ * -+ * Updated by user-space, which sets the address of the currently -+ * active rseq_cs at the beginning of assembly instruction sequence -+ * block, and set to NULL by the kernel when it restarts an assembly -+ * instruction sequence block, as well as when the kernel detects that -+ * it is preempting or delivering a signal outside of the range -+ * targeted by the rseq_cs. Also needs to be set to NULL by user-space -+ * before reclaiming memory that contains the targeted struct rseq_cs. -+ * -+ * Read and set by the kernel. Set by user-space with single-copy -+ * atomicity semantics. This field should only be updated by the -+ * thread which registered this data structure. Aligned on 64-bit. -+ */ -+ union { -+ __u64 ptr64; -+#ifdef __LP64__ -+ __u64 ptr; -+#else -+ struct { -+#if (defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN)) || defined(__BIG_ENDIAN) -+ __u32 padding; /* Initialized to zero. */ -+ __u32 ptr32; -+#else /* LITTLE */ -+ __u32 ptr32; -+ __u32 padding; /* Initialized to zero. */ -+#endif /* ENDIAN */ -+ } ptr; -+#endif -+ } rseq_cs; -+ -+ /* -+ * Restartable sequences flags field. -+ * -+ * This field should only be updated by the thread which -+ * registered this data structure. Read by the kernel. -+ * Mainly used for single-stepping through rseq critical sections -+ * with debuggers. -+ * -+ * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT -+ * Inhibit instruction sequence block restart on preemption -+ * for this thread. -+ * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL -+ * Inhibit instruction sequence block restart on signal -+ * delivery for this thread. -+ * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE -+ * Inhibit instruction sequence block restart on migration for -+ * this thread. -+ */ -+ __u32 flags; -+} __attribute__((aligned(4 * sizeof(__u64)))); -+ -+#endif /* _UAPI_LINUX_RSEQ_H */ -diff --git a/criu/include/parasite.h b/criu/include/parasite.h -index 8107aa4..5fde809 100644 ---- a/criu/include/parasite.h -+++ b/criu/include/parasite.h -@@ -164,10 +164,17 @@ struct parasite_dump_creds { - unsigned int groups[0]; - }; - -+struct parasite_check_rseq { -+ bool has_rseq; -+ bool has_ptrace_get_rseq_conf; /* no need to check if supported */ -+ bool rseq_inited; -+}; -+ - struct parasite_dump_thread { - unsigned int *tid_addr; - pid_t tid; - tls_t tls; -+ struct parasite_check_rseq rseq; - stack_t sas; - int pdeath_sig; - char comm[TASK_COMM_LEN]; -diff --git a/criu/include/restorer.h b/criu/include/restorer.h -index c2ef8f0..c29d869 100644 ---- a/criu/include/restorer.h -+++ b/criu/include/restorer.h -@@ -45,6 +45,12 @@ struct rst_sched_param { - int prio; - }; - -+struct rst_rseq_param { -+ u64 rseq_abi_pointer; -+ u32 rseq_abi_size; -+ u32 signature; -+}; -+ - struct restore_posix_timer { - struct str_posix_timer spt; - struct itimerspec val; -@@ -99,6 +105,7 @@ struct thread_restore_args { - struct task_restore_args *ta; - - tls_t tls; -+ struct rst_rseq_param rseq; - - siginfo_t *siginfo; - unsigned int siginfo_n; -diff --git a/criu/kerndat.c b/criu/kerndat.c -index 4841387..af7113a 100644 ---- a/criu/kerndat.c -+++ b/criu/kerndat.c -@@ -837,7 +837,7 @@ static int kerndat_has_ptrace_get_rseq_conf(void) - { - pid_t pid; - int len; -- struct ptrace_rseq_configuration rseq; -+ struct __ptrace_rseq_configuration rseq; - - pid = fork_and_ptrace_attach(NULL); - if (pid < 0) -diff --git a/criu/parasite-syscall.c b/criu/parasite-syscall.c -index 7175ade..ee4fa86 100644 ---- a/criu/parasite-syscall.c -+++ b/criu/parasite-syscall.c -@@ -132,6 +132,13 @@ static int alloc_groups_copy_creds(CredsEntry *ce, struct parasite_dump_creds *c - return ce->groups ? 0 : -ENOMEM; - } - -+static void init_parasite_rseq_arg(struct parasite_check_rseq *rseq) -+{ -+ rseq->has_rseq = kdat.has_rseq; -+ rseq->has_ptrace_get_rseq_conf = kdat.has_ptrace_get_rseq_conf; -+ rseq->rseq_inited = false; -+} -+ - int parasite_dump_thread_leader_seized(struct parasite_ctl *ctl, int pid, CoreEntry *core) - { - ThreadCoreEntry *tc = core->thread_core; -@@ -144,6 +151,8 @@ int parasite_dump_thread_leader_seized(struct parasite_ctl *ctl, int pid, CoreEn - pc = args->creds; - pc->cap_last_cap = kdat.last_cap; - -+ init_parasite_rseq_arg(&args->rseq); -+ - ret = compel_rpc_call_sync(PARASITE_CMD_DUMP_THREAD, ctl); - if (ret < 0) - return ret; -@@ -197,6 +206,8 @@ int parasite_dump_thread_seized(struct parasite_thread_ctl *tctl, struct parasit - - compel_arch_get_tls_thread(tctl, &args->tls); - -+ init_parasite_rseq_arg(&args->rseq); -+ - ret = compel_run_in_thread(tctl, PARASITE_CMD_DUMP_THREAD); - if (ret) { - pr_err("Can't init thread in parasite %d\n", pid); -diff --git a/criu/pie/parasite.c b/criu/pie/parasite.c -index bc0a33c..e49958b 100644 ---- a/criu/pie/parasite.c -+++ b/criu/pie/parasite.c -@@ -8,6 +8,8 @@ - #include - #include - -+#include "linux/rseq.h" -+ - #include "common/config.h" - #include "int.h" - #include "types.h" -@@ -167,6 +169,7 @@ static int dump_posix_timers(struct parasite_dump_posix_timers_args *args) - } - - static int dump_creds(struct parasite_dump_creds *args); -+static int check_rseq(struct parasite_check_rseq *rseq); - - static int dump_thread_common(struct parasite_dump_thread *ti) - { -@@ -197,6 +200,12 @@ static int dump_thread_common(struct parasite_dump_thread *ti) - goto out; - } - -+ ret = check_rseq(&ti->rseq); -+ if (ret) { -+ pr_err("Unable to check if rseq() is initialized: %d\n", ret); -+ goto out; -+ } -+ - ret = dump_creds(ti->creds); - out: - return ret; -@@ -313,6 +322,96 @@ grps_err: - return -1; - } - -+static int check_rseq(struct parasite_check_rseq *rseq) -+{ -+ int ret; -+ unsigned long rseq_abi_pointer; -+ unsigned long rseq_abi_size; -+ uint32_t rseq_signature; -+ void *addr; -+ -+ /* no need to do hacky check if we can get all info from ptrace() */ -+ if (!rseq->has_rseq || rseq->has_ptrace_get_rseq_conf) -+ return 0; -+ -+ /* -+ * We need to determine if victim process has rseq() -+ * initialized, but we have no *any* proper kernel interface -+ * supported at this point. -+ * Our plan: -+ * 1. We know that if we call rseq() syscall and process already -+ * has current->rseq filled, then we get: -+ * -EINVAL if current->rseq != rseq || rseq_len != sizeof(*rseq), -+ * -EPERM if current->rseq_sig != sig), -+ * -EBUSY if current->rseq == rseq && rseq_len == sizeof(*rseq) && -+ * current->rseq_sig != sig -+ * if current->rseq == NULL (rseq() wasn't used) then we go to: -+ * IS_ALIGNED(rseq ...) check, if we fail it we get -EINVAL and it -+ * will be hard to distinguish case when rseq() was initialized or not. -+ * Let's construct arguments payload -+ * with: -+ * 1. correct rseq_abi_size -+ * 2. aligned and correct rseq_abi_pointer -+ * And see what rseq() return to us. -+ * If ret value is: -+ * 0: it means that rseq *wasn't* used and we successfuly registered it, -+ * -EINVAL or : it means that rseq is already initialized, -+ * so we *have* to dump it. But as we have has_ptrace_get_rseq_conf = false, -+ * we should just fail dump as it's unsafe to skip rseq() dump for processes -+ * with rseq() initialized. -+ * -EPERM or -EBUSY: should not happen as we take a fresh memory area for rseq -+ */ -+ addr = (void *)sys_mmap(NULL, sizeof(struct rseq), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); -+ if (addr == MAP_FAILED) { -+ pr_err("mmap() failed for struct rseq ret = %lx\n", (unsigned long)addr); -+ return -1; -+ } -+ -+ memset(addr, 0, sizeof(struct rseq)); -+ -+ /* sys_mmap returns page aligned addresses */ -+ rseq_abi_pointer = (unsigned long)addr; -+ rseq_abi_size = (unsigned long)sizeof(struct rseq); -+ /* it's not so important to have unique signature for us, -+ * because rseq_abi_pointer is guaranteed to be unique -+ */ -+ rseq_signature = 0x12345612; -+ -+ pr_info("\ttrying sys_rseq(%lx, %lx, %x, %x)\n", rseq_abi_pointer, rseq_abi_size, 0, rseq_signature); -+ ret = sys_rseq((void *)rseq_abi_pointer, rseq_abi_size, 0, rseq_signature); -+ if (ret) { -+ if (ret == -EINVAL) { -+ pr_info("\trseq is initialized in the victim\n"); -+ rseq->rseq_inited = true; -+ -+ ret = 0; -+ } else { -+ pr_err("\tunexpected failure of sys_rseq(%lx, %lx, %x, %x) = %d\n", rseq_abi_pointer, -+ rseq_abi_size, 0, rseq_signature, ret); -+ -+ ret = -1; -+ } -+ } else { -+ ret = sys_rseq((void *)rseq_abi_pointer, sizeof(struct rseq), RSEQ_FLAG_UNREGISTER, rseq_signature); -+ if (ret) { -+ pr_err("\tfailed to unregister sys_rseq(%lx, %lx, %x, %x) = %d\n", rseq_abi_pointer, -+ rseq_abi_size, RSEQ_FLAG_UNREGISTER, rseq_signature, ret); -+ -+ ret = -1; -+ goto out; -+ } -+ -+ pr_info("\tsys_rseq succeed, let's unregister it back... ok Error\n"); -+ pr_info("\trseq is non-initialized in the victim Error\n"); -+ rseq->rseq_inited = false; -+ ret = 0; -+ } -+ -+out: -+ sys_munmap(addr, sizeof(struct rseq)); -+ return ret; -+} -+ - static int fill_fds_fown(int fd, struct fd_opts *p) - { - int flags, ret; -diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c -index fbc89fe..368b5a0 100644 ---- a/criu/pie/restorer.c -+++ b/criu/pie/restorer.c -@@ -459,6 +459,27 @@ static int restore_cpu_affinity(struct task_restore_args *args) - return 0; - } - -+static int restore_rseq(struct rst_rseq_param *rseq) -+{ -+ int ret; -+ -+ if (!rseq->rseq_abi_pointer) { -+ pr_debug("rseq: nothing to restore\n"); -+ return 0; -+ } -+ -+ pr_debug("rseq: rseq_abi_pointer = %lx signature = %x\n", (unsigned long)rseq->rseq_abi_pointer, rseq->signature); -+ -+ ret = sys_rseq(decode_pointer(rseq->rseq_abi_pointer), rseq->rseq_abi_size, 0, rseq->signature); -+ if (ret) { -+ pr_err("failed sys_rseq(%lx, %lx, %x, %x) = %d\n", (unsigned long)rseq->rseq_abi_pointer, -+ (unsigned long)rseq->rseq_abi_size, 0, rseq->signature, ret); -+ return -1; -+ } -+ -+ return 0; -+} -+ - static int restore_seccomp_filter(pid_t tid, struct thread_restore_args *args) - { - unsigned int flags = args->seccomp_force_tsync ? SECCOMP_FILTER_FLAG_TSYNC : 0; -@@ -583,6 +604,9 @@ static int restore_thread_common(struct thread_restore_args *args) - - restore_tls(&args->tls); - -+ if (restore_rseq(&args->rseq)) -+ return -1; -+ - return 0; - } - -diff --git a/images/Makefile b/images/Makefile -index 2eaeb7c..004e22e 100644 ---- a/images/Makefile -+++ b/images/Makefile -@@ -71,6 +71,7 @@ proto-obj-y += img-streamer.o - proto-obj-y += bpfmap-file.o - proto-obj-y += bpfmap-data.o - proto-obj-y += apparmor.o -+proto-obj-y += rseq.o - - CFLAGS += -iquote $(obj)/ - -diff --git a/images/core.proto b/images/core.proto -index 39e7f32..b66230e 100644 ---- a/images/core.proto -+++ b/images/core.proto -@@ -14,6 +14,7 @@ import "timer.proto"; - import "creds.proto"; - import "sa.proto"; - import "siginfo.proto"; -+import "rseq.proto"; - - import "opts.proto"; - -@@ -106,6 +107,7 @@ message thread_core_entry { - optional string comm = 13; - optional uint64 blk_sigset_extended = 14; - required thread_allowedcpus_entry allowed_cpus = 15; -+ optional rseq_entry rseq_entry = 16; - } - - message task_rlimits_entry { -diff --git a/images/rseq.proto b/images/rseq.proto -new file mode 100644 -index 0000000..be28004 ---- /dev/null -+++ b/images/rseq.proto -@@ -0,0 +1,9 @@ -+// SPDX-License-Identifier: MIT -+ -+syntax = "proto2"; -+ -+message rseq_entry { -+ required uint64 rseq_abi_pointer = 1; -+ required uint32 rseq_abi_size = 2; -+ required uint32 signature = 3; -+} --- -2.34.1 - diff --git a/0007-zdtm-add-simple-test-for-rseq-C-R-Signed-off-by-Alex.patch b/0007-zdtm-add-simple-test-for-rseq-C-R-Signed-off-by-Alex.patch deleted file mode 100644 index a0ab3f097418bafc58e71bfca40520167b763ad4..0000000000000000000000000000000000000000 --- a/0007-zdtm-add-simple-test-for-rseq-C-R-Signed-off-by-Alex.patch +++ /dev/null @@ -1,217 +0,0 @@ -From dc83ed27d305237298b8754d1159f2e7f5c926ae Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 14:54:28 +0800 -Subject: [PATCH 07/72] zdtm: add simple test for rseq C/R Signed-off-by: - Alexander Mikhalitsyn - ---- - test/zdtm/static/Makefile | 1 + - test/zdtm/static/rseq00.c | 174 +++++++++++++++++++++++++++++++++++ - test/zdtm/static/rseq00.desc | 1 + - 3 files changed, 176 insertions(+) - create mode 100644 test/zdtm/static/rseq00.c - create mode 100644 test/zdtm/static/rseq00.desc - -diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile -index 70123cf..563d947 100644 ---- a/test/zdtm/static/Makefile -+++ b/test/zdtm/static/Makefile -@@ -61,6 +61,7 @@ TST_NOFILE := \ - pthread02 \ - pthread_timers \ - pthread_timers_h \ -+ rseq00 \ - vdso00 \ - vdso01 \ - vdso02 \ -diff --git a/test/zdtm/static/rseq00.c b/test/zdtm/static/rseq00.c -new file mode 100644 -index 0000000..26f41a2 ---- /dev/null -+++ b/test/zdtm/static/rseq00.c -@@ -0,0 +1,174 @@ -+/* -+ * test for rseq() syscall -+ * See also https://www.efficios.com/blog/2019/02/08/linux-restartable-sequences/ -+ * https://github.com/torvalds/linux/commit/d7822b1e24f2df5df98c76f0e94a5416349ff759 -+ */ -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "zdtmtst.h" -+ -+#if defined(__x86_64__) -+ -+const char *test_doc = "Check that rseq() basic C/R works"; -+const char *test_author = "Alexander Mikhalitsyn "; -+/* parts of code borrowed from https://www.efficios.com/blog/2019/02/08/linux-restartable-sequences/ */ -+ -+/* some useful definitions from kernel uapi */ -+enum rseq_flags { -+ RSEQ_FLAG_UNREGISTER = (1 << 0), -+}; -+ -+struct rseq { -+ uint32_t cpu_id_start; -+ uint32_t cpu_id; -+ uint64_t rseq_cs; -+ uint32_t flags; -+} __attribute__((aligned(4 * sizeof(uint64_t)))); -+ -+#ifndef __NR_rseq -+#define __NR_rseq 334 -+#endif -+/* EOF */ -+ -+static __thread volatile struct rseq __rseq_abi; -+ -+#define RSEQ_SIG 0x53053053 -+ -+static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len, int flags, uint32_t sig) -+{ -+ return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig); -+} -+ -+static void register_thread(void) -+{ -+ int rc; -+ rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG); -+ if (rc) { -+ fail("Failed to register rseq"); -+ exit(1); -+ } -+} -+ -+static void unregister_thread(void) -+{ -+ int rc; -+ rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), RSEQ_FLAG_UNREGISTER, RSEQ_SIG); -+ if (rc) { -+ fail("Failed to unregister rseq"); -+ exit(1); -+ } -+} -+ -+static void check_thread(void) -+{ -+ int rc; -+ rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG); -+ if (!(rc && errno == EBUSY)) { -+ fail("Failed to check rseq %d", rc); -+ exit(1); -+ } -+} -+ -+#define RSEQ_ACCESS_ONCE(x) (*(__volatile__ __typeof__(x) *)&(x)) -+ -+static int rseq_addv(intptr_t *v, intptr_t count, int cpu) -+{ -+ /* clang-format off */ -+ __asm__ __volatile__ goto( -+ ".pushsection __rseq_table, \"aw\"\n\t" -+ ".balign 32\n\t" -+ "cs_obj:\n\t" -+ /* version, flags */ -+ ".long 0, 0\n\t" -+ /* start_ip, post_commit_ip, abort_ip */ -+ ".quad 1f, (2f-1f), 4f\n\t" -+ ".popsection\n\t" -+ "1:\n\t" -+ "leaq cs_obj(%%rip), %%rax\n\t" -+ "movq %%rax, %[rseq_cs]\n\t" -+ "cmpl %[cpu_id], %[current_cpu_id]\n\t" -+ "jnz 4f\n\t" -+ "addq %[count], %[v]\n\t" /* final store */ -+ "2:\n\t" -+ ".pushsection __rseq_failure, \"ax\"\n\t" -+ /* Disassembler-friendly signature: nopl (%rip). */ -+ ".byte 0x0f, 0x1f, 0x05\n\t" -+ ".long 0x53053053\n\t" /* RSEQ_FLAGS */ -+ "4:\n\t" -+ "jmp abort\n\t" -+ ".popsection\n\t" -+ : /* gcc asm goto does not allow outputs */ -+ : [cpu_id] "r" (cpu), -+ [current_cpu_id] "m" (__rseq_abi.cpu_id), -+ [rseq_cs] "m" (__rseq_abi.rseq_cs), -+ /* final store input */ -+ [v] "m" (*v), -+ [count] "er" (count) -+ : "memory", "cc", "rax" -+ : abort -+ ); -+ /* clang-format on */ -+ -+ return 0; -+abort: -+ return -1; -+} -+ -+int main(int argc, char *argv[]) -+{ -+ int cpu, ret; -+ intptr_t *cpu_data; -+ long nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); -+ -+ test_init(argc, argv); -+ -+ cpu_data = calloc(nr_cpus, sizeof(*cpu_data)); -+ if (!cpu_data) { -+ fail("calloc"); -+ exit(EXIT_FAILURE); -+ } -+ -+ register_thread(); -+ -+ test_daemon(); -+ test_waitsig(); -+ -+ check_thread(); -+ -+ cpu = RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id_start); -+ ret = rseq_addv(&cpu_data[cpu], 2, cpu); -+ if (ret) -+ fail("Failed to increment per-cpu counter"); -+ else -+ test_msg("cpu_data[%d] == %ld\n", cpu, (long int)cpu_data[cpu]); -+ -+ if (cpu_data[cpu] == 2) -+ pass(); -+ else -+ fail(); -+ -+ return 0; -+} -+ -+#else -+ -+int main(int argc, char *argv[]) -+{ -+ test_init(argc, argv); -+ skip("Unsupported arch"); -+ return 0; -+} -+ -+#endif -\ No newline at end of file -diff --git a/test/zdtm/static/rseq00.desc b/test/zdtm/static/rseq00.desc -new file mode 100644 -index 0000000..0324fa3 ---- /dev/null -+++ b/test/zdtm/static/rseq00.desc -@@ -0,0 +1 @@ -+{'flavor': 'h', 'arch': 'x86_64', 'feature': 'get_rseq_conf'} --- -2.34.1 - diff --git a/0008-ci-add-Fedora-Rawhide-based-test-on-Cirrus-We-have-a.patch b/0008-ci-add-Fedora-Rawhide-based-test-on-Cirrus-We-have-a.patch deleted file mode 100644 index fc696489a0d4b639b9dbb2e9a467feffa0f3027e..0000000000000000000000000000000000000000 --- a/0008-ci-add-Fedora-Rawhide-based-test-on-Cirrus-We-have-a.patch +++ /dev/null @@ -1,123 +0,0 @@ -From 4ebfba180d44706e50afb525cc992ac708e83883 Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 14:57:16 +0800 -Subject: [PATCH 08/72] ci: add Fedora Rawhide based test on Cirrus We have - ability to use nested virtualization on Cirrus, and already have "Vagrant - Fedora based test (no VDSO)" test, let's do analogical for Fedora Rawhide to - get fresh kernel. - -Suggested-by: Adrian Reber -Signed-off-by: Alexander Mikhalitsyn ---- - .cirrus.yml | 21 +++++++++++++++++++++ - scripts/ci/Makefile | 7 +++++-- - scripts/ci/run-ci-tests.sh | 5 +++++ - scripts/ci/vagrant.sh | 21 +++++++++++++++++++++ - 4 files changed, 52 insertions(+), 2 deletions(-) - -diff --git a/.cirrus.yml b/.cirrus.yml -index 671178d..9716e58 100644 ---- a/.cirrus.yml -+++ b/.cirrus.yml -@@ -19,6 +19,27 @@ task: - build_script: | - make -C scripts/ci vagrant-fedora-no-vdso - -+task: -+ name: Vagrant Fedora Rawhide based test -+ environment: -+ HOME: "/root" -+ CIRRUS_WORKING_DIR: "/tmp/criu" -+ -+ compute_engine_instance: -+ image_project: cirrus-images -+ image: family/docker-kvm -+ platform: linux -+ cpu: 4 -+ memory: 16G -+ nested_virtualization: true -+ -+ setup_script: | -+ scripts/ci/apt-install make gcc pkg-config git perl-modules iproute2 kmod wget cpu-checker -+ sudo kvm-ok -+ ln -sf /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto -+ build_script: | -+ make -C scripts/ci vagrant-fedora-rawhide -+ - task: - name: CentOS 8 based test - environment: -diff --git a/scripts/ci/Makefile b/scripts/ci/Makefile -index 02b4d87..9c9264d 100644 ---- a/scripts/ci/Makefile -+++ b/scripts/ci/Makefile -@@ -41,7 +41,7 @@ export CONTAINER_TERMINAL - ifeq ($(UNAME),x86_64) - # On anything besides x86_64 Travis is running unprivileged LXD - # containers which do not support running docker with '--privileged'. -- CONTAINER_OPTS := --rm $(CONTAINER_TERMINAL) --privileged -v /lib/modules:/lib/modules --tmpfs /run -+ CONTAINER_OPTS := --rm $(CONTAINER_TERMINAL) --privileged --userns=host --cgroupns=host -v /lib/modules:/lib/modules --tmpfs /run - else - CONTAINER_OPTS := --rm -v /lib/modules:/lib/modules --tmpfs /run - endif -@@ -92,7 +92,10 @@ setup-vagrant: - vagrant-fedora-no-vdso: setup-vagrant - ./vagrant.sh fedora-no-vdso - --.PHONY: setup-vagrant vagrant-fedora-no-vdso -+vagrant-fedora-rawhide: setup-vagrant -+ ./vagrant.sh fedora-rawhide -+ -+.PHONY: setup-vagrant vagrant-fedora-no-vdso vagrant-fedora-rawhide - - %: - $(MAKE) -C ../build $@$(target-suffix) -diff --git a/scripts/ci/run-ci-tests.sh b/scripts/ci/run-ci-tests.sh -index 7c66e68..95b4ec7 100755 ---- a/scripts/ci/run-ci-tests.sh -+++ b/scripts/ci/run-ci-tests.sh -@@ -194,6 +194,11 @@ if [ "${STREAM_TEST}" = "1" ]; then - exit 0 - fi - -+# print some useful debug info -+cat /proc/self/status -+ls -la /proc/self/ns -+cat /proc/self/cgroup -+ - # shellcheck disable=SC2086 - ./test/zdtm.py run -a -p 2 --keep-going $ZDTM_OPTS - -diff --git a/scripts/ci/vagrant.sh b/scripts/ci/vagrant.sh -index 839b100..f961b8d 100755 ---- a/scripts/ci/vagrant.sh -+++ b/scripts/ci/vagrant.sh -@@ -58,4 +58,25 @@ fedora-no-vdso() { - ssh default 'cd /vagrant/criu/test; sudo ./zdtm.py run -t zdtm/transition/pidfd_store_sk --rpc --pre 2' - } - -+fedora-rawhide() { -+ #ssh default sudo grubby --update-kernel ALL --args="selinux=0 systemd.unified_cgroup_hierarchy=0" -+ ssh default sudo grubby --update-kernel ALL -+ # -+ # Workaround the problem: -+ # error running container: error from /usr/bin/crun creating container for [...]: sd-bus call: Transport endpoint is not connected -+ # Let's just use runc instead of crun -+ # see also https://github.com/kata-containers/tests/issues/4283 -+ # -+ ssh default 'sudo dnf remove -y crun || true' -+ ssh default sudo dnf install -y podman runc -+ vagrant reload -+ #ssh default sudo setenforce 0 -+ ssh default cat /proc/cmdline -+ ssh default ls -la /proc/self/ns -+ ssh default sudo cat /proc/self/status -+ ssh default sudo cat /proc/self/cgroup -+ #ssh default sudo capsh --print -+ ssh default 'cd /vagrant; tar xf criu.tar; cd criu; sudo -E make -C scripts/ci fedora-rawhide CONTAINER_RUNTIME=podman BUILD_OPTIONS="--security-opt seccomp=unconfined"' -+} -+ - $1 --- -2.34.1 - diff --git a/0009-include-add-thread_pointer.h-from-Glibc-Implementati.patch b/0009-include-add-thread_pointer.h-from-Glibc-Implementati.patch deleted file mode 100644 index f10df2940701a477fc3a51f97f50201ddd007f0b..0000000000000000000000000000000000000000 --- a/0009-include-add-thread_pointer.h-from-Glibc-Implementati.patch +++ /dev/null @@ -1,244 +0,0 @@ -From 159d2b7c889ae23ece99595af8a12f766c7b1aff Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 15:00:07 +0800 -Subject: [PATCH 09/72] include: add thread_pointer.h from Glibc Implementation - was taken from the Glibc. - -https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=8dbeb0561eeb876f557ac9eef5721912ec074ea5 -https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=cb976fba4c51ede7bf8cee5035888527c308dfbc - -Signed-off-by: Alexander Mikhalitsyn ---- - .../arch/aarch64/include/asm/thread_pointer.h | 27 ++++++++++++++ - criu/arch/arm/include/asm/thread_pointer.h | 27 ++++++++++++++ - criu/arch/mips/include/asm/thread_pointer.h | 27 ++++++++++++++ - criu/arch/ppc64/include/asm/thread_pointer.h | 33 +++++++++++++++++ - criu/arch/s390/include/asm/thread_pointer.h | 27 ++++++++++++++ - criu/arch/x86/include/asm/thread_pointer.h | 37 +++++++++++++++++++ - 6 files changed, 178 insertions(+) - create mode 100644 criu/arch/aarch64/include/asm/thread_pointer.h - create mode 100644 criu/arch/arm/include/asm/thread_pointer.h - create mode 100644 criu/arch/mips/include/asm/thread_pointer.h - create mode 100644 criu/arch/ppc64/include/asm/thread_pointer.h - create mode 100644 criu/arch/s390/include/asm/thread_pointer.h - create mode 100644 criu/arch/x86/include/asm/thread_pointer.h - -diff --git a/criu/arch/aarch64/include/asm/thread_pointer.h b/criu/arch/aarch64/include/asm/thread_pointer.h -new file mode 100644 -index 0000000..f7e0706 ---- /dev/null -+++ b/criu/arch/aarch64/include/asm/thread_pointer.h -@@ -0,0 +1,27 @@ -+/* __thread_pointer definition. Generic version. -+ Copyright (C) 2021 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#ifndef _SYS_THREAD_POINTER_H -+#define _SYS_THREAD_POINTER_H -+ -+static inline void *__criu_thread_pointer(void) -+{ -+ return __builtin_thread_pointer(); -+} -+ -+#endif /* _SYS_THREAD_POINTER_H */ -diff --git a/criu/arch/arm/include/asm/thread_pointer.h b/criu/arch/arm/include/asm/thread_pointer.h -new file mode 100644 -index 0000000..f7e0706 ---- /dev/null -+++ b/criu/arch/arm/include/asm/thread_pointer.h -@@ -0,0 +1,27 @@ -+/* __thread_pointer definition. Generic version. -+ Copyright (C) 2021 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#ifndef _SYS_THREAD_POINTER_H -+#define _SYS_THREAD_POINTER_H -+ -+static inline void *__criu_thread_pointer(void) -+{ -+ return __builtin_thread_pointer(); -+} -+ -+#endif /* _SYS_THREAD_POINTER_H */ -diff --git a/criu/arch/mips/include/asm/thread_pointer.h b/criu/arch/mips/include/asm/thread_pointer.h -new file mode 100644 -index 0000000..f7e0706 ---- /dev/null -+++ b/criu/arch/mips/include/asm/thread_pointer.h -@@ -0,0 +1,27 @@ -+/* __thread_pointer definition. Generic version. -+ Copyright (C) 2021 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#ifndef _SYS_THREAD_POINTER_H -+#define _SYS_THREAD_POINTER_H -+ -+static inline void *__criu_thread_pointer(void) -+{ -+ return __builtin_thread_pointer(); -+} -+ -+#endif /* _SYS_THREAD_POINTER_H */ -diff --git a/criu/arch/ppc64/include/asm/thread_pointer.h b/criu/arch/ppc64/include/asm/thread_pointer.h -new file mode 100644 -index 0000000..304516f ---- /dev/null -+++ b/criu/arch/ppc64/include/asm/thread_pointer.h -@@ -0,0 +1,33 @@ -+/* __thread_pointer definition. powerpc version. -+ Copyright (C) 2021 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#ifndef _SYS_THREAD_POINTER_H -+#define _SYS_THREAD_POINTER_H -+ -+#ifdef __powerpc64__ -+register void *__thread_register asm("r13"); -+#else -+register void *__thread_register asm("r2"); -+#endif -+ -+static inline void *__criu_thread_pointer(void) -+{ -+ return __thread_register; -+} -+ -+#endif /* _SYS_THREAD_POINTER_H */ -\ No newline at end of file -diff --git a/criu/arch/s390/include/asm/thread_pointer.h b/criu/arch/s390/include/asm/thread_pointer.h -new file mode 100644 -index 0000000..f7e0706 ---- /dev/null -+++ b/criu/arch/s390/include/asm/thread_pointer.h -@@ -0,0 +1,27 @@ -+/* __thread_pointer definition. Generic version. -+ Copyright (C) 2021 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#ifndef _SYS_THREAD_POINTER_H -+#define _SYS_THREAD_POINTER_H -+ -+static inline void *__criu_thread_pointer(void) -+{ -+ return __builtin_thread_pointer(); -+} -+ -+#endif /* _SYS_THREAD_POINTER_H */ -diff --git a/criu/arch/x86/include/asm/thread_pointer.h b/criu/arch/x86/include/asm/thread_pointer.h -new file mode 100644 -index 0000000..08603ae ---- /dev/null -+++ b/criu/arch/x86/include/asm/thread_pointer.h -@@ -0,0 +1,37 @@ -+/* __thread_pointer definition. x86 version. -+ Copyright (C) 2021 Free Software Foundation, Inc. -+ This file is part of the GNU C Library. -+ -+ The GNU C Library is free software; you can redistribute it and/or -+ modify it under the terms of the GNU Lesser General Public -+ License as published by the Free Software Foundation; either -+ version 2.1 of the License, or (at your option) any later version. -+ -+ The GNU C Library is distributed in the hope that it will be useful, -+ but WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ Lesser General Public License for more details. -+ -+ You should have received a copy of the GNU Lesser General Public -+ License along with the GNU C Library. If not, see -+ . */ -+ -+#ifndef _SYS_THREAD_POINTER_H -+#define _SYS_THREAD_POINTER_H -+ -+static inline void *__criu_thread_pointer(void) -+{ -+#if __GNUC_PREREQ(11, 1) -+ return __builtin_thread_pointer(); -+#else -+ void *__result; -+#ifdef __x86_64__ -+ __asm__("mov %%fs:0, %0" : "=r"(__result)); -+#else -+ __asm__("mov %%gs:0, %0" : "=r"(__result)); -+#endif -+ return __result; -+#endif /* !GCC 11 */ -+} -+ -+#endif /* _SYS_THREAD_POINTER_H */ -\ No newline at end of file --- -2.34.1 - diff --git a/0010-clone-noasan-unregister-rseq-at-the-thread-start-for.patch b/0010-clone-noasan-unregister-rseq-at-the-thread-start-for.patch deleted file mode 100644 index e02dfe0d6ca7d8039d108db977b08fa781b1825e..0000000000000000000000000000000000000000 --- a/0010-clone-noasan-unregister-rseq-at-the-thread-start-for.patch +++ /dev/null @@ -1,102 +0,0 @@ -From 0fdb1cf439c08f6e957e2e7d234a015ef3b84dfc Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 15:00:43 +0800 -Subject: [PATCH 10/72] clone-noasan: unregister rseq at the thread start for - new glibc Fresh glibc does rseq registration by default during - start_thread(). [ see - https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=95e114a0919d844d8fe07839cb6538b7f5ee920e - ] - -This cause process crashes during memory restore procedure, because -memory which corresponds to the struct rseq will be overwritten. - -See also -("nptl: Add public rseq symbols and ") -https://sourceware.org/git?p=glibc.git;a=commit;h=c901c3e764d7c7079f006b4e21e877d5036eb4f5 -("nptl: Add for defining __thread_pointer") -https://sourceware.org/git?p=glibc.git;a=commit;h=8dbeb0561eeb876f557ac9eef5721912ec074ea5 - -Signed-off-by: Alexander Mikhalitsyn ---- - criu/clone-noasan.c | 42 ++++++++++++++++++++++++++++++++++++++++-- - 1 file changed, 40 insertions(+), 2 deletions(-) - -diff --git a/criu/clone-noasan.c b/criu/clone-noasan.c -index d657ea2..5f8dd1b 100644 ---- a/criu/clone-noasan.c -+++ b/criu/clone-noasan.c -@@ -2,6 +2,13 @@ - #include - #include - -+#ifdef __has_include -+#if __has_include ("sys/rseq.h") -+#include -+#include "asm/thread_pointer.h" -+#endif -+#endif -+ - #include - - #include "sched.h" -@@ -34,16 +41,45 @@ - * ... wait for process to finish ... - * unlock_last_pid - */ -+ -+#if defined(RSEQ_SIG) -+static inline void unregister_glibc_rseq(void) -+{ -+ /* unregister rseq */ -+ syscall(__NR_rseq, (void *)((char *)__criu_thread_pointer() + __rseq_offset), __rseq_size, 1, RSEQ_SIG); -+} -+#else -+static inline void unregister_glibc_rseq(void) -+{ -+} -+#endif -+ -+struct call_fn_args { -+ int (*fn)(void *); -+ void *arg; -+}; -+ -+int call_fn(void *arg) -+{ -+ struct call_fn_args *cargs = arg; -+ unregister_glibc_rseq(); -+ return cargs->fn(cargs->arg); -+} -+ - int clone_noasan(int (*fn)(void *), int flags, void *arg) - { - void *stack_ptr = (void *)round_down((unsigned long)&stack_ptr - 1024, 16); -+ struct call_fn_args a = { -+ .fn = fn, -+ .arg = arg, -+ }; - - BUG_ON((flags & CLONE_VM) && !(flags & CLONE_VFORK)); - /* - * Reserve some bytes for clone() internal needs - * and use as stack the address above this area. - */ -- return clone(fn, stack_ptr, flags, arg); -+ return clone(call_fn, stack_ptr, flags, (void *)&a); - } - - int clone3_with_pid_noasan(int (*fn)(void *), void *arg, int flags, int exit_signal, pid_t pid) -@@ -78,7 +114,9 @@ int clone3_with_pid_noasan(int (*fn)(void *), void *arg, int flags, int exit_sig - c_args.set_tid = ptr_to_u64(&pid); - c_args.set_tid_size = 1; - pid = syscall(__NR_clone3, &c_args, sizeof(c_args)); -- if (pid == 0) -+ if (pid == 0) { -+ unregister_glibc_rseq(); - exit(fn(arg)); -+ } - return pid; - } --- -2.34.1 - diff --git a/0011-zdtm-static-rseq00-fix-rseq-test-when-linking-with-a.patch b/0011-zdtm-static-rseq00-fix-rseq-test-when-linking-with-a.patch deleted file mode 100644 index cd99dde41df2ec7cab54aff86f5e69f877a07722..0000000000000000000000000000000000000000 --- a/0011-zdtm-static-rseq00-fix-rseq-test-when-linking-with-a.patch +++ /dev/null @@ -1,158 +0,0 @@ -From 7cc800d2cfbfb6fe686345a652472b194ca2b9cf Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 15:01:34 +0800 -Subject: [PATCH 11/72] zdtm/static/rseq00: fix rseq test when linking with a - fresh Glibc Fresh Glibc does rseq() register by default. We need to - unregister rseq before registering our own. - -Signed-off-by: Alexander Mikhalitsyn ---- - test/zdtm/static/rseq00.c | 76 +++++++++++++++++++++++++++++---------- - 1 file changed, 58 insertions(+), 18 deletions(-) - -diff --git a/test/zdtm/static/rseq00.c b/test/zdtm/static/rseq00.c -index 26f41a2..87053b8 100644 ---- a/test/zdtm/static/rseq00.c -+++ b/test/zdtm/static/rseq00.c -@@ -19,13 +19,48 @@ - - #include "zdtmtst.h" - --#if defined(__x86_64__) -+#ifdef __has_include -+#if __has_include("sys/rseq.h") -+#include -+#endif -+#endif -+ -+#if defined(__i386__) || defined(__x86_64__) -+ -+#if defined(RSEQ_SIG) -+static inline void *__criu_thread_pointer(void) -+{ -+#if __GNUC_PREREQ(11, 1) -+ return __builtin_thread_pointer(); -+#else -+ void *__result; -+#ifdef __x86_64__ -+ __asm__("mov %%fs:0, %0" : "=r"(__result)); -+#else -+ __asm__("mov %%gs:0, %0" : "=r"(__result)); -+#endif -+ return __result; -+#endif /* !GCC 11 */ -+} -+ -+static inline void unregister_glibc_rseq(void) -+{ -+ /* unregister rseq */ -+ syscall(__NR_rseq, (void *)((char *)__criu_thread_pointer() + __rseq_offset), __rseq_size, 1, RSEQ_SIG); -+} -+#else -+static inline void unregister_glibc_rseq(void) -+{ -+} -+#endif - - const char *test_doc = "Check that rseq() basic C/R works"; - const char *test_author = "Alexander Mikhalitsyn "; - /* parts of code borrowed from https://www.efficios.com/blog/2019/02/08/linux-restartable-sequences/ */ - - /* some useful definitions from kernel uapi */ -+#ifndef RSEQ_SIG -+ - enum rseq_flags { - RSEQ_FLAG_UNREGISTER = (1 << 0), - }; -@@ -37,14 +72,21 @@ struct rseq { - uint32_t flags; - } __attribute__((aligned(4 * sizeof(uint64_t)))); - -+#define RSEQ_SIG 0x53053053 -+ -+#endif -+ - #ifndef __NR_rseq - #define __NR_rseq 334 - #endif - /* EOF */ - --static __thread volatile struct rseq __rseq_abi; -+#define RSEQ_TLS_ALLOC 0 - --#define RSEQ_SIG 0x53053053 -+static volatile struct rseq *rseq_ptr; -+#if RSEQ_TLS_ALLOC -+static __thread volatile struct rseq __rseq_abi; -+#endif - - static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len, int flags, uint32_t sig) - { -@@ -54,27 +96,18 @@ static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len, int flags - static void register_thread(void) - { - int rc; -- rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG); -+ unregister_glibc_rseq(); -+ rc = sys_rseq(rseq_ptr, sizeof(struct rseq), 0, RSEQ_SIG); - if (rc) { - fail("Failed to register rseq"); - exit(1); - } - } - --static void unregister_thread(void) --{ -- int rc; -- rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), RSEQ_FLAG_UNREGISTER, RSEQ_SIG); -- if (rc) { -- fail("Failed to unregister rseq"); -- exit(1); -- } --} -- - static void check_thread(void) - { - int rc; -- rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG); -+ rc = sys_rseq(rseq_ptr, sizeof(struct rseq), 0, RSEQ_SIG); - if (!(rc && errno == EBUSY)) { - fail("Failed to check rseq %d", rc); - exit(1); -@@ -111,8 +144,8 @@ static int rseq_addv(intptr_t *v, intptr_t count, int cpu) - ".popsection\n\t" - : /* gcc asm goto does not allow outputs */ - : [cpu_id] "r" (cpu), -- [current_cpu_id] "m" (__rseq_abi.cpu_id), -- [rseq_cs] "m" (__rseq_abi.rseq_cs), -+ [current_cpu_id] "m" (rseq_ptr->cpu_id), -+ [rseq_cs] "m" (rseq_ptr->rseq_cs), - /* final store input */ - [v] "m" (*v), - [count] "er" (count) -@@ -132,6 +165,13 @@ int main(int argc, char *argv[]) - intptr_t *cpu_data; - long nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - -+#if RSEQ_TLS_ALLOC -+ rseq_ptr = &__rseq_abi; -+#else -+ //rseq_ptr = malloc(sizeof(struct rseq)); -+ rseq_ptr = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, 0, 0); -+#endif -+ - test_init(argc, argv); - - cpu_data = calloc(nr_cpus, sizeof(*cpu_data)); -@@ -147,7 +187,7 @@ int main(int argc, char *argv[]) - - check_thread(); - -- cpu = RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id_start); -+ cpu = RSEQ_ACCESS_ONCE(rseq_ptr->cpu_id_start); - ret = rseq_addv(&cpu_data[cpu], 2, cpu); - if (ret) - fail("Failed to increment per-cpu counter"); --- -2.34.1 - diff --git a/0012-compel-add-helpers-to-get-set-instruction-pointer-Si.patch b/0012-compel-add-helpers-to-get-set-instruction-pointer-Si.patch deleted file mode 100644 index d5d00edd247eced2770c298b9a89c2da22eb471f..0000000000000000000000000000000000000000 --- a/0012-compel-add-helpers-to-get-set-instruction-pointer-Si.patch +++ /dev/null @@ -1,265 +0,0 @@ -From 65eb254d6ad2f1b1d36e95f431b05faf9e67524d Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 15:02:08 +0800 -Subject: [PATCH 12/72] compel: add helpers to get/set instruction pointer - Signed-off-by: Alexander Mikhalitsyn - ---- - .../src/lib/include/uapi/asm/infect-types.h | 9 +++++---- - .../src/lib/include/uapi/asm/infect-types.h | 9 +++++---- - .../src/lib/include/uapi/asm/infect-types.h | 9 +++++---- - .../src/lib/include/uapi/asm/infect-types.h | 9 +++++---- - .../src/lib/include/uapi/asm/infect-types.h | 7 ++++--- - .../src/lib/include/uapi/asm/infect-types.h | 9 +++++---- - compel/include/uapi/infect.h | 6 ++++++ - compel/src/lib/infect.c | 20 +++++++++++++++++++ - criu/arch/aarch64/include/asm/types.h | 2 ++ - criu/arch/arm/include/asm/types.h | 2 ++ - criu/arch/mips/include/asm/types.h | 2 ++ - criu/arch/ppc64/include/asm/types.h | 2 ++ - criu/arch/s390/include/asm/types.h | 2 ++ - criu/arch/x86/include/asm/types.h | 2 ++ - 14 files changed, 67 insertions(+), 23 deletions(-) - -diff --git a/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h -index f91e73d..9d4ce7e 100644 ---- a/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h -+++ b/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h -@@ -23,10 +23,11 @@ typedef struct user_fpsimd_state user_fpregs_struct_t; - #define compel_arch_get_tls_task(ctl, tls) - #define compel_arch_get_tls_thread(tctl, tls) - --#define REG_RES(r) ((uint64_t)(r).regs[0]) --#define REG_IP(r) ((uint64_t)(r).pc) --#define REG_SP(r) ((uint64_t)((r).sp)) --#define REG_SYSCALL_NR(r) ((uint64_t)(r).regs[8]) -+#define REG_RES(r) ((uint64_t)(r).regs[0]) -+#define REG_IP(r) ((uint64_t)(r).pc) -+#define SET_REG_IP(r, val) ((r).pc = (val)) -+#define REG_SP(r) ((uint64_t)((r).sp)) -+#define REG_SYSCALL_NR(r) ((uint64_t)(r).regs[8]) - - #define user_regs_native(pregs) true - -diff --git a/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h b/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h -index 159b6a9..8d32825 100644 ---- a/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h -+++ b/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h -@@ -56,10 +56,11 @@ struct user_vfp_exc { - unsigned long fpinst2; - }; - --#define REG_RES(regs) ((regs).ARM_r0) --#define REG_IP(regs) ((regs).ARM_pc) --#define REG_SP(regs) ((regs).ARM_sp) --#define REG_SYSCALL_NR(regs) ((regs).ARM_r7) -+#define REG_RES(regs) ((regs).ARM_r0) -+#define REG_IP(regs) ((regs).ARM_pc) -+#define SET_REG_IP(regs, val) ((regs).ARM_pc = (val)) -+#define REG_SP(regs) ((regs).ARM_sp) -+#define REG_SYSCALL_NR(regs) ((regs).ARM_r7) - - #define user_regs_native(pregs) true - -diff --git a/compel/arch/mips/src/lib/include/uapi/asm/infect-types.h b/compel/arch/mips/src/lib/include/uapi/asm/infect-types.h -index 70b3f85..481566a 100644 ---- a/compel/arch/mips/src/lib/include/uapi/asm/infect-types.h -+++ b/compel/arch/mips/src/lib/include/uapi/asm/infect-types.h -@@ -56,10 +56,11 @@ static inline bool user_regs_native(user_regs_struct_t *pregs) - #define compel_arch_get_tls_task(ctl, tls) - #define compel_arch_get_tls_thread(tctl, tls) - --#define REG_RES(regs) ((regs).MIPS_v0) --#define REG_IP(regs) ((regs).cp0_epc) --#define REG_SP(regs) ((regs).MIPS_sp) --#define REG_SYSCALL_NR(regs) ((regs).MIPS_v0) -+#define REG_RES(regs) ((regs).MIPS_v0) -+#define REG_IP(regs) ((regs).cp0_epc) -+#define SET_REG_IP(regs, val) ((regs).cp0_epc = (val)) -+#define REG_SP(regs) ((regs).MIPS_sp) -+#define REG_SYSCALL_NR(regs) ((regs).MIPS_v0) - - //#define __NR(syscall, compat) ((compat) ? __NR32_##syscall : __NR_##syscall) - #define __NR(syscall, compat) __NR_##syscall -diff --git a/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h -index fe6192e..bf2cc95 100644 ---- a/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h -+++ b/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h -@@ -72,10 +72,11 @@ typedef struct { - } tm; - } user_fpregs_struct_t; - --#define REG_RES(regs) ((uint64_t)(regs).gpr[3]) --#define REG_IP(regs) ((uint64_t)(regs).nip) --#define REG_SP(regs) ((uint64_t)(regs).gpr[1]) --#define REG_SYSCALL_NR(regs) ((uint64_t)(regs).gpr[0]) -+#define REG_RES(regs) ((uint64_t)(regs).gpr[3]) -+#define REG_IP(regs) ((uint64_t)(regs).nip) -+#define SET_REG_IP(regs, val) ((regs).nip = (val)) -+#define REG_SP(regs) ((uint64_t)(regs).gpr[1]) -+#define REG_SYSCALL_NR(regs) ((uint64_t)(regs).gpr[0]) - - #define user_regs_native(pregs) true - -diff --git a/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h b/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h -index 896d70e..87283bc 100644 ---- a/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h -+++ b/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h -@@ -62,9 +62,10 @@ typedef struct { - uint32_t system_call; - } user_regs_struct_t; - --#define REG_RES(r) ((uint64_t)(r).prstatus.gprs[2]) --#define REG_IP(r) ((uint64_t)(r).prstatus.psw.addr) --#define REG_SP(r) ((uint64_t)(r).prstatus.gprs[15]) -+#define REG_RES(r) ((uint64_t)(r).prstatus.gprs[2]) -+#define REG_IP(r) ((uint64_t)(r).prstatus.psw.addr) -+#define SET_REG_IP(r, val) ((r).prstatus.psw.addr = (val)) -+#define REG_SP(r) ((uint64_t)(r).prstatus.gprs[15]) - /* - * We assume that REG_SYSCALL_NR() is only used for pie code where we - * always use svc 0 with opcode in %r1. -diff --git a/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h b/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h -index 34b3ad0..b35504f 100644 ---- a/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h -+++ b/compel/arch/x86/src/lib/include/uapi/asm/infect-types.h -@@ -127,10 +127,11 @@ typedef struct { - - typedef struct xsave_struct user_fpregs_struct_t; - --#define REG_RES(regs) get_user_reg(®s, ax) --#define REG_IP(regs) get_user_reg(®s, ip) --#define REG_SP(regs) get_user_reg(®s, sp) --#define REG_SYSCALL_NR(regs) get_user_reg(®s, orig_ax) -+#define REG_RES(regs) get_user_reg(®s, ax) -+#define REG_IP(regs) get_user_reg(®s, ip) -+#define SET_REG_IP(regs, val) set_user_reg(®s, ip, val) -+#define REG_SP(regs) get_user_reg(®s, sp) -+#define REG_SYSCALL_NR(regs) get_user_reg(®s, orig_ax) - - #define __NR(syscall, compat) ((compat) ? __NR32_##syscall : __NR_##syscall) - -diff --git a/compel/include/uapi/infect.h b/compel/include/uapi/infect.h -index c3d2ee6..389878e 100644 ---- a/compel/include/uapi/infect.h -+++ b/compel/include/uapi/infect.h -@@ -168,4 +168,10 @@ extern unsigned long compel_task_size(void); - extern uint64_t compel_get_leader_sp(struct parasite_ctl *ctl); - extern uint64_t compel_get_thread_sp(struct parasite_thread_ctl *tctl); - -+extern uint64_t compel_get_leader_ip(struct parasite_ctl *ctl); -+extern uint64_t compel_get_thread_ip(struct parasite_thread_ctl *tctl); -+ -+void compel_set_leader_ip(struct parasite_ctl *ctl, uint64_t v); -+void compel_set_thread_ip(struct parasite_thread_ctl *tctl, uint64_t v); -+ - #endif -diff --git a/compel/src/lib/infect.c b/compel/src/lib/infect.c -index 0fb9e71..6a13cc1 100644 ---- a/compel/src/lib/infect.c -+++ b/compel/src/lib/infect.c -@@ -1686,3 +1686,23 @@ uint64_t compel_get_thread_sp(struct parasite_thread_ctl *tctl) - { - return REG_SP(tctl->th.regs); - } -+ -+uint64_t compel_get_leader_ip(struct parasite_ctl *ctl) -+{ -+ return REG_IP(ctl->orig.regs); -+} -+ -+uint64_t compel_get_thread_ip(struct parasite_thread_ctl *tctl) -+{ -+ return REG_IP(tctl->th.regs); -+} -+ -+void compel_set_leader_ip(struct parasite_ctl *ctl, uint64_t v) -+{ -+ SET_REG_IP(ctl->orig.regs, v); -+} -+ -+void compel_set_thread_ip(struct parasite_thread_ctl *tctl, uint64_t v) -+{ -+ SET_REG_IP(tctl->th.regs, v); -+} -diff --git a/criu/arch/aarch64/include/asm/types.h b/criu/arch/aarch64/include/asm/types.h -index c860af1..363c1ca 100644 ---- a/criu/arch/aarch64/include/asm/types.h -+++ b/criu/arch/aarch64/include/asm/types.h -@@ -22,6 +22,8 @@ typedef UserAarch64RegsEntry UserRegsEntry; - - #define TI_SP(core) ((core)->ti_aarch64->gpregs->sp) - -+#define TI_IP(core) ((core)->ti_aarch64->gpregs->pc) -+ - static inline void *decode_pointer(uint64_t v) - { - return (void *)v; -diff --git a/criu/arch/arm/include/asm/types.h b/criu/arch/arm/include/asm/types.h -index cfcb8a1..93d2dc2 100644 ---- a/criu/arch/arm/include/asm/types.h -+++ b/criu/arch/arm/include/asm/types.h -@@ -21,6 +21,8 @@ typedef UserArmRegsEntry UserRegsEntry; - - #define TI_SP(core) ((core)->ti_arm->gpregs->sp) - -+#define TI_IP(core) ((core)->ti_arm->gpregs->ip) -+ - static inline void *decode_pointer(u64 v) - { - return (void *)(u32)v; -diff --git a/criu/arch/mips/include/asm/types.h b/criu/arch/mips/include/asm/types.h -index 237471f..2c75b6a 100644 ---- a/criu/arch/mips/include/asm/types.h -+++ b/criu/arch/mips/include/asm/types.h -@@ -18,6 +18,8 @@ - - #define CORE_THREAD_ARCH_INFO(core) core->ti_mips - -+#define TI_IP(core) ((core)->ti_mips->gpregs->cp0_epc) -+ - typedef UserMipsRegsEntry UserRegsEntry; - - static inline u64 encode_pointer(void *p) -diff --git a/criu/arch/ppc64/include/asm/types.h b/criu/arch/ppc64/include/asm/types.h -index fedeff2..d60aadd 100644 ---- a/criu/arch/ppc64/include/asm/types.h -+++ b/criu/arch/ppc64/include/asm/types.h -@@ -19,6 +19,8 @@ typedef UserPpc64RegsEntry UserRegsEntry; - - #define CORE_THREAD_ARCH_INFO(core) core->ti_ppc64 - -+#define TI_IP(core) ((core)->ti_ppc64->gpregs->nip) -+ - static inline void *decode_pointer(uint64_t v) - { - return (void *)v; -diff --git a/criu/arch/s390/include/asm/types.h b/criu/arch/s390/include/asm/types.h -index 7522cf2..abf12de 100644 ---- a/criu/arch/s390/include/asm/types.h -+++ b/criu/arch/s390/include/asm/types.h -@@ -19,6 +19,8 @@ typedef UserS390RegsEntry UserRegsEntry; - - #define CORE_THREAD_ARCH_INFO(core) core->ti_s390 - -+#define TI_IP(core) ((core)->ti_s390->gpregs->psw_addr) -+ - static inline u64 encode_pointer(void *p) - { - return (u64)p; -diff --git a/criu/arch/x86/include/asm/types.h b/criu/arch/x86/include/asm/types.h -index a0a8ed9..8919d0a 100644 ---- a/criu/arch/x86/include/asm/types.h -+++ b/criu/arch/x86/include/asm/types.h -@@ -28,6 +28,8 @@ static inline int core_is_compat(CoreEntry *c) - - #define CORE_THREAD_ARCH_INFO(core) core->thread_info - -+#define TI_IP(core) ((core)->thread_info->gpregs->ip) -+ - typedef UserX86RegsEntry UserRegsEntry; - - static inline u64 encode_pointer(void *p) --- -2.34.1 - diff --git a/0013-cr-dump-fixup-thread-IP-when-inside-rseq-cs-Signed-o.patch b/0013-cr-dump-fixup-thread-IP-when-inside-rseq-cs-Signed-o.patch deleted file mode 100644 index ca9caf1b023db623a319647d74827499d3b2e9ca..0000000000000000000000000000000000000000 --- a/0013-cr-dump-fixup-thread-IP-when-inside-rseq-cs-Signed-o.patch +++ /dev/null @@ -1,248 +0,0 @@ -From afe090a86d6634e3620ebae16d32960f2c4933cc Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 15:04:54 +0800 -Subject: [PATCH 13/72] cr-dump: fixup thread IP when inside rseq cs - Signed-off-by: Alexander Mikhalitsyn - ---- - criu/cr-dump.c | 155 ++++++++++++++++++++++++++++++++++++++-- - criu/include/parasite.h | 2 + - criu/include/pstree.h | 1 + - 3 files changed, 154 insertions(+), 4 deletions(-) - -diff --git a/criu/cr-dump.c b/criu/cr-dump.c -index 91dd08a..a3f8973 100644 ---- a/criu/cr-dump.c -+++ b/criu/cr-dump.c -@@ -1047,11 +1047,58 @@ static int dump_task_signals(pid_t pid, struct pstree_item *item) - return 0; - } - --static int dump_thread_rseq(pid_t tid, RseqEntry **rseqep) -+static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseq, struct rseq_cs *rseq_cs) -+{ -+ int ret; -+ uint64_t addr; -+ -+ /* rseq is not registered */ -+ if (!rseq->rseq_abi_pointer) -+ return 0; -+ -+ /* -+ * We need to cover the case when victim process was inside rseq critical section -+ * at the moment when CRIU comes and seized it. We need to determine the borders -+ * of rseq critical section at first. To achieve that we need to access thread -+ * memory and read pointer to struct rseq_cs. -+ * -+ * We have two ways to access thread memory: from the parasite and using ptrace(). -+ * But it this case we can't use parasite, because if victim process returns to the -+ * execution, on the kernel side __rseq_handle_notify_resume hook will be called, -+ * then rseq_ip_fixup() -> clear_rseq_cs() and user space memory with struct rseq -+ * will be cleared. So, let's use ptrace(PTRACE_PEEKDATA). -+ */ -+ ret = ptrace_peek_area(tid, &addr, decode_pointer(rseq->rseq_abi_pointer + offsetof(struct rseq, rseq_cs)), -+ sizeof(uint64_t)); -+ if (ret) { -+ pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq_cs addr\n", tid, (unsigned long)&addr, -+ (unsigned long)(rseq->rseq_abi_pointer + offsetof(struct rseq, rseq_cs)), sizeof(uint64_t)); -+ return -1; -+ } -+ -+ /* (struct rseq)->rseq_cs is NULL */ -+ if (!addr) -+ return 0; -+ -+ ret = ptrace_peek_area(tid, rseq_cs, decode_pointer(addr), sizeof(struct rseq_cs)); -+ if (ret) { -+ pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq_cs struct\n", tid, -+ (unsigned long)rseq_cs, (unsigned long)addr, sizeof(struct rseq_cs)); -+ return -1; -+ } -+ -+ return 0; -+} -+ -+static int dump_thread_rseq(struct pstree_item *item, int i) - { - struct __ptrace_rseq_configuration rseq; - RseqEntry *rseqe = NULL; - int ret; -+ CoreEntry *core = item->core[i]; -+ RseqEntry **rseqep = &core->thread_core->rseq_entry; -+ struct rseq_cs *rseq_cs = &dmpi(item)->thread_rseq_cs[i]; -+ pid_t tid = item->threads[i].real; - - /* - * If we are here it means that rseq() syscall is supported, -@@ -1076,7 +1123,8 @@ static int dump_thread_rseq(pid_t tid, RseqEntry **rseqep) - return -1; - } - -- pr_err("Dump rseq of %d: ptr = 0x%lx sign = 0x%x\n", tid, (unsigned long)rseq.rseq_abi_pointer, rseq.signature); -+ pr_info("Dump rseq of %d: ptr = 0x%lx sign = 0x%x\n", tid, (unsigned long)rseq.rseq_abi_pointer, -+ rseq.signature); - - rseqe = xmalloc(sizeof(*rseqe)); - if (!rseqe) -@@ -1088,25 +1136,118 @@ static int dump_thread_rseq(pid_t tid, RseqEntry **rseqep) - rseqe->rseq_abi_size = rseq.rseq_abi_size; - rseqe->signature = rseq.signature; - -+ if (read_rseq_cs(tid, &rseq, rseq_cs)) -+ goto err; -+ -+ /* save rseq entry to the image */ - *rseqep = rseqe; - - return 0; -+ -+err: -+ xfree(rseqe); -+ return -1; - } - - static int dump_task_rseq(pid_t pid, struct pstree_item *item) - { - int i; -+ struct rseq_cs *thread_rseq_cs; - - /* if rseq() syscall isn't supported then nothing to dump */ - if (!kdat.has_rseq) - return 0; - -+ thread_rseq_cs = xzalloc(sizeof(*thread_rseq_cs) * item->nr_threads); -+ if (!thread_rseq_cs) -+ return -1; -+ -+ dmpi(item)->thread_rseq_cs = thread_rseq_cs; -+ - for (i = 0; i < item->nr_threads; i++) { -- if (dump_thread_rseq(item->threads[i].real, &item->core[i]->thread_core->rseq_entry)) -- return -1; -+ if (dump_thread_rseq(item, i)) -+ goto free_rseq; - } - - return 0; -+ -+free_rseq: -+ xfree(thread_rseq_cs); -+ dmpi(item)->thread_rseq_cs = NULL; -+ return -1; -+} -+ -+static bool task_in_rseq(struct rseq_cs *rseq_cs, uint64_t addr) -+{ -+ return addr >= rseq_cs->start_ip && addr < rseq_cs->start_ip + rseq_cs->post_commit_offset; -+} -+ -+static int fixup_thread_rseq(struct pstree_item *item, int i) -+{ -+ CoreEntry *core = item->core[i]; -+ struct rseq_cs *rseq_cs = &dmpi(item)->thread_rseq_cs[i]; -+ pid_t tid = item->threads[i].real; -+ -+ /* (struct rseq)->rseq_cs is NULL */ -+ if (!rseq_cs->start_ip) -+ return 0; -+ -+ pr_info("fixup_thread_rseq for %d: rseq_cs start_ip = %llx abort_ip = %llx post_commit_offset = %llx flags = %x version = %x; IP = %lx\n", -+ tid, rseq_cs->start_ip, rseq_cs->abort_ip, rseq_cs->post_commit_offset, rseq_cs->flags, -+ rseq_cs->version, (unsigned long)TI_IP(core)); -+ -+ if (rseq_cs->version != 0) { -+ pr_err("unsupported RSEQ ABI version = %d\n", rseq_cs->version); -+ return -1; -+ } -+ -+ if (task_in_rseq(rseq_cs, TI_IP(core))) { -+ struct pid *tid = &item->threads[i]; -+ -+ pr_info("The %d task is in rseq critical section. IP will be set to rseq abort handler addr\n", -+ tid->real); -+ -+ /* -+ * We need to fixup task instruction pointer from -+ * the original one (which lays inside rseq critical section) -+ * to rseq abort handler address. -+ * -+ * It's worth to mention that we need to fixup IP in CoreEntry -+ * (used when full dump/restore is performed) and also in -+ * the parasite regs storage (used if --leave-running option is used, -+ * or if dump error occured and process execution is resumed). -+ */ -+ TI_IP(core) = rseq_cs->abort_ip; -+ -+ if (item->pid->real == tid->real) { -+ compel_set_leader_ip(dmpi(item)->parasite_ctl, rseq_cs->abort_ip); -+ } else { -+ compel_set_thread_ip(dmpi(item)->thread_ctls[i], rseq_cs->abort_ip); -+ } -+ } -+ -+ return 0; -+} -+ -+static int fixup_task_rseq(pid_t pid, struct pstree_item *item) -+{ -+ int ret = 0; -+ int i; -+ -+ if (!kdat.has_ptrace_get_rseq_conf) -+ return 0; -+ -+ for (i = 0; i < item->nr_threads; i++) { -+ if (fixup_thread_rseq(item, i)) { -+ ret = -1; -+ goto exit; -+ } -+ } -+ -+exit: -+ xfree(dmpi(item)->thread_rseq_cs); -+ dmpi(item)->thread_rseq_cs = NULL; -+ return ret; - } - - static struct proc_pid_stat pps_buf; -@@ -1409,6 +1550,12 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) - goto err; - } - -+ ret = fixup_task_rseq(pid, item); -+ if (ret) { -+ pr_err("Fixup rseq for %d failed %d\n", pid, ret); -+ goto err; -+ } -+ - if (fault_injected(FI_DUMP_EARLY)) { - pr_info("fault: CRIU sudden detach\n"); - kill(getpid(), SIGKILL); -diff --git a/criu/include/parasite.h b/criu/include/parasite.h -index 5fde809..d2a0688 100644 ---- a/criu/include/parasite.h -+++ b/criu/include/parasite.h -@@ -10,6 +10,8 @@ - #include - #include - -+#include "linux/rseq.h" -+ - #include "image.h" - #include "util-pie.h" - #include "common/lock.h" -diff --git a/criu/include/pstree.h b/criu/include/pstree.h -index c5b0fa7..458e5f9 100644 ---- a/criu/include/pstree.h -+++ b/criu/include/pstree.h -@@ -63,6 +63,7 @@ struct dmp_info { - struct parasite_ctl *parasite_ctl; - struct parasite_thread_ctl **thread_ctls; - uint64_t *thread_sp; -+ struct rseq_cs *thread_rseq_cs; - - /* - * Although we don't support dumping different struct creds in general, --- -2.34.1 - diff --git a/0014-zdtm-add-rseq-transition-test-for-amd64-Signed-off-b.patch b/0014-zdtm-add-rseq-transition-test-for-amd64-Signed-off-b.patch deleted file mode 100644 index f99e1e6476ef668e70b16292247c42f5c83995e2..0000000000000000000000000000000000000000 --- a/0014-zdtm-add-rseq-transition-test-for-amd64-Signed-off-b.patch +++ /dev/null @@ -1,250 +0,0 @@ -From 961a05f47822444406edeb3d90d9113bba44cdf3 Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 15:05:34 +0800 -Subject: [PATCH 14/72] zdtm: add rseq transition test for amd64 Signed-off-by: - Alexander Mikhalitsyn - ---- - test/zdtm/transition/Makefile | 1 + - test/zdtm/transition/rseq01.c | 208 +++++++++++++++++++++++++++++++ - test/zdtm/transition/rseq01.desc | 1 + - 3 files changed, 210 insertions(+) - create mode 100644 test/zdtm/transition/rseq01.c - create mode 100644 test/zdtm/transition/rseq01.desc - -diff --git a/test/zdtm/transition/Makefile b/test/zdtm/transition/Makefile -index 9388157..fae4e27 100644 ---- a/test/zdtm/transition/Makefile -+++ b/test/zdtm/transition/Makefile -@@ -23,6 +23,7 @@ TST_NOFILE = \ - lazy-thp \ - pid_reuse \ - pidfd_store_sk \ -+ rseq01 \ - - - TST_FILE = \ -diff --git a/test/zdtm/transition/rseq01.c b/test/zdtm/transition/rseq01.c -new file mode 100644 -index 0000000..5fac5a6 ---- /dev/null -+++ b/test/zdtm/transition/rseq01.c -@@ -0,0 +1,208 @@ -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "zdtmtst.h" -+ -+#ifdef __has_include -+# if __has_include ("sys/rseq.h") -+# include -+# endif -+#endif -+ -+#if defined(__x86_64__) -+ -+#if defined(__x86_64__) && defined(RSEQ_SIG) -+static inline void *thread_pointer(void) -+{ -+ void *result; -+ asm("mov %%fs:0, %0" : "=r"(result)); -+ return result; -+} -+ -+static inline void unregister_old_rseq(void) -+{ -+ /* unregister rseq */ -+ syscall(__NR_rseq, (void *)((char *)thread_pointer() + __rseq_offset), __rseq_size, 1, RSEQ_SIG); -+} -+#else -+static inline void unregister_old_rseq(void) -+{ -+} -+#endif -+ -+const char *test_doc = "rseq() transition test"; -+const char *test_author = "Alexander Mikhalitsyn "; -+ -+/* parts of code borrowed from https://www.efficios.com/blog/2019/02/08/linux-restartable-sequences/ */ -+ -+/* some useful definitions from kernel uapi */ -+#ifndef RSEQ_SIG -+ -+enum rseq_flags { -+ RSEQ_FLAG_UNREGISTER = (1 << 0), -+}; -+ -+struct rseq { -+ uint32_t cpu_id_start; -+ uint32_t cpu_id; -+ uint64_t rseq_cs; -+ uint32_t flags; -+} __attribute__((aligned(4 * sizeof(uint64_t)))); -+ -+#define RSEQ_SIG 0x53053053 -+ -+#endif -+ -+#ifndef __NR_rseq -+#define __NR_rseq 334 -+#endif -+/* EOF */ -+ -+static volatile struct rseq *rseq_ptr; -+static __thread volatile struct rseq __rseq_abi; -+ -+static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len, int flags, uint32_t sig) -+{ -+ return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig); -+} -+ -+static void register_thread(void) -+{ -+ int rc; -+ unregister_old_rseq(); -+ rc = sys_rseq(rseq_ptr, sizeof(struct rseq), 0, RSEQ_SIG); -+ if (rc) { -+ fail("Failed to register rseq"); -+ exit(1); -+ } -+} -+ -+static void check_thread(void) -+{ -+ int rc; -+ rc = sys_rseq(rseq_ptr, sizeof(struct rseq), 0, RSEQ_SIG); -+ if (!(rc && errno == EBUSY)) { -+ fail("Failed to check rseq %d", rc); -+ exit(1); -+ } -+} -+ -+#define RSEQ_ACCESS_ONCE(x) (*(__volatile__ __typeof__(x) *)&(x)) -+ -+static int rseq_addv(intptr_t *v, intptr_t count, int cpu) -+{ -+ double a = 10000000000000000.0; -+ double b = -1; -+ /*test_msg("enter %f %f\n", a, b);*/ -+ -+ /* clang-format off */ -+ __asm__ __volatile__ goto( -+ ".pushsection __rseq_table, \"aw\"\n\t" -+ ".balign 32\n\t" -+ "cs_obj:\n\t" -+ /* version, flags */ -+ ".long 0, 0\n\t" -+ /* start_ip, post_commit_offset, abort_ip */ -+ ".quad 1f, (2f-1f), 4f\n\t" -+ ".popsection\n\t" -+ "1:\n\t" -+ "leaq cs_obj(%%rip), %%rax\n\t" -+ "movq %%rax, %[rseq_cs]\n\t" -+ "cmpl %[cpu_id], %[current_cpu_id]\n\t" -+ "jnz 4f\n\t" -+ "addq %[count], %[v]\n\t" /* final store */ -+ "mov $10000000, %%rcx\n\t" -+ "fldl %[x]\n\t" /* we have st clobbered */ -+ "5:\n\t" -+ "fsqrt\n\t" /* heavy instruction */ -+ "dec %%rcx\n\t" -+ "jnz 5b\n\t" -+ "fstpl %[y]\n\t" -+ "2:\n\t" -+ ".pushsection __rseq_failure, \"ax\"\n\t" -+ /* Disassembler-friendly signature: nopl (%rip). */ -+ ".byte 0x0f, 0xb9, 0x3d\n\t" -+ ".long 0x53053053\n\t" /* RSEQ_FLAGS */ -+ "4:\n\t" -+ /*"fstpl %[y]\n\t"*/ -+ "jmp %l[abort]\n\t" -+ /*"jmp 1b\n\t"*/ -+ ".popsection\n\t" -+ : /* gcc asm goto does not allow outputs */ -+ : [cpu_id] "r" (cpu), -+ [current_cpu_id] "m" (rseq_ptr->cpu_id), -+ [rseq_cs] "m" (rseq_ptr->rseq_cs), -+ /* final store input */ -+ [v] "m" (*v), -+ [count] "er" (count), -+ [x] "m" (a), -+ [y] "m" (b) -+ : "memory", "cc", "rax", "rcx", "st" -+ : abort -+ ); -+ /* clang-format on */ -+ /*test_msg("exit %f %f\n", a, b);*/ -+ return 0; -+abort: -+ /*test_msg("abort %f %f\n", a, b);*/ -+ return -1; -+} -+ -+int main(int argc, char *argv[]) -+{ -+ int cpu = 0; -+ int ret; -+ intptr_t *cpu_data; -+ long nr_cpus; -+ -+ rseq_ptr = &__rseq_abi; -+ memset((void *)rseq_ptr, 0, sizeof(struct rseq)); -+ -+ test_init(argc, argv); -+ nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); -+ -+ cpu_data = calloc(nr_cpus, sizeof(*cpu_data)); -+ if (!cpu_data) { -+ fail("calloc"); -+ exit(EXIT_FAILURE); -+ } -+ register_thread(); -+ -+ test_daemon(); -+ -+ while (test_go()) { -+ cpu = RSEQ_ACCESS_ONCE(rseq_ptr->cpu_id_start); -+ ret = rseq_addv(&cpu_data[cpu], 2, cpu); -+ if (ret) -+ fail("Failed to increment per-cpu counter"); -+ } -+ -+ test_waitsig(); -+ -+ check_thread(); -+ pass(); -+ -+ return 0; -+} -+ -+#else -+ -+int main(int argc, char *argv[]) -+{ -+ test_init(argc, argv); -+ skip("Unsupported arch"); -+ return 0; -+} -+ -+#endif -diff --git a/test/zdtm/transition/rseq01.desc b/test/zdtm/transition/rseq01.desc -new file mode 100644 -index 0000000..0324fa3 ---- /dev/null -+++ b/test/zdtm/transition/rseq01.desc -@@ -0,0 +1 @@ -+{'flavor': 'h', 'arch': 'x86_64', 'feature': 'get_rseq_conf'} --- -2.34.1 - diff --git a/0015-cr-dump-handle-rseq-flags-field-Userspace-may-config.patch b/0015-cr-dump-handle-rseq-flags-field-Userspace-may-config.patch deleted file mode 100644 index 0ac2b3e0ef72727c7f87537ae0abe2b89527dd5a..0000000000000000000000000000000000000000 --- a/0015-cr-dump-handle-rseq-flags-field-Userspace-may-config.patch +++ /dev/null @@ -1,330 +0,0 @@ -From 50f04f06eb3ecbdd465e417e8c5c8b19d43ec2f4 Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 15:09:44 +0800 -Subject: [PATCH 15/72] cr-dump: handle rseq flags field Userspace may - configure rseq critical section by def - -Signed-off-by: Alexander Mikhalitsyn ---- - criu/cr-dump.c | 86 +++++++++++++++++++++++++++---------------- - criu/cr-restore.c | 63 +++++++++++++++++++++++++++++++ - criu/include/pstree.h | 1 + - images/rseq.proto | 1 + - 4 files changed, 119 insertions(+), 32 deletions(-) - -diff --git a/criu/cr-dump.c b/criu/cr-dump.c -index a3f8973..79387fb 100644 ---- a/criu/cr-dump.c -+++ b/criu/cr-dump.c -@@ -1047,13 +1047,13 @@ static int dump_task_signals(pid_t pid, struct pstree_item *item) - return 0; - } - --static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseq, struct rseq_cs *rseq_cs) -+static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseqc, -+ struct rseq_cs *rseq_cs, struct rseq *rseq) - { - int ret; -- uint64_t addr; - - /* rseq is not registered */ -- if (!rseq->rseq_abi_pointer) -+ if (!rseqc->rseq_abi_pointer) - return 0; - - /* -@@ -1068,22 +1068,21 @@ static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseq, str - * then rseq_ip_fixup() -> clear_rseq_cs() and user space memory with struct rseq - * will be cleared. So, let's use ptrace(PTRACE_PEEKDATA). - */ -- ret = ptrace_peek_area(tid, &addr, decode_pointer(rseq->rseq_abi_pointer + offsetof(struct rseq, rseq_cs)), -- sizeof(uint64_t)); -+ ret = ptrace_peek_area(tid, rseq, decode_pointer(rseqc->rseq_abi_pointer), -+ sizeof(struct rseq)); - if (ret) { -- pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq_cs addr\n", tid, (unsigned long)&addr, -- (unsigned long)(rseq->rseq_abi_pointer + offsetof(struct rseq, rseq_cs)), sizeof(uint64_t)); -+ pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq struct\n", tid, (unsigned long)rseq, -+ (unsigned long)(rseqc->rseq_abi_pointer), sizeof(uint64_t)); - return -1; - } - -- /* (struct rseq)->rseq_cs is NULL */ -- if (!addr) -+ if (!rseq->rseq_cs.ptr64) - return 0; - -- ret = ptrace_peek_area(tid, rseq_cs, decode_pointer(addr), sizeof(struct rseq_cs)); -+ ret = ptrace_peek_area(tid, rseq_cs, decode_pointer(rseq->rseq_cs.ptr64), sizeof(struct rseq_cs)); - if (ret) { - pr_err("ptrace_peek_area(%d, %lx, %lx, %lx): fail to read rseq_cs struct\n", tid, -- (unsigned long)rseq_cs, (unsigned long)addr, sizeof(struct rseq_cs)); -+ (unsigned long)rseq_cs, (unsigned long)rseq->rseq_cs.ptr64, sizeof(struct rseq_cs)); - return -1; - } - -@@ -1092,11 +1091,12 @@ static int read_rseq_cs(pid_t tid, struct __ptrace_rseq_configuration *rseq, str - - static int dump_thread_rseq(struct pstree_item *item, int i) - { -- struct __ptrace_rseq_configuration rseq; -+ struct __ptrace_rseq_configuration rseqc; - RseqEntry *rseqe = NULL; - int ret; - CoreEntry *core = item->core[i]; - RseqEntry **rseqep = &core->thread_core->rseq_entry; -+ struct rseq rseq; - struct rseq_cs *rseq_cs = &dmpi(item)->thread_rseq_cs[i]; - pid_t tid = item->threads[i].real; - -@@ -1111,20 +1111,20 @@ static int dump_thread_rseq(struct pstree_item *item, int i) - if (!kdat.has_ptrace_get_rseq_conf) - return 0; - -- ret = ptrace(PTRACE_GET_RSEQ_CONFIGURATION, tid, sizeof(rseq), &rseq); -- if (ret != sizeof(rseq)) { -+ ret = ptrace(PTRACE_GET_RSEQ_CONFIGURATION, tid, sizeof(rseqc), &rseqc); -+ if (ret != sizeof(rseqc)) { - pr_perror("ptrace(PTRACE_GET_RSEQ_CONFIGURATION, %d) = %d", tid, ret); - return -1; - } - -- if (rseq.flags != 0) { -+ if (rseqc.flags != 0) { - pr_err("something wrong with ptrace(PTRACE_GET_RSEQ_CONFIGURATION, %d) flags = 0x%x\n", tid, -- rseq.flags); -+ rseqc.flags); - return -1; - } - -- pr_info("Dump rseq of %d: ptr = 0x%lx sign = 0x%x\n", tid, (unsigned long)rseq.rseq_abi_pointer, -- rseq.signature); -+ pr_info("Dump rseq of %d: ptr = 0x%lx sign = 0x%x\n", tid, (unsigned long)rseqc.rseq_abi_pointer, -+ rseqc.signature); - - rseqe = xmalloc(sizeof(*rseqe)); - if (!rseqe) -@@ -1132,13 +1132,22 @@ static int dump_thread_rseq(struct pstree_item *item, int i) - - rseq_entry__init(rseqe); - -- rseqe->rseq_abi_pointer = rseq.rseq_abi_pointer; -- rseqe->rseq_abi_size = rseq.rseq_abi_size; -- rseqe->signature = rseq.signature; -+ rseqe->rseq_abi_pointer = rseqc.rseq_abi_pointer; -+ rseqe->rseq_abi_size = rseqc.rseq_abi_size; -+ rseqe->signature = rseqc.signature; - -- if (read_rseq_cs(tid, &rseq, rseq_cs)) -+ if (read_rseq_cs(tid, &rseqc, rseq_cs, &rseq)) - goto err; - -+ rseqe->has_rseq_cs_pointer = true; -+ rseqe->rseq_cs_pointer = rseq.rseq_cs.ptr64; -+ pr_err("cs pointer %lx\n", rseqe->rseq_cs_pointer); -+ /* we won't save rseq_cs to the image (only pointer), -+ * so let's combine flags from both struct rseq and struct rseq_cs -+ * (kernel does the same when interpreting RSEQ_CS_FLAG_*) -+ */ -+ rseq_cs->flags |= rseq.flags; -+ - /* save rseq entry to the image */ - *rseqep = rseqe; - -@@ -1188,11 +1197,11 @@ static int fixup_thread_rseq(struct pstree_item *item, int i) - struct rseq_cs *rseq_cs = &dmpi(item)->thread_rseq_cs[i]; - pid_t tid = item->threads[i].real; - -- /* (struct rseq)->rseq_cs is NULL */ -+ /* equivalent to (struct rseq)->rseq_cs is NULL */ - if (!rseq_cs->start_ip) - return 0; - -- pr_info("fixup_thread_rseq for %d: rseq_cs start_ip = %llx abort_ip = %llx post_commit_offset = %llx flags = %x version = %x; IP = %lx\n", -+ pr_debug("fixup_thread_rseq for %d: rseq_cs start_ip = %llx abort_ip = %llx post_commit_offset = %llx flags = %x version = %x; IP = %lx\n", - tid, rseq_cs->start_ip, rseq_cs->abort_ip, rseq_cs->post_commit_offset, rseq_cs->flags, - rseq_cs->version, (unsigned long)TI_IP(core)); - -@@ -1204,25 +1213,38 @@ static int fixup_thread_rseq(struct pstree_item *item, int i) - if (task_in_rseq(rseq_cs, TI_IP(core))) { - struct pid *tid = &item->threads[i]; - -- pr_info("The %d task is in rseq critical section. IP will be set to rseq abort handler addr\n", -- tid->real); -- - /* - * We need to fixup task instruction pointer from - * the original one (which lays inside rseq critical section) -- * to rseq abort handler address. -+ * to rseq abort handler address. But we need to look on rseq_cs->flags -+ * (please refer to struct rseq -> flags field description). -+ * Naive idea of flags support may be like... let's change instruction pointer (IP) -+ * to rseq_cs->abort_ip if !(rseq_cs->flags & RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL). -+ * But unfortunately, it doesn't work properly, because the kernel does -+ * clean up of rseq_cs field in the struct rseq (modifies userspace memory). -+ * So, we need to preserve original value of (struct rseq)->rseq_cs field in the -+ * image and restore it's value before releasing threads. - * - * It's worth to mention that we need to fixup IP in CoreEntry - * (used when full dump/restore is performed) and also in - * the parasite regs storage (used if --leave-running option is used, - * or if dump error occured and process execution is resumed). - */ -- TI_IP(core) = rseq_cs->abort_ip; - -- if (item->pid->real == tid->real) { -- compel_set_leader_ip(dmpi(item)->parasite_ctl, rseq_cs->abort_ip); -+ if (rseq_cs->flags & RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL) { -+ pr_err("The %d task is in rseq critical section.!!! IP will be set to rseq abort handler addr\n", -+ tid->real); - } else { -- compel_set_thread_ip(dmpi(item)->thread_ctls[i], rseq_cs->abort_ip); -+ pr_warn("The %d task is in rseq critical section. IP will be set to rseq abort handler addr\n", -+ tid->real); -+ -+ TI_IP(core) = rseq_cs->abort_ip; -+ -+ if (item->pid->real == tid->real) { -+ compel_set_leader_ip(dmpi(item)->parasite_ctl, rseq_cs->abort_ip); -+ } else { -+ compel_set_thread_ip(dmpi(item)->thread_ctls[i], rseq_cs->abort_ip); -+ } - } - } - -diff --git a/criu/cr-restore.c b/criu/cr-restore.c -index b2bd044..864140f 100644 ---- a/criu/cr-restore.c -+++ b/criu/cr-restore.c -@@ -23,6 +23,7 @@ - #include "common/compiler.h" - - #include "linux/mount.h" -+#include "linux/rseq.h" - - #include "clone-noasan.h" - #include "cr_options.h" -@@ -779,6 +780,7 @@ static int open_cores(int pid, CoreEntry *leader_core) - { - int i, tpid; - CoreEntry **cores = NULL; -+ //RseqEntry *rseqs; - - cores = xmalloc(sizeof(*cores) * current->nr_threads); - if (!cores) -@@ -812,6 +814,19 @@ static int open_cores(int pid, CoreEntry *leader_core) - } - } - -+ -+ pr_err("item %lx\n", (uint64_t)current); -+ -+ for (i = 0; i < current->nr_threads; i++) { -+ ThreadCoreEntry *tc = cores[i]->thread_core; -+ -+ /* compatibility with older CRIU versions */ -+ if (!tc->rseq_entry) -+ continue; -+ -+ current->rseqe[i] = *tc->rseq_entry; -+ } -+ - return 0; - err: - xfree(cores); -@@ -868,8 +883,15 @@ static int restore_one_alive_task(int pid, CoreEntry *core) - { - unsigned args_len; - struct task_restore_args *ta; -+ RseqEntry *rseqs; - pr_info("Restoring resources\n"); - -+ rseqs = shmalloc(sizeof(*rseqs) * current->nr_threads); -+ if (!rseqs) -+ return -1; -+ -+ current->rseqe = rseqs; -+ - rst_mem_switch_to_private(); - - args_len = round_up(sizeof(*ta) + sizeof(struct thread_restore_args) * current->nr_threads, page_size()); -@@ -1966,6 +1988,44 @@ static int attach_to_tasks(bool root_seized) - return 0; - } - -+static int restore_rseq_cs(void) -+{ -+ struct pstree_item *item; -+ -+ for_each_pstree_item(item) { -+ int i; -+ -+ if (!task_alive(item)) -+ continue; -+ -+ if (item->nr_threads == 1) { -+ item->threads[0].real = item->pid->real; -+ } else { -+ if (parse_threads(item->pid->real, &item->threads, &item->nr_threads)) -+ return -1; -+ } -+ -+ for (i = 0; i < item->nr_threads; i++) { -+ pid_t pid = item->threads[i].real; -+ -+ if (!item->rseqe[i].rseq_cs_pointer || !item->rseqe[i].rseq_abi_pointer) { -+ pr_err("item %lx rseqe %lx\n", (uint64_t)item, (uint64_t)item->rseqe); -+ pr_err("nothing to do with cs_pointer\n"); -+ continue; -+ } -+ -+ pr_err("restoring cs ... %lx \n", item->rseqe[i].rseq_cs_pointer); -+ -+ if (ptrace_poke_area(pid, &item->rseqe[i].rseq_cs_pointer, (void *)(item->rseqe[i].rseq_abi_pointer + offsetof(struct rseq, rseq_cs)), sizeof(uint64_t))) { -+ pr_err("Can't restore memfd args (pid: %d)\n", pid); -+ return -1; -+ } -+ } -+ } -+ -+ return 0; -+} -+ - static int catch_tasks(bool root_seized, enum trace_flags *flag) - { - struct pstree_item *item; -@@ -2400,6 +2460,9 @@ skip_ns_bouncing: - if (restore_freezer_state()) - pr_err("Unable to restore freezer state\n"); - -+ /* just before releasing threads we have to restore rseq_cs */ -+ restore_rseq_cs(); -+ - /* Detaches from processes and they continue run through sigreturn. */ - if (finalize_restore_detach()) - goto out_kill_network_unlocked; -diff --git a/criu/include/pstree.h b/criu/include/pstree.h -index 458e5f9..97bef11 100644 ---- a/criu/include/pstree.h -+++ b/criu/include/pstree.h -@@ -25,6 +25,7 @@ struct pstree_item { - int nr_threads; /* number of threads */ - struct pid *threads; /* array of threads */ - CoreEntry **core; -+ RseqEntry *rseqe; - TaskKobjIdsEntry *ids; - union { - futex_t task_st; -diff --git a/images/rseq.proto b/images/rseq.proto -index be28004..45cb847 100644 ---- a/images/rseq.proto -+++ b/images/rseq.proto -@@ -6,4 +6,5 @@ message rseq_entry { - required uint64 rseq_abi_pointer = 1; - required uint32 rseq_abi_size = 2; - required uint32 signature = 3; -+ optional uint64 rseq_cs_pointer = 4; - } --- -2.34.1 - diff --git a/0016-zdtm-add-rseq02-transition-test-with-NO_RESTART-CS-f.patch b/0016-zdtm-add-rseq02-transition-test-with-NO_RESTART-CS-f.patch deleted file mode 100644 index 3fe2cc5e6f9491974efe7f1dd5014870577b7811..0000000000000000000000000000000000000000 --- a/0016-zdtm-add-rseq02-transition-test-with-NO_RESTART-CS-f.patch +++ /dev/null @@ -1,177 +0,0 @@ -From dc5f32571e66ab72842e735259d0c442ed1c603b Mon Sep 17 00:00:00 2001 -From: bb-cat -Date: Wed, 2 Mar 2022 15:10:24 +0800 -Subject: [PATCH 16/72] zdtm: add rseq02 transition test with NO_RESTART CS - flag Signed-off-by: Alexander Mikhalitsyn - - ---- - test/zdtm/transition/Makefile | 2 ++ - test/zdtm/transition/rseq01.c | 61 ++++++++++++++++++++++++++++++-- - test/zdtm/transition/rseq02.c | 1 + - test/zdtm/transition/rseq02.desc | 1 + - 4 files changed, 63 insertions(+), 2 deletions(-) - create mode 120000 test/zdtm/transition/rseq02.c - create mode 120000 test/zdtm/transition/rseq02.desc - -diff --git a/test/zdtm/transition/Makefile b/test/zdtm/transition/Makefile -index fae4e27..378a4fc 100644 ---- a/test/zdtm/transition/Makefile -+++ b/test/zdtm/transition/Makefile -@@ -24,6 +24,7 @@ TST_NOFILE = \ - pid_reuse \ - pidfd_store_sk \ - rseq01 \ -+ rseq02 \ - - - TST_FILE = \ -@@ -82,6 +83,7 @@ ptrace: LDFLAGS += -pthread - fork2: CFLAGS += -D FORK2 - thread-bomb.o: CFLAGS += -pthread - thread-bomb: LDFLAGS += -pthread -+rseq02: CFLAGS += -D NOABORT - - %: %.sh - cp $< $@ -diff --git a/test/zdtm/transition/rseq01.c b/test/zdtm/transition/rseq01.c -index 5fac5a6..25e1d61 100644 ---- a/test/zdtm/transition/rseq01.c -+++ b/test/zdtm/transition/rseq01.c -@@ -53,6 +53,18 @@ enum rseq_flags { - RSEQ_FLAG_UNREGISTER = (1 << 0), - }; - -+enum rseq_cs_flags_bit { -+ RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0, -+ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1, -+ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2, -+}; -+ -+enum rseq_cs_flags { -+ RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT), -+ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT), -+ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE = (1U << RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT), -+}; -+ - struct rseq { - uint32_t cpu_id_start; - uint32_t cpu_id; -@@ -104,6 +116,7 @@ static int rseq_addv(intptr_t *v, intptr_t count, int cpu) - { - double a = 10000000000000000.0; - double b = -1; -+ uint64_t rseq_cs1, rseq_cs2; - /*test_msg("enter %f %f\n", a, b);*/ - - /* clang-format off */ -@@ -129,6 +142,9 @@ static int rseq_addv(intptr_t *v, intptr_t count, int cpu) - "dec %%rcx\n\t" - "jnz 5b\n\t" - "fstpl %[y]\n\t" -+ "movq %%rax, %[rseq_cs_check2]\n\t" -+ "movq %[rseq_cs], %%rax\n\t" -+ "movq %%rax, %[rseq_cs_check1]\n\t" - "2:\n\t" - ".pushsection __rseq_failure, \"ax\"\n\t" - /* Disassembler-friendly signature: nopl (%rip). */ -@@ -143,6 +159,8 @@ static int rseq_addv(intptr_t *v, intptr_t count, int cpu) - : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (rseq_ptr->cpu_id), - [rseq_cs] "m" (rseq_ptr->rseq_cs), -+ [rseq_cs_check1] "m" (rseq_cs1), -+ [rseq_cs_check2] "m" (rseq_cs2), - /* final store input */ - [v] "m" (*v), - [count] "er" (count), -@@ -153,8 +171,20 @@ static int rseq_addv(intptr_t *v, intptr_t count, int cpu) - ); - /* clang-format on */ - /*test_msg("exit %f %f\n", a, b);*/ -+ test_msg("%lx %lx\n", rseq_cs1, rseq_cs2); -+ if (rseq_cs1 != rseq_cs2) { -+ /* -+ * It means that we finished critical section -+ * *normally* (haven't jumped to abort) but the kernel had cleaned up -+ * rseq_ptr->rseq_cs before we left critical section -+ * and CRIU wasn't restored it correctly. -+ * That's a bug picture. -+ */ -+ return -1; -+ } - return 0; - abort: -+ test_msg("%lx %lx\n", rseq_cs1, rseq_cs2); - /*test_msg("abort %f %f\n", a, b);*/ - return -1; - } -@@ -177,21 +207,48 @@ int main(int argc, char *argv[]) - fail("calloc"); - exit(EXIT_FAILURE); - } -+ - register_thread(); - -+ /* -+ * We want to test that RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL -+ * is handled properly by CRIU, but that flag can be used -+ * only with all another flags set. -+ * Please, refer to -+ * https://github.com/torvalds/linux/blob/master/kernel/rseq.c#L192 -+ */ -+#ifdef NOABORT -+ rseq_ptr->flags = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT | -+ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL | -+ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE; -+#endif -+ - test_daemon(); - - while (test_go()) { - cpu = RSEQ_ACCESS_ONCE(rseq_ptr->cpu_id_start); - ret = rseq_addv(&cpu_data[cpu], 2, cpu); -- if (ret) -+#ifndef NOABORT -+ /* just ignore abort */ -+ ret = 0; -+#else -+ if (ret) { - fail("Failed to increment per-cpu counter"); -+ break; -+ } else { -+ //test_msg("cpu_data[%d] == %ld\n", cpu, (long int)cpu_data[cpu]); -+ } -+#endif - } - - test_waitsig(); - - check_thread(); -- pass(); -+ -+ if (ret) -+ fail(); -+ else -+ pass(); - - return 0; - } -diff --git a/test/zdtm/transition/rseq02.c b/test/zdtm/transition/rseq02.c -new file mode 120000 -index 0000000..d564917 ---- /dev/null -+++ b/test/zdtm/transition/rseq02.c -@@ -0,0 +1 @@ -+rseq01.c -\ No newline at end of file -diff --git a/test/zdtm/transition/rseq02.desc b/test/zdtm/transition/rseq02.desc -new file mode 120000 -index 0000000..b888f0d ---- /dev/null -+++ b/test/zdtm/transition/rseq02.desc -@@ -0,0 +1 @@ -+rseq01.desc -\ No newline at end of file --- -2.34.1 - diff --git a/0017-zdtm-fix-zdtm-static-maps00-case-in-arm64.patch b/0017-zdtm-fix-zdtm-static-maps00-case-in-arm64.patch deleted file mode 100644 index 1da618349d7e556f98891c8dc9a44ffa80fc5a5d..0000000000000000000000000000000000000000 --- a/0017-zdtm-fix-zdtm-static-maps00-case-in-arm64.patch +++ /dev/null @@ -1,56 +0,0 @@ -From 1f760a8bbb539e81b1ef48aeedbebb792d7b74b2 Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Fri, 14 Jan 2022 16:39:32 +0800 -Subject: [PATCH 17/72] zdtm: fix zdtm/static/maps00 case in arm64 - -This case sometimes will cause SIGILL signal in arm64 platform. - -<> notes: - The ARM architecture does not require the hardware to ensure coherency - between instruction caches and memory, even for locations of shared - memory. - -Therefore, we need flush dcache and icache for self-modifying code. - -- https://developer.arm.com/documentation/den0024/a/Caches/Point-of-coherency-and-unification - -Signed-off-by: fu.lin ---- - test/zdtm/static/maps00.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/test/zdtm/static/maps00.c b/test/zdtm/static/maps00.c -index 10a4cac..5ef8f1a 100644 ---- a/test/zdtm/static/maps00.c -+++ b/test/zdtm/static/maps00.c -@@ -158,7 +158,8 @@ static int check_map(struct map *map) - - if (!sigsetjmp(segv_ret, 1)) { - if (map->prot & PROT_WRITE) { -- memcpy(map->ptr, test_func, getpagesize()); -+ memcpy(map->ptr,test_func, ONE_MAP_SIZE); -+ __builtin___clear_cache(map->ptr, map->ptr+ONE_MAP_SIZE); - } else { - if (!(map->flag & MAP_ANONYMOUS)) { - uint8_t funlen = (uint8_t *)check_map - (uint8_t *)test_func; -@@ -169,14 +170,15 @@ static int check_map(struct map *map) - } - } - } -- if (!(map->flag & MAP_ANONYMOUS) || map->prot & PROT_WRITE) -+ if (!(map->flag & MAP_ANONYMOUS) || (map->prot & PROT_WRITE)) - /* Function body has been copied into the mapping */ - ((int (*)(void))map->ptr)(); /* perform exec access */ -- else -+ else { - /* No way to copy function body into mapping, - * clear exec bit from effective protection - */ - prot &= PROT_WRITE | PROT_READ | !PROT_EXEC; -+ } - } else - prot &= PROT_WRITE | PROT_READ | !PROT_EXEC; - --- -2.34.1 - diff --git a/0018-test-flush-ipt-rules-after-program-exits.patch b/0018-test-flush-ipt-rules-after-program-exits.patch deleted file mode 100644 index 9d61eb383d328c76add6999d417a8501d0c5b263..0000000000000000000000000000000000000000 --- a/0018-test-flush-ipt-rules-after-program-exits.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 003edcab5c2dc1a3f00dba7f4b7bcdd017eb34b5 Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Thu, 20 Jan 2022 19:45:14 +0800 -Subject: [PATCH 18/72] test: flush ipt rules after program exits - -Signed-off-by: fu.lin ---- - test/zdtm/static/socket-tcp-nfconntrack.desc | 2 +- - test/zdtm/static/socket-tcp.c | 13 +++++++++++++ - 2 files changed, 14 insertions(+), 1 deletion(-) - -diff --git a/test/zdtm/static/socket-tcp-nfconntrack.desc b/test/zdtm/static/socket-tcp-nfconntrack.desc -index add2513..05bdb49 100644 ---- a/test/zdtm/static/socket-tcp-nfconntrack.desc -+++ b/test/zdtm/static/socket-tcp-nfconntrack.desc -@@ -1 +1 @@ --{'flavor': 'h', 'opts': '--tcp-established', 'flags': 'suid'} -+{'flavor': 'h', 'opts': '--tcp-established', 'flags': 'suid excl'} -diff --git a/test/zdtm/static/socket-tcp.c b/test/zdtm/static/socket-tcp.c -index f6ef473..29b0fce 100644 ---- a/test/zdtm/static/socket-tcp.c -+++ b/test/zdtm/static/socket-tcp.c -@@ -57,6 +57,13 @@ int write_data(int fd, const unsigned char *buf, int size) - return 0; - } - -+#ifdef ZDTM_CONNTRACK -+static void ipt_flush(void) -+{ -+ system("iptables -w --flush"); -+} -+#endif -+ - int main(int argc, char **argv) - { - unsigned char buf[BUF_SIZE]; -@@ -72,6 +79,12 @@ int main(int argc, char **argv) - pr_perror("unshare"); - return 1; - } -+ -+ if (atexit(ipt_flush) != 0) { -+ pr_perror("atexit"); -+ return 1; -+ } -+ - if (system("ip link set up dev lo")) - return 1; - if (system("iptables -w -A INPUT -i lo -p tcp -m state --state NEW,ESTABLISHED -j ACCEPT")) --- -2.34.1 - diff --git a/0019-zdtm-fix-cleaning-step-of-zdtm_netns.patch b/0019-zdtm-fix-cleaning-step-of-zdtm_netns.patch deleted file mode 100644 index 5d7540cf811bca2901938c5f905d12675bfc7507..0000000000000000000000000000000000000000 --- a/0019-zdtm-fix-cleaning-step-of-zdtm_netns.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 5e68ba283e442467baef762bfcf87910d84e01ae Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Wed, 19 Jan 2022 10:01:25 +0800 -Subject: [PATCH 19/72] zdtm: fix cleaning step of zdtm_netns - -Signed-off-by: fu.lin ---- - test/zdtm.py | 10 +++++++--- - 1 file changed, 7 insertions(+), 3 deletions(-) - -diff --git a/test/zdtm.py b/test/zdtm.py -index 0a52e1b..0feece0 100755 ---- a/test/zdtm.py -+++ b/test/zdtm.py -@@ -1,4 +1,6 @@ - #!/usr/bin/env python -+# -*- coding: utf-8 -*- -+ - from __future__ import absolute_import, division, print_function, unicode_literals - - import argparse -@@ -2110,7 +2112,8 @@ class Launcher: - - if self.__fail: - print_sep("FAIL", "#") -- sys.exit(1) -+ -+ return self.__fail - - - def all_tests(opts): -@@ -2375,10 +2378,11 @@ def run_tests(opts): - else: - launcher.skip(t, "no flavors") - finally: -- launcher.finish() -+ fail = launcher.finish() - if opts['join_ns']: - subprocess.Popen(["ip", "netns", "delete", "zdtm_netns"]).wait() -- -+ if fail: -+ sys.exit(1) - - sti_fmt = "%-40s%-10s%s" - --- -2.34.1 - diff --git a/0021-pid-add-pid-recover-method-for-criu.patch b/0021-pid-add-pid-recover-method-for-criu.patch deleted file mode 100644 index b7a9467b8eb1ea69f3620d975712f1af789d2389..0000000000000000000000000000000000000000 --- a/0021-pid-add-pid-recover-method-for-criu.patch +++ /dev/null @@ -1,213 +0,0 @@ -From 2911f505eefcfaea582d457c1fa18df34d151954 Mon Sep 17 00:00:00 2001 -From: Jingxian He -Date: Wed, 19 May 2021 21:33:22 +0800 -Subject: [PATCH 21/72] pid: add pid recover method for criu - -The default pid recover method cannot recover the task -pid at every time. -We add a new pid recover method by setting the fork_pid of -the parent task struct, add the kernel will alloc pid by -the fork_pid. -The new pid recover method can also avoid other tasks using -the dumping task pids. - -Signed-off-by: Jingxian He ---- - criu/config.c | 1 + - criu/cr-restore.c | 27 ++++++++++++++++++++++++++- - criu/crtools.c | 1 + - criu/include/cr_options.h | 1 + - criu/include/pin-mem.h | 4 ++++ - criu/include/restorer.h | 1 + - criu/pie/restorer.c | 25 ++++++++++++++++++++++++- - 7 files changed, 58 insertions(+), 2 deletions(-) - -diff --git a/criu/config.c b/criu/config.c -index 53a5cfd..6dfbb01 100644 ---- a/criu/config.c -+++ b/criu/config.c -@@ -699,6 +699,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, - BOOL_OPT("pin-memory", &opts.pin_memory), - { "lsm-mount-context", required_argument, 0, 1099 }, - { "network-lock", required_argument, 0, 1100 }, -+ BOOL_OPT("use-fork-pid", &opts.use_fork_pid), - {}, - }; - -diff --git a/criu/cr-restore.c b/criu/cr-restore.c -index 5514c29..497dd14 100644 ---- a/criu/cr-restore.c -+++ b/criu/cr-restore.c -@@ -80,6 +80,7 @@ - #include "timens.h" - #include "bpfmap.h" - #include "apparmor.h" -+#include "pin-mem.h" - - #include "parasite-syscall.h" - #include "files-reg.h" -@@ -1340,6 +1341,23 @@ static int set_next_pid(void *arg) - return 0; - } - -+static int write_fork_pid(int pid) -+{ -+ int fd, ret; -+ -+ fd = open(PIN_MEM_FILE, O_RDWR); -+ if (fd < 0) { -+ pr_warn("error open file: %s\n", PIN_MEM_FILE); -+ return -1; -+ } -+ ret = ioctl(fd, SET_FORK_PID, &pid); -+ if (ret < 0) { -+ pr_warn("write fork pid fail, errno: %s\n", strerror(errno)); -+ } -+ close(fd); -+ return ret; -+} -+ - static inline int fork_with_pid(struct pstree_item *item) - { - struct cr_clone_arg ca; -@@ -1424,7 +1442,7 @@ static inline int fork_with_pid(struct pstree_item *item) - if (!(ca.clone_flags & CLONE_NEWPID)) { - lock_last_pid(); - -- if (!kdat.has_clone3_set_tid) { -+ if (!kdat.has_clone3_set_tid && !opts.use_fork_pid) { - if (external_pidns) { - /* - * Restoring into another namespace requires a helper -@@ -1434,6 +1452,12 @@ static inline int fork_with_pid(struct pstree_item *item) - */ - ret = call_in_child_process(set_next_pid, (void *)&pid); - } else { -+ if (opts.use_fork_pid) { -+ ret = write_fork_pid(pid); -+ if (ret < 0) -+ goto err_unlock; -+ } -+ - ret = set_next_pid((void *)&pid); - } - if (ret != 0) { -@@ -3886,6 +3910,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns - task_args->thread_args); - - task_args->pin_memory = opts.pin_memory; -+ task_args->use_fork_pid = opts.use_fork_pid; - - /* - * An indirect call to task_restore, note it never returns -diff --git a/criu/crtools.c b/criu/crtools.c -index 1b90481..502acdf 100644 ---- a/criu/crtools.c -+++ b/criu/crtools.c -@@ -448,6 +448,7 @@ usage: - " --with-cpu-affinity Allow to restore cpu affinity. Only for hosts with\n" - " same cpu quantity.\n" - " --pin-memory Use pin memory method for checkpoint and restore.\n" -+ " --use-fork-pid Allow to restore task pid by setting fork pid of task struct.\n" - "\n" - "Check options:\n" - " Without options, \"criu check\" checks availability of absolutely required\n" -diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h -index 61898fd..923cc5f 100644 ---- a/criu/include/cr_options.h -+++ b/criu/include/cr_options.h -@@ -191,6 +191,7 @@ struct cr_options { - /* restore cpu affinity */ - int with_cpu_affinity; - int pin_memory; -+ int use_fork_pid; - }; - - extern struct cr_options opts; -diff --git a/criu/include/pin-mem.h b/criu/include/pin-mem.h -index 7e53b12..2b54996 100644 ---- a/criu/include/pin-mem.h -+++ b/criu/include/pin-mem.h -@@ -6,6 +6,7 @@ - #include "vma.pb-c.h" - - #if __has_include("linux/pin_memory.h") -+# define CONFIG_PID_RESERVE - # include - #else - -@@ -35,6 +36,9 @@ struct pin_mem_area_set { - struct _pin_mem_area mem_area[MAX_PIN_MEM_AREA_NUM]; - }; - -+#define _SET_FORK_PID 8 -+#define SET_FORK_PID _IOW(PIN_MEM_MAGIC, _SET_FORK_PID, int) -+ - #endif /* __has_include("linux/pin_memory.h") */ - - #define PIN_MEM_FILE "/dev/pinmem" -diff --git a/criu/include/restorer.h b/criu/include/restorer.h -index e0bdc04..93f87f4 100644 ---- a/criu/include/restorer.h -+++ b/criu/include/restorer.h -@@ -233,6 +233,7 @@ struct task_restore_args { - int child_subreaper; - bool has_clone3_set_tid; - bool pin_memory; -+ bool use_fork_pid; - } __aligned(64); - - /* -diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c -index db01ba5..1317582 100644 ---- a/criu/pie/restorer.c -+++ b/criu/pie/restorer.c -@@ -1426,6 +1426,22 @@ int remap_vmas(int pid) - return ret; - } - -+int write_fork_pid(int pid) -+{ -+ int fd, ret; -+ -+ fd = sys_open(PIN_MEM_FILE, O_RDWR, 0); -+ if (fd < 0) { -+ pr_warn("error open file: %s\n", PIN_MEM_FILE); -+ return -1; -+ } -+ ret = sys_ioctl(fd, SET_FORK_PID, (unsigned long) &pid); -+ if (ret < 0) { -+ pr_warn("write fork pid fail fail: %d\n", pid); -+ } -+ sys_close(fd); -+ return ret; -+} - - /* - * The main routine to restore task via sigreturn. -@@ -1815,7 +1831,7 @@ long __export_restore_task(struct task_restore_args *args) - long parent_tid; - int i, fd = -1; - -- if (!args->has_clone3_set_tid) { -+ if (!args->has_clone3_set_tid && !args->use_fork_pid) { - /* One level pid ns hierarhy */ - fd = sys_openat(args->proc_fd, LAST_PID_PATH, O_RDWR, 0); - if (fd < 0) { -@@ -1847,6 +1863,13 @@ long __export_restore_task(struct task_restore_args *args) - pr_debug("Using clone3 to restore the process\n"); - RUN_CLONE3_RESTORE_FN(ret, c_args, sizeof(c_args), &thread_args[i], - args->clone_restore_fn); -+ } else if (args->use_fork_pid) { -+ if (write_fork_pid(thread_args[i].pid) < 0) { -+ pr_err("Clone fail with fork pid\n"); -+ mutex_unlock(&task_entries_local->last_pid_mutex); -+ goto core_restore_end; -+ } -+ RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, thread_args, args->clone_restore_fn); - } else { - last_pid_len = - std_vprint_num(last_pid_buf, sizeof(last_pid_buf), thread_args[i].pid - 1, &s); --- -2.34.1 - diff --git a/0022-notifier-add-notifier-calling-method-for-checkpoint-.patch b/0022-notifier-add-notifier-calling-method-for-checkpoint-.patch deleted file mode 100644 index 318c53b0bbd6ff0c282fd1c11c9a61456809fec4..0000000000000000000000000000000000000000 --- a/0022-notifier-add-notifier-calling-method-for-checkpoint-.patch +++ /dev/null @@ -1,621 +0,0 @@ -From 33c351e18eddc2517f799c1cac20790ebabddbc8 Mon Sep 17 00:00:00 2001 -From: Jingxian He -Date: Wed, 19 May 2021 21:45:03 +0800 -Subject: [PATCH 22/72] notifier: add notifier calling method for checkpoint - and restore - -Add notifier calling method for checkpoint and restore during kernel module upgrading. - -Signed-off-by: Xiaoguang Li -Signed-off-by: He Jingxian -Signed-off-by: fu.lin ---- - criu/config.c | 1 + - criu/cr-dump.c | 34 +++++++++++ - criu/cr-restore.c | 18 +++++- - criu/crtools.c | 2 + - criu/include/cr_options.h | 1 + - criu/include/notifier.h | 44 +++++++++++++++ - criu/include/restorer.h | 1 + - criu/include/util.h | 2 + - criu/pie/restorer.c | 116 ++++++++++++++++++++++++++++++++++---- - criu/pie/util.c | 91 ++++++++++++++++++++++++++++++ - 10 files changed, 297 insertions(+), 13 deletions(-) - create mode 100644 criu/include/notifier.h - -diff --git a/criu/config.c b/criu/config.c -index 6dfbb01..5d1cff6 100644 ---- a/criu/config.c -+++ b/criu/config.c -@@ -700,6 +700,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, - { "lsm-mount-context", required_argument, 0, 1099 }, - { "network-lock", required_argument, 0, 1100 }, - BOOL_OPT("use-fork-pid", &opts.use_fork_pid), -+ BOOL_OPT("with-notifier", &opts.with_notifier_kup), - {}, - }; - -diff --git a/criu/cr-dump.c b/criu/cr-dump.c -index 5fac9ce..50a2f9b 100644 ---- a/criu/cr-dump.c -+++ b/criu/cr-dump.c -@@ -87,6 +87,7 @@ - #include "apparmor.h" - #include "asm/dump.h" - #include "pin-mem.h" -+#include "notifier.h" - - /* - * Architectures can overwrite this function to restore register sets that -@@ -1981,6 +1982,8 @@ static int cr_lazy_mem_dump(void) - return ret; - } - -+static enum notifier_state notifier_state = NOTHING_COMPLETE; -+ - static int cr_dump_finish(int ret) - { - int post_dump_ret = 0; -@@ -2067,6 +2070,20 @@ static int cr_dump_finish(int ret) - clear_pin_mem(0); - } - -+ if (ret != 0 && opts.with_notifier_kup) { -+ pr_info("call notifier rollback\n"); -+ switch (notifier_state) { -+ case PRE_FREEZE_COMPLETE: -+ notifier_kup(PRE_FREEZE, ROLLBACK, true); -+ break; -+ case FREEZE_TO_KILL_COMPLETE: -+ notifier_kup(FREEZE_TO_KILL, ROLLBACK, true); -+ break; -+ default: -+ break; -+ } -+ } -+ - if (ret) { - pr_err("Dumping FAILED.\n"); - } else { -@@ -2100,6 +2117,14 @@ int cr_dump_tasks(pid_t pid) - goto err; - root_item->pid->real = pid; - -+ if (notifier_kup(PRE_FREEZE, PREPARE, opts.with_notifier_kup)) { -+ /* disable rollback function because we has already rollbacked. */ -+ opts.with_notifier_kup = false; -+ pr_err("call notifier: %d err\n", PRE_FREEZE); -+ goto err; -+ } else -+ notifier_state = PRE_FREEZE_COMPLETE; -+ - pre_dump_ret = run_scripts(ACT_PRE_DUMP); - if (pre_dump_ret != 0) { - pr_err("Pre dump script failed with %d!\n", pre_dump_ret); -@@ -2258,6 +2283,15 @@ int cr_dump_tasks(pid_t pid) - ret = write_img_inventory(&he); - if (ret) - goto err; -+ -+ ret = notifier_kup(FREEZE_TO_KILL, PREPARE, opts.with_notifier_kup); -+ if (ret) { -+ opts.with_notifier_kup = false; -+ pr_err("call notifier:%d err\n", FREEZE_TO_KILL); -+ goto err; -+ } else -+ notifier_state = FREEZE_TO_KILL_COMPLETE; -+ - err: - if (parent_ie) - inventory_entry__free_unpacked(parent_ie, NULL); -diff --git a/criu/cr-restore.c b/criu/cr-restore.c -index 497dd14..03511b6 100644 ---- a/criu/cr-restore.c -+++ b/criu/cr-restore.c -@@ -81,6 +81,7 @@ - #include "bpfmap.h" - #include "apparmor.h" - #include "pin-mem.h" -+#include "notifier.h" - - #include "parasite-syscall.h" - #include "files-reg.h" -@@ -1951,6 +1952,7 @@ static int restore_task_with_children(void *_arg) - return 0; - - err: -+ do_notifier_rollback(opts.with_notifier_kup, POST_UPDATE_KERNEL_COMPLETE); - if (current->parent == NULL) - futex_abort_and_wake(&task_entries->nr_in_progress); - exit(1); -@@ -2451,8 +2453,10 @@ skip_ns_bouncing: - */ - attach_to_tasks(root_seized); - -- if (restore_switch_stage(CR_STATE_RESTORE_CREDS)) -+ if (restore_switch_stage(CR_STATE_RESTORE_CREDS)) { -+ pr_err("Can't switch to CR_STATE_RESTORE_CREDS stage\n"); - goto out_kill_network_unlocked; -+ } - - timing_stop(TIME_RESTORE); - -@@ -2631,6 +2635,15 @@ int cr_restore_tasks(void) - goto clean_cgroup; - - ret = restore_root_task(root_item); -+ if (ret) -+ goto err; -+ -+ ret = notifier_kup(POST_RUN, PREPARE, opts.with_notifier_kup); -+ if (ret < 0) { -+ opts.with_notifier_kup = false; -+ pr_err("calling POST_RUN notifier list return err\n"); -+ } -+ - clean_cgroup: - fini_cgroup(); - err: -@@ -3922,6 +3935,9 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns - err: - free_mappings(&self_vmas); - err_nv: -+ if (current->parent == NULL && opts.with_notifier_kup) -+ do_notifier_rollback(true, POST_UPDATE_KERNEL_COMPLETE); -+ - /* Just to be sure */ - exit(1); - return -1; -diff --git a/criu/crtools.c b/criu/crtools.c -index 502acdf..1d08620 100644 ---- a/criu/crtools.c -+++ b/criu/crtools.c -@@ -449,6 +449,8 @@ usage: - " same cpu quantity.\n" - " --pin-memory Use pin memory method for checkpoint and restore.\n" - " --use-fork-pid Allow to restore task pid by setting fork pid of task struct.\n" -+ " --with-notifier Allow to checkpoint/restore kup notifier chain.\n" -+ " This feature needs the kernel assistance.\n" - "\n" - "Check options:\n" - " Without options, \"criu check\" checks availability of absolutely required\n" -diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h -index 923cc5f..039edba 100644 ---- a/criu/include/cr_options.h -+++ b/criu/include/cr_options.h -@@ -192,6 +192,7 @@ struct cr_options { - int with_cpu_affinity; - int pin_memory; - int use_fork_pid; -+ int with_notifier_kup; - }; - - extern struct cr_options opts; -diff --git a/criu/include/notifier.h b/criu/include/notifier.h -new file mode 100644 -index 0000000..e4972a7 ---- /dev/null -+++ b/criu/include/notifier.h -@@ -0,0 +1,44 @@ -+#ifndef __CRIU_NOTIFIER_H__ -+#define __CRIU_NOTIFIER_H__ -+ -+#define NOTIFY_PROC_PATH "/sys/kernel/modrestore/nvwa_notifier" -+ -+#if __has_include("linux/modrestore.h") -+# define CONFIG_EULEROS_MODRESTORE_NOTIFY /* useless, historical factors */ -+# include -+#else -+enum KUP_HOOK_POINT { -+ PRE_FREEZE, -+ FREEZE_TO_KILL, -+ PRE_UPDATE_KERNEL, -+ POST_UPDATE_KERNEL, -+ UNFREEZE_TO_RUN, -+ POST_RUN, -+ -+ KUP_HOOK_MAX, -+}; -+ -+enum nvwa_cmd { -+ PREPARE = 0, -+ ROLLBACK, -+ -+ NVWA_CMD_MAX, -+}; -+#endif -+ -+enum notifier_state { -+ NOTHING_COMPLETE, -+ PRE_FREEZE_COMPLETE, -+ FREEZE_TO_KILL_COMPLETE, -+ PRE_UPDATE_KERNEL_COMPLETE, -+ POST_UPDATE_KERNEL_COMPLETE, -+ UNFREEZE_TO_RUN_COMPLETE, -+ POST_RUN_COMPLETE, -+ -+ NOTIFIER_ROLLBACK_DONE = 0xfc17173b, /* has done rollback */ -+}; -+ -+int notifier_kup(enum KUP_HOOK_POINT, enum nvwa_cmd, bool); -+void do_notifier_rollback(bool, enum notifier_state); -+ -+#endif /* __CRIU_NOTIFIER_H__ */ -diff --git a/criu/include/restorer.h b/criu/include/restorer.h -index 93f87f4..2f7345b 100644 ---- a/criu/include/restorer.h -+++ b/criu/include/restorer.h -@@ -234,6 +234,7 @@ struct task_restore_args { - bool has_clone3_set_tid; - bool pin_memory; - bool use_fork_pid; -+ bool with_notifier_kup; - } __aligned(64); - - /* -diff --git a/criu/include/util.h b/criu/include/util.h -index 1c0b3c7..e0049a6 100644 ---- a/criu/include/util.h -+++ b/criu/include/util.h -@@ -13,6 +13,8 @@ - #include - #include - #include -+#include -+#include - - #include "int.h" - #include "common/compiler.h" -diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c -index 1317582..4a1d38d 100644 ---- a/criu/pie/restorer.c -+++ b/criu/pie/restorer.c -@@ -36,6 +36,7 @@ - #include "vma.h" - #include "uffd.h" - #include "sched.h" -+#include "notifier.h" - - #include "common/lock.h" - #include "common/page.h" -@@ -77,6 +78,7 @@ - - static struct task_entries *task_entries_local; - static futex_t thread_inprogress; -+static futex_t thread_start; - static pid_t *helpers; - static int n_helpers; - static pid_t *zombies; -@@ -118,10 +120,28 @@ void parasite_cleanup(void) - - extern void cr_restore_rt(void) asm("__cr_restore_rt") __attribute__((visibility("hidden"))); - -+static int args_with_notifier_kup; -+static enum notifier_state notifier_state = POST_UPDATE_KERNEL_COMPLETE; -+static futex_t notifier_done; -+ - static void sigchld_handler(int signal, siginfo_t *siginfo, void *data) - { - char *r; - int i; -+ rt_sigaction_t act; -+ -+ if (signal == SIGSEGV || signal == SIGBUS || signal == SIGILL) { -+ /* Make sure we exit with the right signal at the end. So for instance -+ * the core will be dumped if enabled. */ -+ pr_info("recv signal: %d\n", signal); -+ do_notifier_rollback(args_with_notifier_kup, notifier_state); -+ ksigemptyset (&act.rt_sa_mask); -+ act.rt_sa_flags = SA_SIGINFO | SA_RESTART; -+ act.rt_sa_handler = (rt_sighandler_t)SIG_DFL; -+ sys_sigaction(signal, &act, NULL, sizeof(k_rtsigset_t)); -+ sys_kill(sys_getpid(),signal); -+ return; -+ } - - /* We can ignore helpers that die, we expect them to after - * CR_STATE_RESTORE is finished. */ -@@ -148,10 +168,14 @@ static void sigchld_handler(int signal, siginfo_t *siginfo, void *data) - - pr_info("Task %d %s %d\n", siginfo->si_pid, r, siginfo->si_status); - -+ pr_info("%s: trace do_notifier_rollback\n", __func__); -+ do_notifier_rollback(args_with_notifier_kup, notifier_state); - futex_abort_and_wake(&task_entries_local->nr_in_progress); - /* sa_restorer may be unmaped, so we can't go back to userspace*/ - sys_kill(sys_getpid(), SIGSTOP); - sys_exit_group(1); -+ -+ /* for notifier, do nothing when receiving SIGCHLD signal */ - } - - static int lsm_set_label(char *label, char *type, int procfd) -@@ -616,6 +640,27 @@ static void noinline rst_sigreturn(unsigned long new_sp, struct rt_sigframe *sig - ARCH_RT_SIGRETURN(new_sp, sigframe); - } - -+/* Notice: only one task, so it isn't necessary to consider concurrent. */ -+static int do_notifier(bool *notify) -+{ -+ int retval = 0; -+ -+ if (!*notify) -+ return 0; -+ -+ pr_info("unfreeze_to_run restore notifier\n"); -+ retval = notifier_kup(UNFREEZE_TO_RUN, PREPARE, true); -+ if (retval) { -+ *notify = false; -+ notifier_state = NOTIFIER_ROLLBACK_DONE; -+ pr_err("call notifier: %d err\n", UNFREEZE_TO_RUN); -+ } -+ -+ notifier_state = UNFREEZE_TO_RUN_COMPLETE; -+ -+ return retval; -+} -+ - /* - * Threads restoration via sigreturn. Note it's locked - * routine and calls for unlock at the end. -@@ -654,12 +699,18 @@ long __export_restore_thread(struct thread_restore_args *args) - - pr_info("%ld: Restored\n", sys_gettid()); - -- restore_finish_stage(task_entries_local, CR_STATE_RESTORE); -+ if (!!(restore_finish_stage(task_entries_local, CR_STATE_RESTORE) & FUTEX_ABORT_FLAG)) { -+ pr_err("%s: abort by CR_STATE_RESTORE\n", __func__); -+ goto core_restore_end; -+ } - - if (restore_signals(args->siginfo, args->siginfo_n, false)) - goto core_restore_end; - -- restore_finish_stage(task_entries_local, CR_STATE_RESTORE_SIGCHLD); -+ if (!!(restore_finish_stage(task_entries_local, CR_STATE_RESTORE_SIGCHLD) & FUTEX_ABORT_FLAG)) { -+ pr_err("%s: abort by CR_STATE_RESTORE_SIGCHLD\n", __func__); -+ goto core_restore_end; -+ } - - /* - * Make sure it's before creds, since it's privileged -@@ -674,16 +725,29 @@ long __export_restore_thread(struct thread_restore_args *args) - if (ret) - BUG(); - -- restore_finish_stage(task_entries_local, CR_STATE_RESTORE_CREDS); -+ if (!!(restore_finish_stage(task_entries_local, CR_STATE_RESTORE_CREDS) & FUTEX_ABORT_FLAG)) { -+ pr_err("%s: abort by CR_STATE_RESTORE_CREDS\n", __func__); -+ goto core_restore_end; -+ } - - futex_dec_and_wake(&thread_inprogress); -+ futex_wait_while(&thread_start, 0); -+ if (!!(futex_get(&thread_start) & FUTEX_ABORT_FLAG)) { -+ pr_err("%s: abort by thread_start\n", __func__); -+ goto wait_notifier; -+ } - - new_sp = (long)rt_sigframe + RT_SIGFRAME_OFFSET(rt_sigframe); - rst_sigreturn(new_sp, rt_sigframe); - - core_restore_end: -- pr_err("Restorer abnormal termination for %ld\n", sys_getpid()); -- futex_abort_and_wake(&task_entries_local->nr_in_progress); -+ futex_abort_and_wake(&thread_start); -+ futex_abort_and_wake(&task_entries_local->start); -+ -+wait_notifier: -+ pr_err("%s: Restorer abnormal termination for %ld\n", __func__, sys_getpid()); -+ futex_wait_while(¬ifier_done, 0); -+ - sys_exit_group(1); - return -1; - } -@@ -1465,6 +1529,10 @@ long __export_restore_task(struct task_restore_args *args) - rt_sigaction_t act; - bool has_vdso_proxy; - -+ futex_set(&thread_inprogress, 1); -+ futex_set(&thread_start, 0); -+ futex_set(¬ifier_done, 0); -+ - bootstrap_start = args->bootstrap_start; - bootstrap_len = args->bootstrap_len; - -@@ -1481,6 +1549,7 @@ long __export_restore_task(struct task_restore_args *args) - #ifdef ARCH_HAS_LONG_PAGES - __page_size = args->page_size; - #endif -+ args_with_notifier_kup = args->with_notifier_kup; - - ksigfillset(&act.rt_sa_mask); - act.rt_sa_handler = sigchld_handler; -@@ -1895,7 +1964,8 @@ long __export_restore_task(struct task_restore_args *args) - pr_err("Unable to create a thread: %ld\n", ret); - mutex_unlock(&task_entries_local->last_pid_mutex); - goto core_restore_end; -- } -+ } else -+ futex_inc(&thread_inprogress); - } - - mutex_unlock(&task_entries_local->last_pid_mutex); -@@ -1919,7 +1989,14 @@ long __export_restore_task(struct task_restore_args *args) - - pr_info("%ld: Restored\n", sys_getpid()); - -- restore_finish_stage(task_entries_local, CR_STATE_RESTORE); -+ if (!!(restore_finish_stage(task_entries_local, CR_STATE_RESTORE) & FUTEX_ABORT_FLAG)) { -+ pr_err("%s: abort by CR_STATE_RESTORE\n", __func__); -+ goto core_restore_end; -+ } -+ -+ ret = do_notifier(&args->with_notifier_kup); -+ if (ret) -+ goto core_restore_end; - - if (wait_helpers(args) < 0) - goto core_restore_end; -@@ -1965,7 +2042,8 @@ long __export_restore_task(struct task_restore_args *args) - if (ret) - goto core_restore_end; - -- restore_finish_stage(task_entries_local, CR_STATE_RESTORE_SIGCHLD); -+ if (!!(restore_finish_stage(task_entries_local, CR_STATE_RESTORE_SIGCHLD) & FUTEX_ABORT_FLAG)) -+ goto core_restore_end; - - rst_tcp_socks_all(args); - -@@ -1986,15 +2064,20 @@ long __export_restore_task(struct task_restore_args *args) - ret = ret || restore_pdeath_sig(args->t); - ret = ret || restore_child_subreaper(args->child_subreaper); - -- futex_set_and_wake(&thread_inprogress, args->nr_threads); -- -- restore_finish_stage(task_entries_local, CR_STATE_RESTORE_CREDS); -+ if (!!(restore_finish_stage(task_entries_local, CR_STATE_RESTORE_CREDS) & FUTEX_ABORT_FLAG)) -+ goto core_restore_end; - - if (ret) - BUG(); - - /* Wait until children stop to use args->task_entries */ - futex_wait_while_gt(&thread_inprogress, 1); -+ if (!!(futex_get(&thread_start) & FUTEX_ABORT_FLAG)) { -+ pr_err("%s: terminate by main thread futex_start\n", __func__); -+ goto handle_notifier; -+ } -+ -+ futex_set_and_wake(&thread_start, 1); - - sys_close(args->proc_fd); - std_log_set_fd(-1); -@@ -2030,8 +2113,17 @@ long __export_restore_task(struct task_restore_args *args) - rst_sigreturn(new_sp, rt_sigframe); - - core_restore_end: -- futex_abort_and_wake(&task_entries_local->nr_in_progress); -+ futex_abort_and_wake(&thread_start); -+ futex_abort_and_wake(&task_entries_local->start); -+ -+handle_notifier: -+ do_notifier_rollback(args->with_notifier_kup, notifier_state); -+ -+ futex_abort_and_wake(&task_entries_local->nr_in_progress); /* notifier the criu main process */ - pr_err("Restorer fail %ld\n", sys_getpid()); -+ -+ futex_set_and_wake(¬ifier_done, 1); /* wake all other threads to exit */ -+ - sys_exit_group(1); - return -1; - } -diff --git a/criu/pie/util.c b/criu/pie/util.c -index e7a5a9f..9871db7 100644 ---- a/criu/pie/util.c -+++ b/criu/pie/util.c -@@ -11,6 +11,7 @@ - #include "fcntl.h" - #include "log.h" - #include "util-pie.h" -+#include "notifier.h" - - #ifdef CR_NOGLIBC - #include -@@ -52,3 +53,93 @@ err_close: - __sys(close)(fd); - return -1; - } -+ -+#define KUP_BUF_SIZE 256 -+ -+static int int_to_string(unsigned number, char *buf, size_t total) { -+ unsigned remainder, quotient, i, len; -+ -+ quotient = number; -+ len = 0; -+ do { -+ quotient /= 10; -+ len += 1; -+ } while (quotient > 0); -+ -+ if (len > total - 1) -+ return -1; -+ -+ quotient = number; -+ i = 1; -+ do { -+ remainder = quotient % 10; -+ quotient = quotient / 10; -+ buf[len-i] = '0' + remainder; -+ i++; -+ } while (quotient > 0); -+ buf[len] = '\0'; -+ -+ return len == 0 ? -1 : len; -+} -+ -+int notifier_kup(enum KUP_HOOK_POINT action, enum nvwa_cmd cmd, bool enable) -+{ -+ int fd, count = 0, retval = 0; -+ char buf[KUP_BUF_SIZE] = {0}; -+ -+ if (!enable) -+ return 0; -+ -+ fd = __sys(open)(NOTIFY_PROC_PATH, O_WRONLY, 0); -+ if (fd == -EACCES) { -+ /* there is no priviledge to open file, ignore this condition. */ -+ pr_info("%s: open %s failed, retval: %d (-EACCES)\n", -+ __func__, NOTIFY_PROC_PATH, -EACCES); -+ return 0; -+ } else if (fd < 0) { -+ __pr_perror("%s: Can't open %s: %d\n", __func__, NOTIFY_PROC_PATH, fd); -+ return fd; -+ } -+ -+ retval = int_to_string(action, buf, sizeof(buf)-count); -+ if (retval <= 0) { -+ __pr_perror("%s: int_to_string error\n", __func__); -+ goto err_close; -+ } -+ -+ buf[retval] = ':'; -+ count = retval + 1; -+ -+ retval = int_to_string(cmd, buf+count, sizeof(buf)-count); -+ if (retval <= 0) { -+ __pr_perror("%s: int_to_string error\n", __func__); -+ goto err_close; -+ } -+ -+ count += retval; -+ retval = __sys(write)(fd, buf, count); -+ if (retval < 0) -+ __pr_perror("%s: Can't write to %s\n", __func__, NOTIFY_PROC_PATH); -+ -+err_close: -+ __sys(close)(fd); -+ -+ return retval < 0 ? -1 : 0; -+} -+ -+void do_notifier_rollback(bool rollback, enum notifier_state status) -+{ -+ if (!rollback) -+ return; -+ -+ switch (status) { -+ case POST_UPDATE_KERNEL_COMPLETE: -+ notifier_kup(POST_UPDATE_KERNEL, ROLLBACK, true); -+ break; -+ case UNFREEZE_TO_RUN_COMPLETE: -+ notifier_kup(UNFREEZE_TO_RUN, ROLLBACK, true); -+ break; -+ default: -+ break; -+ } -+} --- -2.34.1 - diff --git a/0023-block-device-dump-block-device-as-reguler-file.patch b/0023-block-device-dump-block-device-as-reguler-file.patch deleted file mode 100644 index 678cfdf5040852dda33392330c83ada5c971a5e9..0000000000000000000000000000000000000000 --- a/0023-block-device-dump-block-device-as-reguler-file.patch +++ /dev/null @@ -1,62 +0,0 @@ -From 48c6f11d0b3c5f0549ff52cce0c8ce31ad67518f Mon Sep 17 00:00:00 2001 -From: Jingxian He -Date: Wed, 19 May 2021 21:49:15 +0800 -Subject: [PATCH 23/72] block-device: dump block device as reguler file - -Add block device dump and restore method for kernel module upgrading. - -Conflict:NA -Reference:https://gitee.com/src-openeuler/criu/pulls/21 -Signed-off-by: Xiaoguang Li ---- - criu/files.c | 27 +++++++++++++++++++++++++++ - 1 file changed, 27 insertions(+) - -diff --git a/criu/files.c b/criu/files.c -index 93754fb..f262d80 100644 ---- a/criu/files.c -+++ b/criu/files.c -@@ -442,6 +442,30 @@ static const struct fdtype_ops *get_mem_dev_ops(struct fd_parms *p, int minor) - return ops; - } - -+static int dump_blkdev(struct fd_parms *p, int lfd, FdinfoEntry *e) -+{ -+ struct fd_link *link_old = p->link; -+ int maj = major(p->stat.st_rdev); -+ const struct fdtype_ops *ops; -+ int err; -+ -+ switch (maj) { -+ case SCSI_DISK0_MAJOR: -+ ops = ®file_dump_ops; -+ break; -+ default: { -+ char more[32] = "block_dev"; -+ -+ err = dump_unsupp_fd(p, lfd, "blk", more, e); -+ p->link = link_old; -+ return err; -+ } -+ } -+ err = do_dump_gen_file(p, lfd, ops, e); -+ p->link = link_old; -+ return err; -+} -+ - static int dump_chrdev(struct fd_parms *p, int lfd, FdinfoEntry *e) - { - struct fd_link *link_old = p->link; -@@ -508,6 +532,9 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, - p.fd_ctl = ctl; /* Some dump_opts require this to talk to parasite */ - p.dfds = dfds; /* epoll needs to verify if target fd exist */ - -+ if (S_ISBLK(p.stat.st_mode)) -+ return dump_blkdev(&p, lfd, e); -+ - if (S_ISSOCK(p.stat.st_mode)) - return dump_socket(&p, lfd, e); - --- -2.34.1 - diff --git a/0024-anon-inode-add-support-for-anon-inode-fd.patch b/0024-anon-inode-add-support-for-anon-inode-fd.patch deleted file mode 100644 index 0e2bfc4e13d333e15e0ddf0d8b331b3cf31a6b8c..0000000000000000000000000000000000000000 --- a/0024-anon-inode-add-support-for-anon-inode-fd.patch +++ /dev/null @@ -1,316 +0,0 @@ -From 9bb9af3189ae8a7eadf975befa2aa30b7227259e Mon Sep 17 00:00:00 2001 -From: Jingxian He -Date: Wed, 19 May 2021 21:52:49 +0800 -Subject: [PATCH 24/72] anon-inode: add support for anon inode fd - -Add support for anon inode fd dump and restore during module upgrade. - -Signed-off-by: Xiaoguang Li -Signed-off-by: Jingxian He -Signed-off-by: fu.lin ---- - criu/cr-restore.c | 3 +++ - criu/files-reg.c | 3 ++- - criu/include/image.h | 1 + - criu/include/mem.h | 1 + - criu/include/restorer.h | 6 ++++++ - criu/mem.c | 23 +++++++++++++++++++++++ - criu/pie/restorer.c | 37 +++++++++++++++++++++++++++++++++++++ - criu/proc_parse.c | 31 ++++++++++++++++++++++++++++--- - images/vma.proto | 1 + - 9 files changed, 102 insertions(+), 4 deletions(-) - -diff --git a/criu/cr-restore.c b/criu/cr-restore.c -index 03511b6..b805265 100644 ---- a/criu/cr-restore.c -+++ b/criu/cr-restore.c -@@ -971,6 +971,8 @@ static int restore_one_alive_task(int pid, CoreEntry *core) - if (prepare_vmas(current, ta)) - return -1; - -+ if (prepare_vma_names(current, ta)) -+ return -1; - /* - * Sockets have to be restored in their network namespaces, - * so a task namespace has to be restored after sockets. -@@ -3733,6 +3735,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns - #endif - - RST_MEM_FIXUP_PPTR(task_args->vmas); -+ RST_MEM_FIXUP_PPTR(task_args->vma_names); - RST_MEM_FIXUP_PPTR(task_args->rings); - RST_MEM_FIXUP_PPTR(task_args->tcp_socks); - RST_MEM_FIXUP_PPTR(task_args->timerfd); -diff --git a/criu/files-reg.c b/criu/files-reg.c -index ee54d1d..fbdf811 100644 ---- a/criu/files-reg.c -+++ b/criu/files-reg.c -@@ -2137,7 +2137,7 @@ int do_open_reg_noseek_flags(int ns_root_fd, struct reg_file_info *rfi, void *ar - - /* unnamed temporary files are restored as ghost files */ - flags &= ~O_TMPFILE; -- -+ pr_info("openat path is: %s\n", rfi->path); - fd = openat(ns_root_fd, rfi->path, flags); - if (fd < 0) { - pr_perror("Can't open file %s on restore", rfi->path); -@@ -2307,6 +2307,7 @@ int collect_filemap(struct vma_area *vma) - if (!fd) - return -1; - -+ pr_info("find fd for %lx, shmid: %lx\n", vma->e->start, vma->e->shmid); - vma->vmfd = fd; - vma->vm_open = open_filemap; - return 0; -diff --git a/criu/include/image.h b/criu/include/image.h -index 14659db..f598de7 100644 ---- a/criu/include/image.h -+++ b/criu/include/image.h -@@ -84,6 +84,7 @@ - #define VMA_AREA_VVAR (1 << 12) - #define VMA_AREA_AIORING (1 << 13) - #define VMA_AREA_MEMFD (1 << 14) -+#define VMA_AREA_ANON_INODE (1 << 15) - - #define VMA_CLOSE (1 << 28) - #define VMA_NO_PROT_WRITE (1 << 29) -diff --git a/criu/include/mem.h b/criu/include/mem.h -index 03574ea..ccf8da6 100644 ---- a/criu/include/mem.h -+++ b/criu/include/mem.h -@@ -45,6 +45,7 @@ extern int parasite_dump_pages_seized(struct pstree_item *item, struct vm_area_l - struct task_restore_args; - int open_vmas(struct pstree_item *t); - int prepare_vmas(struct pstree_item *t, struct task_restore_args *ta); -+int prepare_vma_names(struct pstree_item *t, struct task_restore_args *ta); - int unmap_guard_pages(struct pstree_item *t); - int prepare_mappings(struct pstree_item *t); - bool should_dump_page(VmaEntry *vmae, u64 pme); -diff --git a/criu/include/restorer.h b/criu/include/restorer.h -index 2f7345b..a81cc1b 100644 ---- a/criu/include/restorer.h -+++ b/criu/include/restorer.h -@@ -134,6 +134,10 @@ struct restore_vma_io { - - #define RIO_SIZE(niovs) (sizeof(struct restore_vma_io) + (niovs) * sizeof(struct iovec)) - -+struct vma_names { -+ char name[PATH_MAX]; -+}; -+ - struct task_restore_args { - struct thread_restore_args *t; /* thread group leader */ - -@@ -157,6 +161,8 @@ struct task_restore_args { - VmaEntry *vmas; - unsigned int vmas_n; - -+ struct vma_names *vma_names; -+ - int vma_ios_fd; - struct restore_vma_io *vma_ios; - unsigned int vma_ios_n; -diff --git a/criu/mem.c b/criu/mem.c -index 07efdbe..00965f0 100644 ---- a/criu/mem.c -+++ b/criu/mem.c -@@ -525,6 +525,9 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, struct parasit - continue; - } - -+ if (vma_entry_is(vma_area->e, VMA_AREA_ANON_INODE)) -+ continue; -+ - ret = generate_vma_iovs(item, vma_area, pp, &xfer, args, ctl, &pmc, has_parent, mdc->pre_dump, - parent_predump_mode); - if (ret < 0) -@@ -1355,6 +1358,9 @@ int open_vmas(struct pstree_item *t) - filemap_ctx_init(false); - - list_for_each_entry(vma, &vmas->h, list) { -+ if (vma_area_is(vma, VMA_AREA_ANON_INODE)) -+ continue; -+ - if (!vma_area_is(vma, VMA_AREA_REGULAR) || !vma->vm_open) - continue; - -@@ -1437,3 +1443,20 @@ int prepare_vmas(struct pstree_item *t, struct task_restore_args *ta) - - return prepare_vma_ios(t, ta); - } -+ -+int prepare_vma_names(struct pstree_item *t, struct task_restore_args *ta) -+{ -+ struct vma_area *vma; -+ struct vm_area_list *vmas = &rsti(t)->vmas; -+ ta->vma_names = (struct vma_names *)rst_mem_align_cpos(RM_PRIVATE); -+ -+ list_for_each_entry(vma, &vmas->h, list) { -+ struct vma_names *vma_names; -+ vma_names = rst_mem_alloc(sizeof(*vma_names), RM_PRIVATE); -+ if (!vma_names) -+ return -1; -+ -+ memcpy(vma_names->name, vma->e->name, strlen(vma->e->name) + 1); -+ } -+ return 0; -+} -diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c -index 4a1d38d..549bbd6 100644 ---- a/criu/pie/restorer.c -+++ b/criu/pie/restorer.c -@@ -68,6 +68,27 @@ - #define FALLOC_FL_PUNCH_HOLE 0x02 - #endif - -+#define ANON_PROC_PATH "/sys/kernel/modrestore/anon_state_restore" -+ -+static int restore_anon_mapping(VmaEntry *vma_entry, struct vma_names *vma_name) -+{ -+ int fd; -+ -+ fd = sys_open(ANON_PROC_PATH, O_WRONLY, 0); -+ if (fd < 0) { -+ pr_info("anon sys fs open fail:%s\n", ANON_PROC_PATH); -+ return fd; -+ } -+ pr_info("restore anon mapping: %s\n", vma_name->name); -+ -+ if (sys_write(fd, vma_name->name, 4096) < 0) { -+ sys_close(fd); -+ return -1; -+ } -+ sys_close(fd); -+ return 0; -+} -+ - #define sys_prctl_safe(opcode, val1, val2, val3) \ - ({ \ - long __ret = sys_prctl(opcode, val1, val2, val3, 0); \ -@@ -1348,6 +1369,10 @@ static bool can_restore_vdso(struct task_restore_args *args) - } - - /* -+ * pr_info("anon vma name:%s\n", vma_name->name); -+ * if (restore_anon_mapping(vma_entry, vma_name) < 0) -+ * goto core_restore_end; -+ * continue; - * There is a use-case for restoring vvar alone: valgrind (see #488). - * On the other side, we expect that vvar is touched by application - * only from vdso. So, we can put a stale page and proceed restore -@@ -1528,6 +1553,7 @@ long __export_restore_task(struct task_restore_args *args) - pid_t my_pid = sys_getpid(); - rt_sigaction_t act; - bool has_vdso_proxy; -+ struct vma_names *vma_name; - - futex_set(&thread_inprogress, 1); - futex_set(&thread_start, 0); -@@ -1667,6 +1693,14 @@ long __export_restore_task(struct task_restore_args *args) - */ - for (i = 0; i < args->vmas_n; i++) { - vma_entry = args->vmas + i; -+ vma_name = args->vma_names + i; -+ -+ if (vma_entry_is(vma_entry, VMA_AREA_ANON_INODE)) { -+ pr_info("anon vma name:%s\n", vma_name->name); -+ if (restore_anon_mapping(vma_entry, vma_name) < 0) -+ goto core_restore_end; -+ continue; -+ } - - if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR) && !vma_entry_is(vma_entry, VMA_AREA_AIORING)) - continue; -@@ -1784,6 +1818,9 @@ long __export_restore_task(struct task_restore_args *args) - if (!vma_entry->has_madv || !vma_entry->madv) - continue; - -+ if (vma_entry_is(vma_entry, VMA_AREA_ANON_INODE)) -+ continue; -+ - for (m = 0; m < sizeof(vma_entry->madv) * 8; m++) { - if (vma_entry->madv & (1ul << m)) { - ret = sys_madvise(vma_entry->start, vma_entry_len(vma_entry), m); -diff --git a/criu/proc_parse.c b/criu/proc_parse.c -index f3491e7..e41d43a 100644 ---- a/criu/proc_parse.c -+++ b/criu/proc_parse.c -@@ -76,6 +76,7 @@ static char *buf = __buf.buf; - */ - - #define AIO_FNAME "/[aio]" -+#define ANON_FNAME "anon_inode" - - /* check the @line starts with "%lx-%lx" format */ - static bool __is_vma_range_fmt(char *line) -@@ -171,8 +172,17 @@ static void parse_vma_vmflags(char *buf, struct vma_area *vma_area) - * only exception is VVAR area that mapped by the kernel as - * VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP - */ -- if (io_pf && !vma_area_is(vma_area, VMA_AREA_VVAR) && !vma_entry_is(vma_area->e, VMA_FILE_SHARED)) -- vma_area->e->status |= VMA_UNSUPP; -+ /* There are many types of io/pf vm_map, not only vvar, but also -+ * anon_inode, and char device. -+ * For anon_inode and char device, we use anon_notifier to restore -+ * status. Therefore, we disable the broken code here. -+ */ -+// if (io_pf && !vma_area_is(vma_area, VMA_AREA_VVAR) && -+// !vma_area_is(vma_area, VMA_AREA_ANON_INODE)) -+// { -+// pr_info("set current status tp VMA_UNSUPP\n"); -+// vma_area->e->status |= VMA_UNSUPP; -+// } - - if (vma_area->e->madv) - vma_area->e->has_madv = true; -@@ -437,6 +447,21 @@ static int vma_get_mapfile(const char *fname, struct vma_area *vma, DIR *mfd, st - return 0; - } - -+ if (!strncmp(fname, ANON_FNAME, sizeof(ANON_FNAME) - 1)) { -+ /*anon_inode*/ -+ close_safe(vm_file_fd); -+ vma->e->status = VMA_AREA_ANON_INODE; -+ vma->e->name = xmalloc(PATH_MAX); -+ if (!vma->e->name) { -+ pr_err("alloc vma name of anon-inode fail.\n"); -+ return -1; -+ } -+ snprintf(vma->e->name, PATH_MAX - 1, "%"PRIx64"-%"PRIx64 " %s", vma->e->start, vma->e->end, fname); -+ vma->e->name[PATH_MAX - 1] = 0; -+ pr_info("set vma_area status to: %d, name:%s\n", vma->e->status, vma->e->name); -+ return 0; -+ } -+ - pr_err("Unknown shit %o (%s)\n", buf.st_mode, fname); - return -1; - } -@@ -566,6 +591,7 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, const char *file_pat - vma_area->e->shmid = prev->e->shmid; - vma_area->vmst = prev->vmst; - vma_area->mnt_id = prev->mnt_id; -+ vma_area->e->name = prev->e->name; - - if (!(vma_area->e->status & VMA_AREA_SYSVIPC)) { - vma_area->e->status &= ~(VMA_FILE_PRIVATE | VMA_FILE_SHARED); -@@ -728,7 +754,6 @@ int parse_smaps(pid_t pid, struct vm_area_list *vma_area_list, dump_filemap_t du - if (IS_ERR(str)) - goto err; - eof = (str == NULL); -- - if (!eof && !__is_vma_range_fmt(str)) { - if (!strncmp(str, "Nonlinear", 9)) { - BUG_ON(!vma_area); -diff --git a/images/vma.proto b/images/vma.proto -index 0c07d51..1aa30f9 100644 ---- a/images/vma.proto -+++ b/images/vma.proto -@@ -24,4 +24,5 @@ message vma_entry { - - /* file status flags */ - optional uint32 fdflags = 10 [(criu).hex = true]; -+ required string name = 11; - } --- -2.34.1 - diff --git a/0025-char_dev-add-support-for-char-device-dump-and-restor.patch b/0025-char_dev-add-support-for-char-device-dump-and-restor.patch deleted file mode 100644 index 8c30f59fa13d7e10038c48d110907af86da77578..0000000000000000000000000000000000000000 --- a/0025-char_dev-add-support-for-char-device-dump-and-restor.patch +++ /dev/null @@ -1,784 +0,0 @@ -From 2eebb9de411333628ce8fc5894f072b6ed6179e0 Mon Sep 17 00:00:00 2001 -From: Jingxian He -Date: Wed, 19 May 2021 21:55:34 +0800 -Subject: [PATCH 25/72] char_dev: add support for char device dump and restore - -Add support for char device dump and restore during module upgrade. - -`/sys/kernel/repairing_device` provides the char device whiltelist -with `IOCTL_CMD_{NEEDREPAIR, REPAIR}` command besides the internal -device list. -The device modules could use `mures_{add, del}_devname()` to add, or -delete the char device whitelist dynamically. - -Signed-off-by: Xiaoguang Li -Signed-off-by: Jingxian He -Signed-off-by: fu.lin ---- - criu/Makefile.crtools | 2 + - criu/config.c | 1 + - criu/cr-dump.c | 4 ++ - criu/cr-restore.c | 4 +- - criu/crtools.c | 2 + - criu/devname.c | 130 +++++++++++++++++++++++++++++++++++ - criu/files-chr.c | 104 ++++++++++++++++++++++++++++ - criu/files-reg.c | 6 +- - criu/files.c | 93 ++++++++++++++++++++++++- - criu/include/cr_options.h | 1 + - criu/include/files-chr.h | 25 +++++++ - criu/include/files.h | 6 ++ - criu/include/image-desc.h | 1 + - criu/include/image.h | 1 + - criu/include/protobuf-desc.h | 1 + - criu/mem.c | 7 +- - criu/proc_parse.c | 21 +++++- - images/Makefile | 1 + - images/chr.proto | 12 ++++ - images/fdinfo.proto | 3 + - 20 files changed, 417 insertions(+), 8 deletions(-) - create mode 100644 criu/devname.c - create mode 100644 criu/files-chr.c - create mode 100644 criu/include/files-chr.h - create mode 100644 images/chr.proto - -diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools -index 98c4135..2e82912 100644 ---- a/criu/Makefile.crtools -+++ b/criu/Makefile.crtools -@@ -91,6 +91,8 @@ obj-y += pie-util-vdso.o - obj-y += vdso.o - obj-y += timens.o - obj-y += pin-mem.o -+obj-y += devname.o -+obj-y += files-chr.o - obj-$(CONFIG_HAS_LIBBPF) += bpfmap.o - obj-$(CONFIG_COMPAT) += pie-util-vdso-elf32.o - CFLAGS_pie-util-vdso-elf32.o += -DCONFIG_VDSO_32 -diff --git a/criu/config.c b/criu/config.c -index 5d1cff6..03cad66 100644 ---- a/criu/config.c -+++ b/criu/config.c -@@ -701,6 +701,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, - { "network-lock", required_argument, 0, 1100 }, - BOOL_OPT("use-fork-pid", &opts.use_fork_pid), - BOOL_OPT("with-notifier", &opts.with_notifier_kup), -+ BOOL_OPT("dump-char-dev", &opts.dump_char_dev), - {}, - }; - -diff --git a/criu/cr-dump.c b/criu/cr-dump.c -index 50a2f9b..fd17413 100644 ---- a/criu/cr-dump.c -+++ b/criu/cr-dump.c -@@ -88,6 +88,7 @@ - #include "asm/dump.h" - #include "pin-mem.h" - #include "notifier.h" -+#include "files-chr.h" - - /* - * Architectures can overwrite this function to restore register sets that -@@ -1880,6 +1881,9 @@ int cr_pre_dump_tasks(pid_t pid) - */ - rlimit_unlimit_nofile(); - -+ if (opts.dump_char_dev && parse_devname() < 0) -+ goto err; -+ - root_item = alloc_pstree_item(); - if (!root_item) - goto err; -diff --git a/criu/cr-restore.c b/criu/cr-restore.c -index b805265..2904a75 100644 ---- a/criu/cr-restore.c -+++ b/criu/cr-restore.c -@@ -332,11 +332,11 @@ static int root_prepare_shared(void) - if (pi->pid->state == TASK_HELPER) - continue; - -- ret = prepare_mm_pid(pi); -+ ret = prepare_fd_pid(pi); - if (ret < 0) - break; - -- ret = prepare_fd_pid(pi); -+ ret = prepare_mm_pid(pi); - if (ret < 0) - break; - -diff --git a/criu/crtools.c b/criu/crtools.c -index 1d08620..dc6d603 100644 ---- a/criu/crtools.c -+++ b/criu/crtools.c -@@ -451,6 +451,8 @@ usage: - " --use-fork-pid Allow to restore task pid by setting fork pid of task struct.\n" - " --with-notifier Allow to checkpoint/restore kup notifier chain.\n" - " This feature needs the kernel assistance.\n" -+ " --dump-char-dev Dump char dev files as normal file with repair cmd\n" -+ \ - "\n" - "Check options:\n" - " Without options, \"criu check\" checks availability of absolutely required\n" -diff --git a/criu/devname.c b/criu/devname.c -new file mode 100644 -index 0000000..5f6fbed ---- /dev/null -+++ b/criu/devname.c -@@ -0,0 +1,130 @@ -+#include -+#include -+#include -+#include -+ -+#include "log.h" -+#include "common/xmalloc.h" -+ -+#define REPAIRING_DEVICE_FILE "/sys/kernel/repairing_device" -+#define ASCII_SIZE 128 -+ -+static void *root_bucket[ASCII_SIZE]; -+ -+static int insert_devname_internal(void *bucket[], const char *name) -+{ -+ void *new = NULL; -+ int idx = *name; -+ -+ if (bucket[idx] != NULL) -+ return insert_devname_internal(bucket[idx], name+1); -+ else if (idx == '\0') { -+ new = xmalloc(sizeof(void *)); -+ if (!new) { -+ pr_perror("alloc devname failed\n"); -+ return -1; -+ } -+ bucket[idx] = new; -+ return 0; -+ } else { -+ new = xmalloc(sizeof(void *) * ASCII_SIZE); -+ if (!new) { -+ pr_perror("alloc devname failed\n"); -+ return -1; -+ } -+ memset(new, 0, sizeof(void *) * ASCII_SIZE); -+ bucket[idx] = new; -+ return insert_devname_internal(bucket[idx], name+1); -+ } -+} -+ -+int insert_devname(const char *devname) -+{ -+ if (devname == NULL || *devname == '\0') // ignore -+ return 0; -+ -+ pr_debug("insert device '%s'\n", devname); -+ return insert_devname_internal(root_bucket, devname); -+} -+ -+int parse_devname(void) -+{ -+ int retval = -1; -+ char *line = NULL; -+ size_t len = 0; -+ ssize_t nread = 0; -+ FILE *fp = NULL; -+ -+ fp = fopen(REPAIRING_DEVICE_FILE, "r"); -+ if (fp == NULL) { -+ pr_info("Unable to open %s, downgrade to use internal whitelist\n", -+ REPAIRING_DEVICE_FILE); -+ return 0; -+ } -+ -+ while ((nread = getline(&line, &len, fp)) != -1) { -+ if (nread <= 1) // ignore empty string -+ continue; -+ -+ line[nread-1] = '\0'; // drop '\n' -+ retval = insert_devname(line); -+ if (retval != 0) -+ goto out; -+ } -+ retval = 0; -+ -+out: -+ free(line); -+ fclose(fp); -+ return retval; -+} -+ -+static const char *steal_devname(const char *name, ssize_t len) -+{ -+ ssize_t off = len; -+ -+ for (off -= 1; off > 0; off--) { -+ if (name[off] == '/') -+ break; -+ } -+ -+ return name + off + 1; -+} -+ -+static bool find_devname_internal(void *bucket[], const char *name) -+{ -+ int idx = *name; -+ -+ if (*name == '\0' && bucket[idx] != NULL) -+ return true; -+ else if (bucket[idx] == NULL) -+ return false; -+ else { -+ return find_devname_internal(bucket[idx], name+1); -+ } -+} -+ -+bool find_devname(const char *name) -+{ -+ const char *devname; -+ size_t len = 0; -+ bool found = false; -+ -+ if (name == NULL) -+ return false; -+ else if ((len = strlen(name)) == 0) -+ return false; -+ -+ devname = steal_devname(name, len); -+ found = find_devname_internal(root_bucket, devname); -+ -+ pr_debug("device '%s' (original name '%s') %s found in %s\n", -+ devname, name, found ? "is" : "isn't", REPAIRING_DEVICE_FILE); -+ -+ /* Compatible with the old version, there are still `strstr` branch in the following */ -+ found |= (strstr(name, "uverbs") != NULL -+ || strstr(name, "rdma_cm") != NULL -+ || strstr(name, "umad") != NULL); -+ -+ return found; -+} -diff --git a/criu/files-chr.c b/criu/files-chr.c -new file mode 100644 -index 0000000..2eb023e ---- /dev/null -+++ b/criu/files-chr.c -@@ -0,0 +1,104 @@ -+#include -+ -+#include "imgset.h" -+#include "pstree.h" -+#include "files-chr.h" -+#include "log.h" -+ -+#include "protobuf.h" -+ -+/* Checks if file descriptor @lfd is infinibandevent */ -+int is_infiniband_link(char *link) -+{ -+ return is_anon_link_type(link, "[infinibandevent]"); -+} -+ -+static int chrfile_open(struct file_desc *d, int *new_fd) -+{ -+ int fd, mntns_root; -+ int ret = 0; -+ struct chrfile_info *ci; -+ -+ ci = container_of(d, struct chrfile_info, d); -+ -+ if (ci->cfe->repair) -+ ci->cfe->flags |= O_REPAIR; -+ -+ mntns_root = open_pid_proc(getpid()); -+ fd = openat(mntns_root, ci->path, ci->cfe->flags); -+ if (fd < 0){ -+ pr_err("open chr file failed\n"); -+ return -1; -+ } -+ -+ if (ci->cfe->repair) { -+ ret = ioctl(fd, IOCTL_CMD_REPAIR , ci->cfe->index); -+ pr_info("repair ioctl return: %d, index: %d\n", ret, ci->cfe->index); -+ if (ret) -+ goto err; -+ } -+ -+ *new_fd = fd; -+ return ret; -+err: -+ close(fd); -+ return ret; -+} -+ -+static struct file_desc_ops chrfile_desc_ops = { -+ .type = FD_TYPES__CHR, -+ .open = chrfile_open, -+}; -+ -+static int collect_one_chrfile(void *o, ProtobufCMessage *base, struct cr_img *i) -+{ -+ struct chrfile_info *ci = o; -+ static char dot[] = "."; -+ -+ ci->cfe = pb_msg(base, ChrfileEntry); -+ if (ci->cfe->name[1] == '\0') -+ ci->path = dot; -+ else -+ ci->path = ci->cfe->name; -+ -+ pr_info("Collected chr file: %#x, name: %s\n", ci->cfe->id, ci->path); -+ file_desc_add(&ci->d, ci->cfe->id, &chrfile_desc_ops); -+ -+ return 0; -+} -+ -+struct collect_image_info chrfile_cinfo = { -+ .fd_type = CR_FD_CHRFILE, -+ .pb_type = PB_CHRFILE, -+ .priv_size = sizeof(struct chrfile_info), -+ .collect = collect_one_chrfile, -+}; -+ -+int collect_chr_map(struct pstree_item *me, struct vma_area *vma) -+{ -+ struct list_head *list = &rsti(me)->fds; -+ struct fdinfo_list_entry *fle, *tmp; -+ struct chrfile_info *ci; -+ bool exist_fd; -+ -+ -+ list_for_each_entry_safe(fle, tmp, list, ps_list) { -+ struct file_desc *d = fle->desc; -+ -+ if (d->ops->type != FD_TYPES__CHR) -+ continue; -+ -+ ci = container_of(d, struct chrfile_info, d); -+ if (!strcmp(vma->e->name, ci->path)) { -+ vma->vmfd = d; -+ vma->e->fd = fle->fe->fd; -+ exist_fd = true; -+ break; -+ } -+ } -+ -+ if (!exist_fd) -+ return -EEXIST; -+ -+ return 0; -+} -diff --git a/criu/files-reg.c b/criu/files-reg.c -index fbdf811..b9576a4 100644 ---- a/criu/files-reg.c -+++ b/criu/files-reg.c -@@ -45,6 +45,7 @@ - #include "fault-injection.h" - #include "external.h" - #include "memfd.h" -+#include "files-chr.h" - - #include "protobuf.h" - #include "util.h" -@@ -1640,7 +1641,8 @@ int dump_one_reg_file(int lfd, u32 id, const struct fd_parms *p) - rfe.has_mnt_id = true; - } - -- pr_info("Dumping path for %d fd via self %d [%s]\n", p->fd, lfd, &link->name[1]); -+ pr_info("Dumping path for %d fd via self %d [%s], id: %d\n", -+ p->fd, lfd, &link->name[1], id); - - /* - * The regular path we can handle should start with slash. -@@ -2373,7 +2375,7 @@ static int collect_one_regfile(void *o, ProtobufCMessage *base, struct cr_img *i - rfi->remap = NULL; - rfi->size_mode_checked = false; - -- pr_info("Collected [%s] ID %#x\n", rfi->path, rfi->rfe->id); -+ pr_info("Collected regfile [%s] ID %#x\n", rfi->path, rfi->rfe->id); - return file_desc_add(&rfi->d, rfi->rfe->id, ®_desc_ops); - } - -diff --git a/criu/files.c b/criu/files.c -index f262d80..e1681a1 100644 ---- a/criu/files.c -+++ b/criu/files.c -@@ -49,6 +49,7 @@ - #include "kerndat.h" - #include "fdstore.h" - #include "bpfmap.h" -+#include "files-chr.h" - - #include "protobuf.h" - #include "util.h" -@@ -325,10 +326,32 @@ int do_dump_gen_file(struct fd_parms *p, int lfd, const struct fdtype_ops *ops, - e->fd = p->fd; - e->flags = p->fd_flags; - -+ pr_info("fdinfoEntry fd: %d\n", e->fd); - ret = fd_id_generate(p->pid, e, p); - if (ret == 1) /* new ID generated */ - ret = ops->dump(lfd, e->id, p); -- else -+ else if (ops->type == FD_TYPES__CHR) { -+ /* -+ * Sometimes the app_data subprocess may inherit the fd from -+ * app_data. Those fds may result the unconditional oops during -+ * the restoration of app_data. Therefore, prevent the dump in -+ * those condition. -+ */ -+ struct fd_link _link, *link; -+ -+ if (!p->link) { -+ if (fill_fdlink(lfd, p, &_link)) -+ return -1; -+ link = &_link; -+ } else -+ link = p->link; -+ -+ if (find_devname(link->name)) { -+ pr_err("char dev '%s' fd %d is owned by multi-processes\n", -+ link->name, e->fd); -+ ret = -1; -+ } -+ } else - /* Remove locks generated by the fd before going to the next */ - discard_dup_locks_tail(p->pid, e->fd); - -@@ -466,6 +489,58 @@ static int dump_blkdev(struct fd_parms *p, int lfd, FdinfoEntry *e) - return err; - } - -+static int dump_chr_file(int lfd, u32 id, const struct fd_parms *p) -+{ -+ int ret; -+ struct fd_link _link, *link; -+ struct cr_img *img; -+ FileEntry fe = FILE_ENTRY__INIT; -+ ChrfileEntry cfe = CHRFILE_ENTRY__INIT; -+ -+ if (!p->link) { -+ if (fill_fdlink(lfd, p, &_link)) -+ return -1; -+ link = &_link; -+ } else -+ link = p->link; -+ -+ pr_info("Dumping chr-file fd %d with lfd %d with id %d, name: %s\n", p->fd, lfd, id, link->name); -+ -+ if (strstr(link->name, "(deleted)") != NULL) { -+ pr_err("char device '%s' is deleted\n", link->name); -+ return -ENXIO; -+ } -+ -+ cfe.repair = false; -+ if (find_devname(link->name)) { -+ ret = ioctl(lfd, IOCTL_CMD_NEEDREPAIR, 0); -+ if (ret <= 0) { -+ pr_err("ioctl cmd needrepair failed, errno: %d, %s\n", ret, strerror(errno)); -+ return -1; -+ } else { -+ pr_info("char device needrepair cmd return: %d\n", ret); -+ cfe.index = ret; -+ cfe.repair = true; -+ } -+ } -+ -+ cfe.id = id; -+ cfe.name = &link->name[1]; -+ cfe.flags = p->flags; -+ fe.type = FD_TYPES__CHR; -+ fe.id = cfe.id; -+ fe.chr = &cfe; -+ -+ img = img_from_set(glob_imgset, CR_FD_FILES); -+ ret = pb_write_one(img, &fe, PB_FILE); -+ return ret; -+} -+ -+const struct fdtype_ops chr_dump_ops = { -+ .type = FD_TYPES__CHR, -+ .dump = dump_chr_file, -+}; -+ - static int dump_chrdev(struct fd_parms *p, int lfd, FdinfoEntry *e) - { - struct fd_link *link_old = p->link; -@@ -493,6 +568,10 @@ static int dump_chrdev(struct fd_parms *p, int lfd, FdinfoEntry *e) - ops = &tty_dump_ops; - break; - } -+ if (opts.dump_char_dev) { -+ ops = &chr_dump_ops; -+ break; -+ } - - sprintf(more, "%d:%d", maj, minor(p->stat.st_rdev)); - err = dump_unsupp_fd(p, lfd, "chr", more, e); -@@ -559,6 +638,8 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, - ops = &signalfd_dump_ops; - else if (is_timerfd_link(link)) - ops = &timerfd_dump_ops; -+ else if (is_infiniband_link(link)) -+ return 1; - #ifdef CONFIG_HAS_LIBBPF - else if (is_bpfmap_link(link)) - ops = &bpfmap_dump_ops; -@@ -663,6 +744,11 @@ int dump_task_files_seized(struct parasite_ctl *ctl, struct pstree_item *item, s - ret = dump_one_file(item->pid, dfds->fds[i + off], lfds[i], opts + i, ctl, &e, dfds); - if (ret) - break; -+ /* infiniband link file */ -+ if (ret > 0) { -+ ret = 0; -+ continue; -+ } - - ret = pb_write_one(img, &e, PB_FDINFO); - if (ret) -@@ -917,6 +1003,7 @@ int prepare_fd_pid(struct pstree_item *item) - if (!img) - return -1; - -+ pr_info("prepare_fd_pid\n"); - while (1) { - FdinfoEntry *e; - -@@ -1125,6 +1212,7 @@ int setup_and_serve_out(struct fdinfo_list_entry *fle, int new_fd) - if (reopen_fd_as(fle->fe->fd, new_fd)) - return -1; - -+ pr_info("*******flags: %d",fle->fe->flags); - if (fcntl(fle->fe->fd, F_SETFD, fle->fe->flags) == -1) { - pr_perror("Unable to set file descriptor flags"); - return -1; -@@ -1761,6 +1849,9 @@ static int collect_one_file(void *o, ProtobufCMessage *base, struct cr_img *i) - ret = collect_one_file_entry(fe, fe->bpf->id, &fe->bpf->base, &bpfmap_cinfo); - break; - #endif -+ case FD_TYPES__CHR: -+ ret = collect_one_file_entry(fe, fe->chr->id, &fe->chr->base, &chrfile_cinfo); -+ break; - } - - return ret; -diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h -index 039edba..226acb2 100644 ---- a/criu/include/cr_options.h -+++ b/criu/include/cr_options.h -@@ -193,6 +193,7 @@ struct cr_options { - int pin_memory; - int use_fork_pid; - int with_notifier_kup; -+ int dump_char_dev; - }; - - extern struct cr_options opts; -diff --git a/criu/include/files-chr.h b/criu/include/files-chr.h -new file mode 100644 -index 0000000..5be11f5 ---- /dev/null -+++ b/criu/include/files-chr.h -@@ -0,0 +1,25 @@ -+#ifndef __CRIU_FILES_CHR_H__ -+#define __CRIU_FILES_CHR_H__ -+ -+#include "files.h" -+ -+#include "images/chr.pb-c.h" -+ -+struct chrfile_info { -+ struct file_desc d; -+ ChrfileEntry *cfe; -+ char *path; -+}; -+ -+extern struct collect_image_info chrfile_cinfo; -+ -+extern const struct fdtype_ops chr_dump_ops; -+extern int collect_chr_map(struct pstree_item *me, struct vma_area *); -+ -+int parse_devname(void); -+bool find_devname(const char *name); -+ -+int collect_chr_map(struct pstree_item *me, struct vma_area *vma); -+int is_infiniband_link(char *link); -+ -+#endif /* __CRIU_FILES_CHR_H__ */ -diff --git a/criu/include/files.h b/criu/include/files.h -index 96face7..1d979a9 100644 ---- a/criu/include/files.h -+++ b/criu/include/files.h -@@ -15,6 +15,12 @@ - #include "images/fown.pb-c.h" - #include "images/vma.pb-c.h" - -+#ifndef IOCTL_CMD_NEEDREPAIR -+#define IOCTL_CMD_NEEDREPAIR 0x00100000UL -+#define IOCTL_CMD_REPAIR 0x00200000UL -+#define O_REPAIR 040000000 -+#endif -+ - struct parasite_drain_fd; - struct pstree_item; - struct file_desc; -diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h -index 5045bae..e35f8b2 100644 ---- a/criu/include/image-desc.h -+++ b/criu/include/image-desc.h -@@ -115,6 +115,7 @@ enum { - CR_FD_MEMFD_FILE, - - CR_FD_AUTOFS, -+ CR_FD_CHRFILE, - - CR_FD_MAX - }; -diff --git a/criu/include/image.h b/criu/include/image.h -index f598de7..66492c0 100644 ---- a/criu/include/image.h -+++ b/criu/include/image.h -@@ -85,6 +85,7 @@ - #define VMA_AREA_AIORING (1 << 13) - #define VMA_AREA_MEMFD (1 << 14) - #define VMA_AREA_ANON_INODE (1 << 15) -+#define VMA_AREA_CHR (1 << 16) - - #define VMA_CLOSE (1 << 28) - #define VMA_NO_PROT_WRITE (1 << 29) -diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h -index 3824de1..2468e8f 100644 ---- a/criu/include/protobuf-desc.h -+++ b/criu/include/protobuf-desc.h -@@ -70,6 +70,7 @@ enum { - PB_BPFMAP_FILE, - PB_BPFMAP_DATA, - PB_APPARMOR, -+ PB_CHRFILE, - - /* PB_AUTOGEN_STOP */ - -diff --git a/criu/mem.c b/criu/mem.c -index 00965f0..b955d66 100644 ---- a/criu/mem.c -+++ b/criu/mem.c -@@ -32,6 +32,7 @@ - #include "compel/infect-util.h" - #include "pidfd-store.h" - #include "pin-mem.h" -+#include "files-chr.h" - - #include "protobuf.h" - #include "images/pagemap.pb-c.h" -@@ -717,7 +718,9 @@ int prepare_mm_pid(struct pstree_item *i) - - pr_info("vma 0x%" PRIx64 " 0x%" PRIx64 "\n", vma->e->start, vma->e->end); - -- if (vma_area_is(vma, VMA_ANON_SHARED)) -+ if (vma_area_is(vma, VMA_AREA_CHR)) -+ ret = collect_chr_map(i, vma); -+ else if (vma_area_is(vma, VMA_ANON_SHARED)) - ret = collect_shmem(pid, vma); - else if (vma_area_is(vma, VMA_FILE_PRIVATE) || vma_area_is(vma, VMA_FILE_SHARED)) - ret = collect_filemap(vma); -@@ -1358,7 +1361,7 @@ int open_vmas(struct pstree_item *t) - filemap_ctx_init(false); - - list_for_each_entry(vma, &vmas->h, list) { -- if (vma_area_is(vma, VMA_AREA_ANON_INODE)) -+ if (vma_area_is(vma, VMA_AREA_ANON_INODE) || vma_area_is(vma, VMA_AREA_CHR)) - continue; - - if (!vma_area_is(vma, VMA_AREA_REGULAR) || !vma->vm_open) -diff --git a/criu/proc_parse.c b/criu/proc_parse.c -index e41d43a..8913d93 100644 ---- a/criu/proc_parse.c -+++ b/criu/proc_parse.c -@@ -603,11 +603,30 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, const char *file_pat - } else if (*vm_file_fd >= 0) { - struct stat *st_buf = vma_area->vmst; - -+ pr_info("file mode is: %x, st_ino: %ld\n", -+ st_buf->st_mode, st_buf->st_ino); - if (S_ISREG(st_buf->st_mode)) - /* regular file mapping -- supported */; - else if (S_ISCHR(st_buf->st_mode) && (st_buf->st_rdev == DEVZERO)) - /* devzero mapping -- also makes sense */; -- else { -+ else if (S_ISCHR(st_buf->st_mode) && opts.dump_char_dev) { -+ /* NOTICE: if `--dump-char-dev` option is set, permmit -+ * all char device memory area dumping. -+ */ -+ if (strstr(file_path, "uverbs") != NULL) { -+ int len = strlen(file_path) + 1; -+ -+ vma_area->e->status |= VMA_AREA_CHR; -+ vma_area->e->name = xmalloc(len); -+ if (!vma_area->e->name) { -+ pr_err("alloc vma area name failed\n"); -+ goto err; -+ strncpy(vma_area->e->name, file_path, len); -+ pr_info("vma name content is: %s\n", -+ vma_area->e->name); -+ } -+ } -+ } else { - pr_err("Can't handle non-regular mapping on %d's map %" PRIx64 "\n", pid, vma_area->e->start); - goto err; - } -diff --git a/images/Makefile b/images/Makefile -index 004e22e..37dff9a 100644 ---- a/images/Makefile -+++ b/images/Makefile -@@ -72,6 +72,7 @@ proto-obj-y += bpfmap-file.o - proto-obj-y += bpfmap-data.o - proto-obj-y += apparmor.o - proto-obj-y += rseq.o -+proto-obj-y += chr.o - - CFLAGS += -iquote $(obj)/ - -diff --git a/images/chr.proto b/images/chr.proto -new file mode 100644 -index 0000000..67929db ---- /dev/null -+++ b/images/chr.proto -@@ -0,0 +1,12 @@ -+syntax = "proto2"; -+ -+import "opts.proto"; -+ -+message chrfile_entry { -+ required uint32 id = 1; -+ required uint32 flags = 2 [(criu).flags = "rfile.flags"]; -+ required uint32 index = 3; -+ required string name = 4; -+ required bool repair = 5; -+}; -+ -diff --git a/images/fdinfo.proto b/images/fdinfo.proto -index 88f1c11..6549472 100644 ---- a/images/fdinfo.proto -+++ b/images/fdinfo.proto -@@ -20,6 +20,7 @@ import "pipe.proto"; - import "tty.proto"; - import "memfd.proto"; - import "bpfmap-file.proto"; -+import "chr.proto"; - - enum fd_types { - UND = 0; -@@ -42,6 +43,7 @@ enum fd_types { - TIMERFD = 17; - MEMFD = 18; - BPFMAP = 19; -+ CHR = 21; - - /* Any number above the real used. Not stored to image */ - CTL_TTY = 65534; -@@ -78,4 +80,5 @@ message file_entry { - optional tty_file_entry tty = 19; - optional memfd_file_entry memfd = 20; - optional bpfmap_file_entry bpf = 21; -+ optional chrfile_entry chr = 23; - } --- -2.34.1 - diff --git a/0026-improve-char-dev-fd-check-and-repair-method.patch b/0026-improve-char-dev-fd-check-and-repair-method.patch deleted file mode 100644 index 5d7a9a47bf8ae7ac04658b55caabf04307680ada..0000000000000000000000000000000000000000 --- a/0026-improve-char-dev-fd-check-and-repair-method.patch +++ /dev/null @@ -1,74 +0,0 @@ -From 539add7149df575d6d8cdce60ad6fb2c2300e27d Mon Sep 17 00:00:00 2001 -From: Jingxian He -Date: Sun, 24 Oct 2021 15:20:27 +0800 -Subject: [PATCH 26/72] improve char dev fd check and repair method - -Some special char dev cannot work in child processes, we make dump fail -when the special char dev fd is in child processes. -In the char dev repair process, user may need recover fd. We should -make thre repair process running after the char dev fd is reopened as dumped fd. - -Conflict:NA -Reference:https://gitee.com/src-openeuler/criu/pulls/21 -Signed-off-by: Jingxian He ---- - criu/files-chr.c | 11 +---------- - criu/files.c | 12 ++++++++++++ - 2 files changed, 13 insertions(+), 10 deletions(-) - -diff --git a/criu/files-chr.c b/criu/files-chr.c -index 2eb023e..315e9c6 100644 ---- a/criu/files-chr.c -+++ b/criu/files-chr.c -@@ -31,17 +31,8 @@ static int chrfile_open(struct file_desc *d, int *new_fd) - return -1; - } - -- if (ci->cfe->repair) { -- ret = ioctl(fd, IOCTL_CMD_REPAIR , ci->cfe->index); -- pr_info("repair ioctl return: %d, index: %d\n", ret, ci->cfe->index); -- if (ret) -- goto err; -- } -- - *new_fd = fd; -- return ret; --err: -- close(fd); -+ - return ret; - } - -diff --git a/criu/files.c b/criu/files.c -index e1681a1..7b688f5 100644 ---- a/criu/files.c -+++ b/criu/files.c -@@ -1231,6 +1231,7 @@ static int open_fd(struct fdinfo_list_entry *fle) - struct file_desc *d = fle->desc; - struct fdinfo_list_entry *flem; - int new_fd = -1, ret; -+ struct chrfile_info *ci; - - flem = file_master(d); - if (fle != flem) { -@@ -1258,6 +1259,17 @@ static int open_fd(struct fdinfo_list_entry *fle) - if (ret != -1 && new_fd >= 0) { - if (setup_and_serve_out(fle, new_fd) < 0) - return -1; -+ if (d->ops->type == FD_TYPES__CHR) { -+ ci = container_of(d, struct chrfile_info, d); -+ if (ci->cfe->repair) { -+ ret = ioctl(fle->fe->fd, IOCTL_CMD_REPAIR , ci->cfe->index); -+ pr_info("repair ioctl return: %d, index: %d\n", ret, ci->cfe->index); -+ if (ret) { -+ close(fle->fe->fd); -+ return -1; -+ } -+ } -+ } - } - out: - if (ret == 0) --- -2.34.1 - diff --git a/0027-mmap-restore-dev-hisi_sec2-deivce-vma.patch b/0027-mmap-restore-dev-hisi_sec2-deivce-vma.patch deleted file mode 100644 index ed5d313ccead7af37c761ac82977139eeaf69136..0000000000000000000000000000000000000000 --- a/0027-mmap-restore-dev-hisi_sec2-deivce-vma.patch +++ /dev/null @@ -1,472 +0,0 @@ -From fe19a2639373175c134fa51a7c1c26ca5306d22c Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Fri, 10 Sep 2021 16:06:55 +0800 -Subject: [PATCH 27/72] mmap: restore /dev/hisi_sec2* deivce vma - -There are two kinds of vmas: anonymous vma and file-based vma. For -anonymous vma, criu just map area and fill content to it; for file-based -vma, criu preprocess it, such as setting `open_vm()` callback function. - -`/dev/hisi_sec2*` char device is different from the normal. The `open`, -`mmap`, and `close` syscall actions has a special meaning. - - `open`: allocate physical resource of the device - - `mmap`: create instance - - `close`: release physical resource -The vma means the instance in this device. One fd may be associated with -a group instances: one mmio (vma size is 2 pages, pgoff is 0), one dus -(vma size is 37 pages, pgoff is 0x2000). As for dus vma, it's split two -vmas by `mprotect(addr, 0x5000, PROT_READ)`: one size is 0x20000, one -size is 0x5000. - -This patch makes the /dev/hisi_sec* restore possible. Idea: - It's impossible for criu to know the relationship between vma and the -mapped file fd. Therefore, just collect the total fds number during -collecting /dev/hisi_sec* files, then the fd is tagged that which -function is used during vma restoration, and aissign the unused fd to the -specific vma. And during `mmap()` process, dus vma is splitted by `mprotect`. - -Note: -- criu use ino to index the fd. -- this physical device drivers is hisi_sec2.ko, which is located in - `drivers/crypto/hisilicon/sec2/` of linux kernel. -- this device name has prefix "hisi_sec2" that is found from - `drivers/crypto/hisilicon/sec2/sec_main.c`. - -Conflict:NA -Reference:https://gitee.com/src-openeuler/criu/pulls/21 -Signed-off-by: fu.lin ---- - criu/files-chr.c | 130 +++++++++++++++++++++++++++++++++++++-- - criu/include/files-chr.h | 16 +++++ - criu/include/vma.h | 12 ++++ - criu/pie/restorer.c | 130 ++++++++++++++++++++++++++++++++++++++- - criu/proc_parse.c | 4 +- - 5 files changed, 284 insertions(+), 8 deletions(-) - -diff --git a/criu/files-chr.c b/criu/files-chr.c -index 315e9c6..95d93e1 100644 ---- a/criu/files-chr.c -+++ b/criu/files-chr.c -@@ -6,6 +6,9 @@ - #include "log.h" - - #include "protobuf.h" -+#include "rst-malloc.h" -+ -+static unsigned hisi_sec_fds_n; - - /* Checks if file descriptor @lfd is infinibandevent */ - int is_infiniband_link(char *link) -@@ -16,11 +19,14 @@ int is_infiniband_link(char *link) - static int chrfile_open(struct file_desc *d, int *new_fd) - { - int fd, mntns_root; -- int ret = 0; -+ int ret = -1; - struct chrfile_info *ci; - - ci = container_of(d, struct chrfile_info, d); - -+ pr_info("charfile: Opening %s (repair %d index %d)\n", -+ ci->path, ci->cfe->repair, ci->cfe->index); -+ - if (ci->cfe->repair) - ci->cfe->flags |= O_REPAIR; - -@@ -32,6 +38,7 @@ static int chrfile_open(struct file_desc *d, int *new_fd) - } - - *new_fd = fd; -+ ret = 0; - - return ret; - } -@@ -52,10 +59,12 @@ static int collect_one_chrfile(void *o, ProtobufCMessage *base, struct cr_img *i - else - ci->path = ci->cfe->name; - -- pr_info("Collected chr file: %#x, name: %s\n", ci->cfe->id, ci->path); -- file_desc_add(&ci->d, ci->cfe->id, &chrfile_desc_ops); -+ /* collect `/dev/hisi_sec2*` fds */ -+ if (strstr(ci->path, HISI_SEC_DEV) != NULL) -+ hisi_sec_fds_n += 1; - -- return 0; -+ pr_info("Collected chr file: %#x, name: %s\n", ci->cfe->id, ci->path); -+ return file_desc_add(&ci->d, ci->cfe->id, &chrfile_desc_ops); - } - - struct collect_image_info chrfile_cinfo = { -@@ -65,6 +74,7 @@ struct collect_image_info chrfile_cinfo = { - .collect = collect_one_chrfile, - }; - -+static int handle_hisi_vma(struct list_head *fds, struct vma_area *vma); - int collect_chr_map(struct pstree_item *me, struct vma_area *vma) - { - struct list_head *list = &rsti(me)->fds; -@@ -72,6 +82,12 @@ int collect_chr_map(struct pstree_item *me, struct vma_area *vma) - struct chrfile_info *ci; - bool exist_fd; - -+ if (strstr(vma->e->name, HISI_SEC_DEV) != NULL) { -+ if (handle_hisi_vma(list, vma) != 0) { -+ return -1; -+ } else -+ goto out; -+ } - - list_for_each_entry_safe(fle, tmp, list, ps_list) { - struct file_desc *d = fle->desc; -@@ -91,5 +107,111 @@ int collect_chr_map(struct pstree_item *me, struct vma_area *vma) - if (!exist_fd) - return -EEXIST; - -+out: -+ pr_info(" `- find fd %ld for dev %s at this vma\n", vma->e->fd, vma->e->name); -+ -+ return 0; -+} -+ -+#define MAX_HISI_SEC_SIZE 3 /* one physical device expose three char dev */ -+static struct hlist_head hisi_sec_fds_hash[MAX_HISI_SEC_SIZE]; -+ -+static int collect_hisi_sec_fds(struct list_head *list) -+{ -+ struct fdinfo_list_entry *fle, *tmp; -+ struct chrfile_info *ci; -+ struct file_desc *d; -+ struct hisi_sec_desc *desc; -+ int idx; -+ int nr = 0; -+ -+ for (idx = 0; idx < MAX_HISI_SEC_SIZE; idx++) -+ INIT_HLIST_HEAD(&hisi_sec_fds_hash[idx]); -+ -+ list_for_each_entry_safe(fle, tmp, list, ps_list) { -+ d = fle->desc; -+ -+ if (d->ops->type != FD_TYPES__CHR) -+ continue; -+ -+ ci = container_of(d, struct chrfile_info, d); -+ -+ if (strstr(ci->path, HISI_SEC_DEV) != NULL) { -+ desc = shmalloc(sizeof(*desc)); -+ if (desc == NULL) -+ return -ENOMEM; -+ -+ desc->name = ci->path; -+ desc->fd = fle->fe->fd; -+ desc->mmio = desc->dus = 0; -+ -+ idx = (ci->path[strlen(ci->path)-1] - '0') % MAX_HISI_SEC_SIZE; -+ hlist_add_head(&desc->hash, &hisi_sec_fds_hash[idx]); -+ -+ nr += 1; -+ } -+ } -+ -+ return nr; -+} -+ -+static long delivery_hisi_sec_fd(struct list_head *fds, struct vma_area *vma) -+{ -+ extern unsigned hisi_sec_fds_n; /* defined in criu/files.c */ -+ static bool initialized = false; -+ struct hisi_sec_desc *desc; -+ int fd = -1, idx; -+ -+ if (!initialized) { -+ int nr; -+ -+ pr_info("find %d fds for hisi_sec char device\n", hisi_sec_fds_n); -+ -+ nr = collect_hisi_sec_fds(fds); -+ if (nr != hisi_sec_fds_n) { -+ pr_err("Collected fds(%d) aren't equal opened(%d)\n", -+ nr, hisi_sec_fds_n); -+ return -1; -+ } -+ -+ initialized = true; -+ } else if (vma->e->pgoff != HISI_SEC_MMIO && vma->e->pgoff != HISI_SEC_DUS) { -+ /* It's impossible value for fd, just as a tag to show it's a -+ * vma by `mprotect` syscall. -+ */ -+ return LONG_MAX; -+ } -+ -+ idx = (vma->e->name[strlen(vma->e->name)-1] - '0') % MAX_HISI_SEC_SIZE; -+ hlist_for_each_entry(desc, &hisi_sec_fds_hash[idx], hash) { -+ if (strcmp(desc->name, vma->e->name) != 0) -+ continue; -+ -+ if (vma->e->pgoff == HISI_SEC_MMIO && !desc->mmio) { -+ fd = desc->fd; -+ desc->mmio = true; -+ break; -+ } else if (vma->e->pgoff == HISI_SEC_DUS && !desc->dus) { -+ fd = desc->fd; -+ desc->dus = true; -+ break; -+ } -+ } -+ -+ return fd; -+} -+ -+static int handle_hisi_vma(struct list_head *fds, struct vma_area *vma) -+{ -+ long fd = delivery_hisi_sec_fd(fds, vma); -+ -+ if (fd < 0) { -+ pr_err("find fd for char dev vma pgoff %lx named %s failed.\n", -+ vma->e->pgoff, vma->e->name); -+ return -1; -+ } -+ -+ vma->e->fd = fd; -+ - return 0; - } -diff --git a/criu/include/files-chr.h b/criu/include/files-chr.h -index 5be11f5..26b8fb2 100644 ---- a/criu/include/files-chr.h -+++ b/criu/include/files-chr.h -@@ -22,4 +22,20 @@ bool find_devname(const char *name); - int collect_chr_map(struct pstree_item *me, struct vma_area *vma); - int is_infiniband_link(char *link); - -+struct hisi_sec_desc { -+ struct hlist_node hash; -+ char *name; -+ bool mmio; -+ bool dus; -+ int fd; -+}; -+ -+#define HISI_SEC_DEV "hisi_sec2" /* `/dev/hisi_sec2*` char device */ -+ -+/* here is the selection of offset in `mmap`, they're from drivers */ -+enum hisi_sec_dev { -+ HISI_SEC_MMIO = 0x0, -+ HISI_SEC_DUS = 0x2000, -+}; -+ - #endif /* __CRIU_FILES_CHR_H__ */ -diff --git a/criu/include/vma.h b/criu/include/vma.h -index ed9f31e..2b6e86f 100644 ---- a/criu/include/vma.h -+++ b/criu/include/vma.h -@@ -125,4 +125,16 @@ static inline bool vma_entry_can_be_lazy(VmaEntry *e) - !(vma_entry_is(e, VMA_AREA_VDSO)) && !(vma_entry_is(e, VMA_AREA_VSYSCALL))); - } - -+struct vma_attr { -+ int prot; -+ int flags; -+}; -+ -+enum ALIEN_MAP_METHOD { -+ PGOFF_IS_ZERO, -+ MAP_THEN_PROTECT, -+ -+ MAX_ALIEN_MAP_METHOD, -+}; -+ - #endif /* __CR_VMA_H__ */ -diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c -index 549bbd6..dcc922e 100644 ---- a/criu/pie/restorer.c -+++ b/criu/pie/restorer.c -@@ -37,6 +37,7 @@ - #include "uffd.h" - #include "sched.h" - #include "notifier.h" -+#include "files-chr.h" - - #include "common/lock.h" - #include "common/page.h" -@@ -861,6 +862,129 @@ static unsigned long restore_mapping(VmaEntry *vma_entry) - return addr; - } - -+static unsigned long restore_map_then_protect_mapping(VmaEntry *curr, -+ struct vma_attr *curr_attr, -+ VmaEntry *next, -+ struct vma_attr *next_attr) -+{ -+ int retval; -+ unsigned long addr; -+ -+ if (next->fd != LONG_MAX -+ || curr->end != next->start -+ || (vma_entry_len(curr) + curr->pgoff) != next->pgoff -+ || curr->prot == next->prot -+ || curr->flags != next->flags) { -+ pr_err("They looks not currect:\n"); -+ pr_err(" `- vma A: (%x %x %d %lx)\n", -+ curr_attr->prot, curr_attr->flags, -+ (int)curr->fd, curr->pgoff); -+ pr_err(" `- vma B: (%x %x %d %lx)\n", -+ next_attr->prot, next_attr->flags, -+ (int)next->fd, next->pgoff); -+ return -1; -+ } -+ -+ pr_info("\tmmap(%x %x %d %lx) in map then protect mapping\n", -+ curr_attr->prot, curr_attr->flags, -+ (int)curr->fd, curr->pgoff); -+ -+ addr = sys_mmap(decode_pointer(curr->start), -+ vma_entry_len(curr) + vma_entry_len(next), -+ curr_attr->prot, curr_attr->flags, curr->fd, curr->pgoff); -+ if (addr != curr->start) { -+ pr_err("%s: mmap failed with code %ld\n", __func__, addr); -+ goto out; -+ } -+ -+ pr_info("\t mprotect(%x)\n", next_attr->prot); -+ retval = sys_mprotect(decode_pointer(next->start), -+ vma_entry_len(next), next_attr->prot); -+ if (retval != 0) { -+ addr = retval; -+ pr_err("%s: mprotect failed with code %d\n", __func__, retval); -+ } -+ -+out: -+ return addr; -+} -+ -+static unsigned long restore_pgoff_is_zero_mapping(VmaEntry *curr, struct vma_attr *attr) -+{ -+ unsigned long addr; -+ -+ pr_debug("\tmmap(%x %x %d %lx) in pgoff is zero mapping\n", -+ attr->prot, attr->flags, (int)curr->fd, curr->pgoff); -+ -+ addr = sys_mmap(decode_pointer(curr->start), -+ vma_entry_len(curr), -+ attr->prot, attr->flags, -+ curr->fd, curr->pgoff); -+ -+ return addr; -+} -+ -+static unsigned long restore_hisi_sec_mapping(struct task_restore_args *args, -+ int i, int *step) -+{ -+ VmaEntry *curr = args->vmas + i; -+ VmaEntry *next = args->vmas + i + 1; -+ struct vma_attr curr_attr = { -+ .prot = curr->prot, -+ .flags = curr->flags | MAP_FIXED, -+ }; -+ struct vma_attr next_attr = { -+ .prot = next->prot, -+ .flags = next->flags | MAP_FIXED, -+ }; -+ unsigned long addr; -+ -+ switch (curr->pgoff) { -+ case HISI_SEC_MMIO: -+ addr = restore_pgoff_is_zero_mapping(curr, &curr_attr); -+ break; -+ case HISI_SEC_DUS: -+ *step = 2; -+ addr = restore_map_then_protect_mapping(curr, &curr_attr, next, &next_attr); -+ break; -+ default: -+ pr_err("invalid pgoff %lx for vma\n", curr->pgoff); -+ return -1; -+ } -+ return addr; -+} -+ -+static bool find(const char *s1, const char *s2) -+{ -+ if (s1 == NULL || s2 == NULL) -+ return NULL; -+ -+ while (*s1 != '\0' && *s2 != '\0') { -+ if (*s1 == *s2) { -+ s1 += 1; -+ s2 += 1; -+ } else -+ s1 += 1; -+ -+ if (*s2 == '\0') -+ return true; -+ } -+ -+ return false; -+} -+ -+static unsigned long distribute_restore_mapping(struct task_restore_args *args, -+ int i, int *step) -+{ -+ VmaEntry *vma = args->vmas + i; -+ struct vma_names *vma_name = args->vma_names + i; -+ -+ if (vma_entry_is(vma, VMA_AREA_CHR) && find(vma_name->name, HISI_SEC_DEV)) -+ return restore_hisi_sec_mapping(args, i, step); -+ else -+ return restore_mapping(vma); -+} -+ - /* - * This restores aio ring header, content, head and in-kernel position - * of tail. To set tail, we write to /dev/null and use the fact this -@@ -1542,7 +1666,7 @@ int write_fork_pid(int pid) - long __export_restore_task(struct task_restore_args *args) - { - long ret = -1; -- int i; -+ int i, step; - VmaEntry *vma_entry; - unsigned long va; - struct restore_vma_io *rio; -@@ -1691,7 +1815,7 @@ long __export_restore_task(struct task_restore_args *args) - /* - * OK, lets try to map new one. - */ -- for (i = 0; i < args->vmas_n; i++) { -+ for (i = 0, step = 1; i < args->vmas_n; i += step, step = 1) { - vma_entry = args->vmas + i; - vma_name = args->vma_names + i; - -@@ -1708,7 +1832,7 @@ long __export_restore_task(struct task_restore_args *args) - if (vma_entry_is(vma_entry, VMA_PREMMAPED)) - continue; - -- va = restore_mapping(vma_entry); -+ va = distribute_restore_mapping(args, i, &step); - - if (va != vma_entry->start) { - pr_err("Can't restore %" PRIx64 " mapping with %lx\n", vma_entry->start, va); -diff --git a/criu/proc_parse.c b/criu/proc_parse.c -index 8913d93..daa54d9 100644 ---- a/criu/proc_parse.c -+++ b/criu/proc_parse.c -@@ -41,6 +41,7 @@ - #include "path.h" - #include "fault-injection.h" - #include "memfd.h" -+#include "files-chr.h" - - #include "protobuf.h" - #include "images/fdinfo.pb-c.h" -@@ -613,7 +614,8 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, const char *file_pat - /* NOTICE: if `--dump-char-dev` option is set, permmit - * all char device memory area dumping. - */ -- if (strstr(file_path, "uverbs") != NULL) { -+ if (strstr(file_path, "uverbs") != NULL -+ || strstr(file_path, HISI_SEC_DEV) != NULL) { - int len = strlen(file_path) + 1; - - vma_area->e->status |= VMA_AREA_CHR; --- -2.34.1 - diff --git a/0028-infiniband-fix-the-infiniband-fd-conflict.patch b/0028-infiniband-fix-the-infiniband-fd-conflict.patch deleted file mode 100644 index 45fc13da30cda0ded15bedf65b4e32fd0e69ab1f..0000000000000000000000000000000000000000 --- a/0028-infiniband-fix-the-infiniband-fd-conflict.patch +++ /dev/null @@ -1,223 +0,0 @@ -From 5ff0e810f04de4b31f605ba3179dec3b3777978a Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Mon, 8 Nov 2021 15:08:12 +0800 -Subject: [PATCH 28/72] infiniband: fix the infiniband fd conflict - -Phenomenon: - Operating uverbs device will generate anonymous fd named -`anon_inode:[infinibandevent]`. When `anon_inode:[infinibandevent]` fd -is the last opened fd, and some kind of unix socket fd exist, which is -generated by syscalls like `socketpair()` at the same tim, -`anon_inode:[infinibandevent]` will restore fail probabilistically. - -log as the following: - -``` -(00.254523) 63959: open file flags:1 -(00.254526) 63959: unix: Opening standalone (stage 0 id 0x1ff ino 1019605 peer 0) -(00.254571) 63959: *******flags: 0 -(00.254575) 63959: Create fd for 1408 # the fake fd -(00.254578) 63959: *******flags: 1 -(00.254580) 63959: Create fd for 445 # the restoration fd -``` - -Reason: - During the restoration of unix socket, `socketpair()` will generate -two fds, one is used to the current restoration, another is called fake -fd which fd nr is owned by `find_unused_fd()`. When -`anon_inode:[infinibandevent]` fd is the last one, criu don't dump the -fd information for `anon_inode:[infinibandevent]` in original -implementation, and criu think the fd nr which should belong to -`anon_inode:[infinibandevent]` isn't used. Therefore, it cause the -`anon_inode:[infinibandevent]` restoration fail. - -This patch fix the above problem. Core: dump -`anon_inode:[infinibandevent]` fd information, make the criu is aware -that that fd nr is used. - -Conflict:NA -Reference:https://gitee.com/src-openeuler/criu/pulls/21 -Signed-off-by: fu.lin ---- - criu/files-chr.c | 57 ++++++++++++++++++++++++++++++++++++ - criu/files.c | 10 +++---- - criu/include/files-chr.h | 8 +++++ - criu/include/image-desc.h | 1 + - criu/include/protobuf-desc.h | 1 + - images/chr.proto | 3 ++ - images/fdinfo.proto | 2 ++ - 7 files changed, 76 insertions(+), 6 deletions(-) - -diff --git a/criu/files-chr.c b/criu/files-chr.c -index 95d93e1..6d87c33 100644 ---- a/criu/files-chr.c -+++ b/criu/files-chr.c -@@ -215,3 +215,60 @@ static int handle_hisi_vma(struct list_head *fds, struct vma_area *vma) - - return 0; - } -+ -+static void pr_info_infiniband(char *action, InfinibandEntry *infiniband) -+{ -+ pr_info("%sinfiniband: id %#08x\n", action, infiniband->id); -+} -+ -+static int dump_one_infiniband(int lfd, u32 id, const struct fd_parms *p) -+{ -+ FileEntry fe = FILE_ENTRY__INIT; -+ InfinibandEntry infiniband = INFINIBAND_ENTRY__INIT; -+ -+ infiniband.id = id; -+ -+ fe.type = FD_TYPES__INFINIBAND; -+ fe.id = infiniband.id; -+ fe.infiniband = &infiniband; -+ -+ pr_info_infiniband("Dumping ", &infiniband); -+ -+ return pb_write_one(img_from_set(glob_imgset, CR_FD_FILES), &fe, PB_FILE); -+} -+ -+const struct fdtype_ops infiniband_dump_ops = { -+ .type = FD_TYPES__INFINIBAND, -+ .dump = dump_one_infiniband, -+}; -+ -+static int infiniband_open(struct file_desc *d, int *new_fd) { -+ /* -+ * `*new_fd == -1` at this time, it means this open operation shouldn't -+ * be served out, which is why this function does nothing here. -+ */ -+ return 0; -+}; -+ -+static struct file_desc_ops infiniband_desc_ops = { -+ .type = FD_TYPES__INFINIBAND, -+ .open = infiniband_open, -+}; -+ -+static int collect_one_infiniband(void *o, ProtobufCMessage *base, struct cr_img *i) -+{ -+ struct infiniband_file_info *info = o; -+ -+ info->infiniband = pb_msg(base, InfinibandEntry); -+ pr_info_infiniband("Collected ", info->infiniband); -+ -+ /* add the fd to `file_desc_hash` list to prevent from NULL pointer */ -+ return file_desc_add(&info->d, info->infiniband->id, &infiniband_desc_ops); -+} -+ -+struct collect_image_info infiniband_cinfo = { -+ .fd_type = CR_FD_INFINIBAND, -+ .pb_type = PB_INFINIBAND, -+ .priv_size = sizeof(struct infiniband_file_info), -+ .collect = collect_one_infiniband, -+}; -diff --git a/criu/files.c b/criu/files.c -index 7b688f5..1ec5281 100644 ---- a/criu/files.c -+++ b/criu/files.c -@@ -639,7 +639,7 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, - else if (is_timerfd_link(link)) - ops = &timerfd_dump_ops; - else if (is_infiniband_link(link)) -- return 1; -+ ops = &infiniband_dump_ops; - #ifdef CONFIG_HAS_LIBBPF - else if (is_bpfmap_link(link)) - ops = &bpfmap_dump_ops; -@@ -744,11 +744,6 @@ int dump_task_files_seized(struct parasite_ctl *ctl, struct pstree_item *item, s - ret = dump_one_file(item->pid, dfds->fds[i + off], lfds[i], opts + i, ctl, &e, dfds); - if (ret) - break; -- /* infiniband link file */ -- if (ret > 0) { -- ret = 0; -- continue; -- } - - ret = pb_write_one(img, &e, PB_FDINFO); - if (ret) -@@ -1864,6 +1859,9 @@ static int collect_one_file(void *o, ProtobufCMessage *base, struct cr_img *i) - case FD_TYPES__CHR: - ret = collect_one_file_entry(fe, fe->chr->id, &fe->chr->base, &chrfile_cinfo); - break; -+ case FD_TYPES__INFINIBAND: -+ ret = collect_one_file_entry(fe, fe->infiniband->id, &fe->infiniband->base, &infiniband_cinfo); -+ break; - } - - return ret; -diff --git a/criu/include/files-chr.h b/criu/include/files-chr.h -index 26b8fb2..261c4b2 100644 ---- a/criu/include/files-chr.h -+++ b/criu/include/files-chr.h -@@ -38,4 +38,12 @@ enum hisi_sec_dev { - HISI_SEC_DUS = 0x2000, - }; - -+struct infiniband_file_info { -+ InfinibandEntry *infiniband; -+ struct file_desc d; -+}; -+ -+extern const struct fdtype_ops infiniband_dump_ops; -+extern struct collect_image_info infiniband_cinfo; -+ - #endif /* __CRIU_FILES_CHR_H__ */ -diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h -index e35f8b2..9ad5fa0 100644 ---- a/criu/include/image-desc.h -+++ b/criu/include/image-desc.h -@@ -116,6 +116,7 @@ enum { - - CR_FD_AUTOFS, - CR_FD_CHRFILE, -+ CR_FD_INFINIBAND, - - CR_FD_MAX - }; -diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h -index 2468e8f..72a9e1d 100644 ---- a/criu/include/protobuf-desc.h -+++ b/criu/include/protobuf-desc.h -@@ -71,6 +71,7 @@ enum { - PB_BPFMAP_DATA, - PB_APPARMOR, - PB_CHRFILE, -+ PB_INFINIBAND, - - /* PB_AUTOGEN_STOP */ - -diff --git a/images/chr.proto b/images/chr.proto -index 67929db..ed65005 100644 ---- a/images/chr.proto -+++ b/images/chr.proto -@@ -10,3 +10,6 @@ message chrfile_entry { - required bool repair = 5; - }; - -+message infiniband_entry { -+ required uint32 id = 1; -+}; -diff --git a/images/fdinfo.proto b/images/fdinfo.proto -index 6549472..eb52f35 100644 ---- a/images/fdinfo.proto -+++ b/images/fdinfo.proto -@@ -44,6 +44,7 @@ enum fd_types { - MEMFD = 18; - BPFMAP = 19; - CHR = 21; -+ INFINIBAND = 22; - - /* Any number above the real used. Not stored to image */ - CTL_TTY = 65534; -@@ -81,4 +82,5 @@ message file_entry { - optional memfd_file_entry memfd = 20; - optional bpfmap_file_entry bpf = 21; - optional chrfile_entry chr = 23; -+ optional infiniband_entry infiniband = 25; - } --- -2.34.1 - diff --git a/0029-cred-provide-cred-checkpoint-restore-method.patch b/0029-cred-provide-cred-checkpoint-restore-method.patch deleted file mode 100644 index 2ede5b5894a3f5877574aa7eebc19bc138a0ed28..0000000000000000000000000000000000000000 --- a/0029-cred-provide-cred-checkpoint-restore-method.patch +++ /dev/null @@ -1,255 +0,0 @@ -From e522deb5680840e878b8f05c66f040cfd3b49d90 Mon Sep 17 00:00:00 2001 -From: Jingxian He -Date: Wed, 19 May 2021 21:47:28 +0800 -Subject: [PATCH 29/72] cred: provide cred checkpoint restore method - -criu checkpoint/restore the task, it only restore the context instead of -the memory address storing the context. - -To handle the problem resulted by CVE bugfix, details: -- https://nvd.nist.gov/vuln/detail/CVE-2016-4565 -- https://openfabrics.org/images/2018workshop/presentations/113_MRuhl_JourneytoVerbsIOCTL.pdf - -Brief: - Refresh the security context address of file. The infiniband code use -write()` as bi-directional `ioctl()`, there is `struct cred` address -uring `write()` process. However, criu uses some syscall, such as -capset()` and `setgroups()`, to regenerate the new cred, the file -red is fixed by `fcntl(F_SETOWN)`, then the address of new cred is -ifferent from the file. - This patch fix the `struct cred` address checking problem resulted by -VE fixed in infiniband drivers. - -Conflict:NA -Reference:https://gitee.com/src-openeuler/criu/pulls/21 -Signed-off-by: luolongjun -Signed-off-by: fu.lin ---- - criu/config.c | 1 + - criu/cr-restore.c | 35 +++++++++++++++++++++++++++++++++++ - criu/crtools.c | 1 + - criu/include/cr_options.h | 1 + - criu/include/fcntl.h | 4 ++++ - criu/include/prctl.h | 4 ++++ - criu/include/restorer.h | 3 +++ - criu/pie/restorer.c | 38 ++++++++++++++++++++++++++++++++++++++ - 8 files changed, 87 insertions(+) - -diff --git a/criu/config.c b/criu/config.c -index 03cad66..cf99fb1 100644 ---- a/criu/config.c -+++ b/criu/config.c -@@ -702,6 +702,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, - BOOL_OPT("use-fork-pid", &opts.use_fork_pid), - BOOL_OPT("with-notifier", &opts.with_notifier_kup), - BOOL_OPT("dump-char-dev", &opts.dump_char_dev), -+ BOOL_OPT("with-fd-cred", &opts.with_fd_cred), - {}, - }; - -diff --git a/criu/cr-restore.c b/criu/cr-restore.c -index 2904a75..ac677a1 100644 ---- a/criu/cr-restore.c -+++ b/criu/cr-restore.c -@@ -692,6 +692,28 @@ static int __collect_child_pids(struct pstree_item *p, int state, unsigned int * - return 0; - } - -+static int collect_child_fds(int state, unsigned int *n, struct pstree_item *me) -+{ -+ struct list_head *list = &rsti(me)->fds; -+ struct fdinfo_list_entry *fle, *tmp; -+ -+ *n = 0; -+ list_for_each_entry_safe(fle, tmp, list, ps_list) { -+ if (fle->fe->type == state) { -+ int *child; -+ -+ child = rst_mem_alloc(sizeof(*child), RM_PRIVATE); -+ if (!child) -+ return -1; -+ -+ (*n)++; -+ *child = fle->fe->fd; -+ } -+ } -+ -+ return 0; -+} -+ - static int collect_child_pids(int state, unsigned int *n) - { - struct pstree_item *pi; -@@ -715,6 +737,12 @@ static int collect_child_pids(int state, unsigned int *n) - return __collect_child_pids(current, state, n); - } - -+static int collect_chr_fds(struct pstree_item *me, struct task_restore_args *ta) -+{ -+ ta->setcred_pids = (int *)rst_mem_align_cpos(RM_PRIVATE); -+ return collect_child_fds(FD_TYPES__CHR, &ta->setcred_pids_n, me); -+} -+ - static int collect_helper_pids(struct task_restore_args *ta) - { - ta->helpers = (pid_t *)rst_mem_align_cpos(RM_PRIVATE); -@@ -939,6 +967,9 @@ static int restore_one_alive_task(int pid, CoreEntry *core) - if (collect_zombie_pids(ta) < 0) - return -1; - -+ if (opts.with_fd_cred && collect_chr_fds(current, ta) < 0) -+ return -1; -+ - if (collect_inotify_fds(ta) < 0) - return -1; - -@@ -3746,6 +3777,10 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns - RST_MEM_FIXUP_PPTR(task_args->helpers); - RST_MEM_FIXUP_PPTR(task_args->zombies); - RST_MEM_FIXUP_PPTR(task_args->vma_ios); -+ if (opts.with_fd_cred) -+ RST_MEM_FIXUP_PPTR(task_args->setcred_pids); -+ else -+ task_args->setcred_pids_n = UINT_MAX; - RST_MEM_FIXUP_PPTR(task_args->inotify_fds); - - task_args->compatible_mode = core_is_compat(core); -diff --git a/criu/crtools.c b/criu/crtools.c -index dc6d603..ed7bd99 100644 ---- a/criu/crtools.c -+++ b/criu/crtools.c -@@ -453,6 +453,7 @@ usage: - " This feature needs the kernel assistance.\n" - " --dump-char-dev Dump char dev files as normal file with repair cmd\n" - \ -+ " --with-fd-cred Allow to make the restored process has the same cred\n" - "\n" - "Check options:\n" - " Without options, \"criu check\" checks availability of absolutely required\n" -diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h -index 226acb2..1d6ddcf 100644 ---- a/criu/include/cr_options.h -+++ b/criu/include/cr_options.h -@@ -194,6 +194,7 @@ struct cr_options { - int use_fork_pid; - int with_notifier_kup; - int dump_char_dev; -+ int with_fd_cred; - }; - - extern struct cr_options opts; -diff --git a/criu/include/fcntl.h b/criu/include/fcntl.h -index 35f8805..568977c 100644 ---- a/criu/include/fcntl.h -+++ b/criu/include/fcntl.h -@@ -19,6 +19,10 @@ struct f_owner_ex { - #define F_GETOWNER_UIDS 17 - #endif - -+#ifndef F_SETCRED -+#define F_SETCRED 18 -+#endif -+ - /* - * These things are required to compile on CentOS-6 - */ -diff --git a/criu/include/prctl.h b/criu/include/prctl.h -index c843f40..81dda9d 100644 ---- a/criu/include/prctl.h -+++ b/criu/include/prctl.h -@@ -82,4 +82,8 @@ struct prctl_mm_map { - #define PR_GET_THP_DISABLE 42 - #endif - -+#ifndef PR_DEFAULT_CRED -+#define PR_DEFAULT_CRED 54 -+#endif -+ - #endif /* __CR_PRCTL_H__ */ -diff --git a/criu/include/restorer.h b/criu/include/restorer.h -index a81cc1b..60c1dab 100644 ---- a/criu/include/restorer.h -+++ b/criu/include/restorer.h -@@ -193,6 +193,9 @@ struct task_restore_args { - pid_t *zombies; - unsigned int zombies_n; - -+ int *setcred_pids; -+ unsigned int setcred_pids_n; -+ - int *inotify_fds; /* fds to cleanup inotify events at CR_STATE_RESTORE_SIGCHLD stage */ - unsigned int inotify_fds_n; - -diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c -index dcc922e..fde6e30 100644 ---- a/criu/pie/restorer.c -+++ b/criu/pie/restorer.c -@@ -101,6 +101,7 @@ static int restore_anon_mapping(VmaEntry *vma_entry, struct vma_names *vma_name) - static struct task_entries *task_entries_local; - static futex_t thread_inprogress; - static futex_t thread_start; -+static futex_t cred_set; - static pid_t *helpers; - static int n_helpers; - static pid_t *zombies; -@@ -365,6 +366,41 @@ static int restore_creds(struct thread_creds_args *args, int procfd, int lsm_typ - return 0; - } - -+static int update_cred_ref(struct task_restore_args *ta) -+{ -+ int i; -+ int ret; -+ int pid = sys_getpid(); -+ long int tid = sys_gettid(); -+ -+ if (ta->setcred_pids_n == UINT_MAX) { -+ pr_info("no need to keep the same cred \n"); -+ return 0; -+ } -+ -+ if (pid == tid) { -+ /* let main thread finish cred update first */ -+ ret = sys_prctl(PR_DEFAULT_CRED, 0, 0, 0, 0); -+ pr_info("main cred restore \n"); -+ futex_set_and_wake(&cred_set, 1); -+ } else { -+ futex_wait_until(&cred_set, 1); -+ pr_info("other cred restore \n"); -+ ret = sys_prctl(PR_DEFAULT_CRED, 0, 0, 0, 0); -+ } -+ -+ if (ret) -+ return ret; -+ -+ pr_info("%ld (%d) is going to update current cred \n", tid, pid); -+ -+ for (i = 0; i < ta->setcred_pids_n; i++) { -+ sys_fcntl(ta->setcred_pids[i], F_SETCRED, 0); -+ } -+ -+ return 0; -+} -+ - /* - * This should be done after creds restore, as - * some creds changes might drop the value back -@@ -742,6 +778,7 @@ long __export_restore_thread(struct thread_restore_args *args) - BUG(); - - ret = restore_creds(args->creds_args, args->ta->proc_fd, args->ta->lsm_type); -+ ret = ret || update_cred_ref(args->ta); - ret = ret || restore_dumpable_flag(&args->ta->mm); - ret = ret || restore_pdeath_sig(args); - if (ret) -@@ -2221,6 +2258,7 @@ long __export_restore_task(struct task_restore_args *args) - * thus restore* creds _after_ all of the above. - */ - ret = restore_creds(args->t->creds_args, args->proc_fd, args->lsm_type); -+ ret = ret || update_cred_ref(args); - ret = ret || restore_dumpable_flag(&args->mm); - ret = ret || restore_pdeath_sig(args->t); - ret = ret || restore_child_subreaper(args->child_subreaper); --- -2.34.1 - diff --git a/0030-socket-fix-connect-error-of-invalid-param.patch b/0030-socket-fix-connect-error-of-invalid-param.patch deleted file mode 100644 index 32a43d8e49e2354f9a65b4aa43487453df39d4c7..0000000000000000000000000000000000000000 --- a/0030-socket-fix-connect-error-of-invalid-param.patch +++ /dev/null @@ -1,93 +0,0 @@ -From 8afde209d2a9245d902eabe40ca7c514aeb6ee9a Mon Sep 17 00:00:00 2001 -From: Jingxian He -Date: Wed, 19 May 2021 21:56:16 +0800 -Subject: [PATCH 30/72] socket: fix connect error of invalid param - -Fix connect error of invalid param during module upgrade. - -Conflict:NA -Reference:https://gitee.com/src-openeuler/criu/pulls/21 -Signed-off-by: Xiaoguang Li -Signed-off-by: fu.lin ---- - criu/include/sockets.h | 1 + - criu/sk-inet.c | 13 +++++++++++-- - criu/sockets.c | 5 ++++- - 3 files changed, 16 insertions(+), 3 deletions(-) - -diff --git a/criu/include/sockets.h b/criu/include/sockets.h -index 3e8f3d6..2391b48 100644 ---- a/criu/include/sockets.h -+++ b/criu/include/sockets.h -@@ -27,6 +27,7 @@ struct socket_desc { - extern int dump_socket(struct fd_parms *p, int lfd, FdinfoEntry *); - extern int dump_socket_opts(int sk, SkOptsEntry *soe); - extern int restore_socket_opts(int sk, SkOptsEntry *soe); -+extern int restore_bound_opts(int sk, SkOptsEntry *soe); - extern void release_skopts(SkOptsEntry *); - extern int restore_prepare_socket(int sk); - extern void preload_socket_modules(void); -diff --git a/criu/sk-inet.c b/criu/sk-inet.c -index e52b198..05048c8 100644 ---- a/criu/sk-inet.c -+++ b/criu/sk-inet.c -@@ -100,15 +100,20 @@ static void show_one_inet(const char *act, const struct inet_sk_desc *sk) - static void show_one_inet_img(const char *act, const InetSkEntry *e) - { - char src_addr[INET_ADDR_LEN] = ""; -+ char dst_addr[INET_ADDR_LEN] = ""; - - if (inet_ntop(e->family, (void *)e->src_addr, src_addr, INET_ADDR_LEN) == NULL) { - pr_perror("Failed to translate address"); - } -+ if (inet_ntop(e->family, (void *)e->dst_addr, dst_addr, -+ INET_ADDR_LEN) == NULL) { -+ pr_perror("Failed to translate address"); -+ } - - pr_debug("\t%s: family %-10s type %-14s proto %-16s port %d " -- "state %-16s src_addr %s\n", -+ "state %-16s src_addr %s dst_addr %s\n", - act, ___socket_family_name(e->family), ___socket_type_name(e->type), ___socket_proto_name(e->proto), -- e->src_port, ___tcp_state_name(e->state), src_addr); -+ e->src_port, ___tcp_state_name(e->state), src_addr, dst_addr); - } - - static int can_dump_ipproto(unsigned int ino, int proto, int type) -@@ -852,6 +857,10 @@ static int open_inet_sk(struct file_desc *d, int *new_fd) - if (restore_opt(sk, SOL_SOCKET, SO_REUSEPORT, &yes)) - goto err; - -+ if(restore_bound_opts(sk, ie->opts) < 0){ -+ goto err; -+ } -+ - if (tcp_connection(ie)) { - if (!opts.tcp_established_ok && !opts.tcp_close) { - pr_err("Connected TCP socket in image\n"); -diff --git a/criu/sockets.c b/criu/sockets.c -index 9426b5b..2ddf85e 100644 ---- a/criu/sockets.c -+++ b/criu/sockets.c -@@ -586,7 +586,6 @@ int restore_socket_opts(int sk, SkOptsEntry *soe) - tv.tv_usec = soe->so_rcv_tmo_usec; - ret |= restore_opt(sk, SOL_SOCKET, SO_RCVTIMEO, &tv); - -- ret |= restore_bound_dev(sk, soe); - ret |= restore_socket_filter(sk, soe); - - /* The restore of SO_REUSEADDR depends on type of socket */ -@@ -594,6 +593,10 @@ int restore_socket_opts(int sk, SkOptsEntry *soe) - return ret; - } - -+int restore_bound_opts(int sk, SkOptsEntry *soe){ -+ return restore_bound_dev(sk, soe); -+} -+ - int do_dump_opt(int sk, int level, int name, void *val, int len) - { - socklen_t aux = len; --- -2.34.1 - diff --git a/0031-criu-eventpollfd-fix-for-improper-usage-in-appdata.patch b/0031-criu-eventpollfd-fix-for-improper-usage-in-appdata.patch deleted file mode 100644 index c3e2a6a9dcb2e81a93c12fa9325cb260f5de3f56..0000000000000000000000000000000000000000 --- a/0031-criu-eventpollfd-fix-for-improper-usage-in-appdata.patch +++ /dev/null @@ -1,99 +0,0 @@ -From 89eb9deee6da8acc7747e103ee591f299fec2043 Mon Sep 17 00:00:00 2001 -From: Jingxian He -Date: Wed, 19 May 2021 21:56:38 +0800 -Subject: [PATCH 31/72] criu: eventpollfd fix for improper usage in appdata - -Fix eventpollfd problem of improper usage in appdata. - -Conflict:NA -Reference:https://gitee.com/src-openeuler/criu/pulls/21 -Signed-off-by: Jingxian He -Signed-off-by: fu.lin ---- - criu/eventpoll.c | 16 +++++++++++----- - criu/proc_parse.c | 2 ++ - images/eventpoll.proto | 3 +++ - 3 files changed, 16 insertions(+), 5 deletions(-) - -diff --git a/criu/eventpoll.c b/criu/eventpoll.c -index 978dca5..8900d50 100644 ---- a/criu/eventpoll.c -+++ b/criu/eventpoll.c -@@ -67,8 +67,8 @@ int is_eventpoll_link(char *link) - - static void pr_info_eventpoll_tfd(char *action, uint32_t id, EventpollTfdEntry *e) - { -- pr_info("%seventpoll-tfd: id %#08x tfd %8d events %#08x data %#016" PRIx64 "\n", action, id, e->tfd, e->events, -- e->data); -+ pr_info("%seventpoll-tfd: id %#08x tfd %8d events %#08x data %#016" PRIx64 " ignore %d\n", -+ action, id, e->tfd, e->events, e->data, e->ignore); - } - - static void pr_info_eventpoll(char *action, EventpollFileEntry *e) -@@ -144,9 +144,9 @@ int flush_eventpoll_dinfo_queue(void) - }; - struct kid_elem *t = kid_lookup_epoll_tfd(&fd_tree, &ke, &slot); - if (!t) { -- pr_debug("kid_lookup_epoll: no match pid %d efd %d tfd %d toff %u\n", dinfo->pid, -- dinfo->efd, tfde->tfd, dinfo->toff[i].off); -- goto err; -+ pr_info("Drop tfd entry, pid %d efd %d tfd %d toff %u\n", -+ dinfo->pid, dinfo->efd, tfde->tfd, dinfo->toff[i].off); -+ continue; - } - - pr_debug("kid_lookup_epoll: rbsearch match pid %d efd %d tfd %d toff %u -> %d\n", dinfo->pid, -@@ -159,6 +159,7 @@ int flush_eventpoll_dinfo_queue(void) - goto err; - } - -+ pr_info("Change tfd: %d -> %d @ efd=%d\n", tfde->tfd, t->idx, slot.efd); - tfde->tfd = t->idx; - } - -@@ -409,6 +410,11 @@ static int eventpoll_retore_tfd(int fd, int id, EventpollTfdEntry *tdefe) - { - struct epoll_event event; - -+ if (tdefe->ignore) { -+ pr_info_eventpoll_tfd("Ignore ", id, tdefe); -+ return 0; -+ } -+ - pr_info_eventpoll_tfd("Restore ", id, tdefe); - - event.events = tdefe->events; -diff --git a/criu/proc_parse.c b/criu/proc_parse.c -index daa54d9..d13589c 100644 ---- a/criu/proc_parse.c -+++ b/criu/proc_parse.c -@@ -1895,10 +1895,12 @@ static int parse_fdinfo_pid_s(int pid, int fd, int type, void *arg) - e->has_dev = false; - e->has_inode = false; - e->has_pos = false; -+ e->has_ignore = false; - } else if (ret == 6) { - e->has_dev = true; - e->has_inode = true; - e->has_pos = true; -+ e->has_ignore = true; - } else if (ret < 6) { - eventpoll_tfd_entry__free_unpacked(e, NULL); - goto parse_err; -diff --git a/images/eventpoll.proto b/images/eventpoll.proto -index 0f3e8a8..2fd9598 100644 ---- a/images/eventpoll.proto -+++ b/images/eventpoll.proto -@@ -14,6 +14,9 @@ message eventpoll_tfd_entry { - optional uint32 dev = 5; - optional uint64 inode = 6; - optional uint64 pos = 7; -+ -+ /* entry validation */ -+ optional uint32 ignore = 8; - } - - message eventpoll_file_entry { --- -2.34.1 - diff --git a/0032-task_exit_notify-add-task-exit-notify-mask-method-fo.patch b/0032-task_exit_notify-add-task-exit-notify-mask-method-fo.patch deleted file mode 100644 index f1d2396a169cb1a73b6bf3f5bd94bb8385168f8c..0000000000000000000000000000000000000000 --- a/0032-task_exit_notify-add-task-exit-notify-mask-method-fo.patch +++ /dev/null @@ -1,193 +0,0 @@ -From 58a8c9eb07c2cff6232c20f9a59edc634bb1e5e0 Mon Sep 17 00:00:00 2001 -From: Jingxian He -Date: Wed, 19 May 2021 21:59:24 +0800 -Subject: [PATCH 32/72] task_exit_notify: add task exit notify mask method for - criu - -Add task exit notify mask method for criu during kernel module upgrade. - -Signed-off-by: Jingxian He -Signed-off-by: fu.lin ---- - criu/Makefile.crtools | 1 + - criu/config.c | 1 + - criu/cr-restore.c | 10 ++++++++++ - criu/crtools.c | 1 + - criu/exit-notify.c | 34 ++++++++++++++++++++++++++++++++++ - criu/include/cr_options.h | 1 + - criu/include/exit-notify.h | 10 ++++++++++ - criu/seize.c | 10 +++++++++- - 8 files changed, 67 insertions(+), 1 deletion(-) - create mode 100644 criu/exit-notify.c - create mode 100644 criu/include/exit-notify.h - -diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools -index 2e82912..65cc215 100644 ---- a/criu/Makefile.crtools -+++ b/criu/Makefile.crtools -@@ -93,6 +93,7 @@ obj-y += timens.o - obj-y += pin-mem.o - obj-y += devname.o - obj-y += files-chr.o -+obj-y += exit-notify.o - obj-$(CONFIG_HAS_LIBBPF) += bpfmap.o - obj-$(CONFIG_COMPAT) += pie-util-vdso-elf32.o - CFLAGS_pie-util-vdso-elf32.o += -DCONFIG_VDSO_32 -diff --git a/criu/config.c b/criu/config.c -index cf99fb1..bd0f84d 100644 ---- a/criu/config.c -+++ b/criu/config.c -@@ -703,6 +703,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, - BOOL_OPT("with-notifier", &opts.with_notifier_kup), - BOOL_OPT("dump-char-dev", &opts.dump_char_dev), - BOOL_OPT("with-fd-cred", &opts.with_fd_cred), -+ BOOL_OPT("mask-exit-notify", &opts.mask_exit_notify), - {}, - }; - -diff --git a/criu/cr-restore.c b/criu/cr-restore.c -index ac677a1..09f135b 100644 ---- a/criu/cr-restore.c -+++ b/criu/cr-restore.c -@@ -82,6 +82,7 @@ - #include "apparmor.h" - #include "pin-mem.h" - #include "notifier.h" -+#include "exit-notify.h" - - #include "parasite-syscall.h" - #include "files-reg.h" -@@ -1542,6 +1543,15 @@ static inline int fork_with_pid(struct pstree_item *item) - pr_debug("PID: real %d virt %d\n", item->pid->real, vpid(item)); - } - -+ if (opts.mask_exit_notify) { -+ int pid = ret; -+ -+ pr_info("Start unmask exit notifier for pid %d\n", pid); -+ ret = mask_task_exit_notify(pid, false); -+ if (ret) -+ pr_err("Can't unmask exit notifier for pid %d\n", pid); -+ } -+ - err_unlock: - if (!(ca.clone_flags & CLONE_NEWPID)) - unlock_last_pid(); -diff --git a/criu/crtools.c b/criu/crtools.c -index ed7bd99..1a41be4 100644 ---- a/criu/crtools.c -+++ b/criu/crtools.c -@@ -454,6 +454,7 @@ usage: - " --dump-char-dev Dump char dev files as normal file with repair cmd\n" - \ - " --with-fd-cred Allow to make the restored process has the same cred\n" -+ " --mask-exit-notify Mask task exit notify during dump and restore\n" - "\n" - "Check options:\n" - " Without options, \"criu check\" checks availability of absolutely required\n" -diff --git a/criu/exit-notify.c b/criu/exit-notify.c -new file mode 100644 -index 0000000..5c86724 ---- /dev/null -+++ b/criu/exit-notify.c -@@ -0,0 +1,34 @@ -+#include -+#include -+#include -+#include -+ -+#include "exit-notify.h" -+#include "log.h" -+ -+int mask_task_exit_notify(int pid, bool mask) -+{ -+ int fd, retval; -+ char buf[PID_BUF_SIZE] = {0}; -+ -+ if (pid <= 0) -+ return -1; -+ -+ snprintf(buf, PID_BUF_SIZE - 1, "%d", pid); -+ if (mask) -+ fd = open(MASK_EXIT_NOTIFY_DIR, O_WRONLY, 0); -+ else -+ fd = open(UNMASK_EXIT_NOTIFY_DIR, O_WRONLY, 0); -+ -+ if (fd < 0) { -+ pr_err("open mask exit notify file fail\n"); -+ return fd; -+ } -+ -+ retval = write(fd, buf, PID_BUF_SIZE); -+ if (retval < 0) -+ pr_err("Write mask exit pid: %s fail\n", buf); -+ close(fd); -+ -+ return retval < 0 ? -1 : 0; -+} -diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h -index 1d6ddcf..26ae5b6 100644 ---- a/criu/include/cr_options.h -+++ b/criu/include/cr_options.h -@@ -195,6 +195,7 @@ struct cr_options { - int with_notifier_kup; - int dump_char_dev; - int with_fd_cred; -+ int mask_exit_notify; - }; - - extern struct cr_options opts; -diff --git a/criu/include/exit-notify.h b/criu/include/exit-notify.h -new file mode 100644 -index 0000000..34f2c8d ---- /dev/null -+++ b/criu/include/exit-notify.h -@@ -0,0 +1,10 @@ -+#ifndef __CRIU_EXIT_NOTIFY_H__ -+#define __CRIU_EXIT_NOTIFY_H__ -+ -+#define PID_BUF_SIZE 32 -+#define MASK_EXIT_NOTIFY_DIR "/sys/kernel/mask_exit_notify" -+#define UNMASK_EXIT_NOTIFY_DIR "/sys/kernel/unmask_exit_notify" -+ -+int mask_task_exit_notify(int pid, bool mask); -+ -+#endif /* __CRIU_EXIT_NOTIFY_H__ */ -diff --git a/criu/seize.c b/criu/seize.c -index 8a35c3c..1e127ff 100644 ---- a/criu/seize.c -+++ b/criu/seize.c -@@ -24,6 +24,8 @@ - #include "xmalloc.h" - #include "util.h" - #include "pin-mem.h" -+#include "mem.h" -+#include "exit-notify.h" - - #define NR_ATTEMPTS 5 - -@@ -636,7 +638,7 @@ free: - - static void unseize_task_and_threads(const struct pstree_item *item, int st) - { -- int i; -+ int i, ret; - - if (item->pid->state == TASK_DEAD) - return; -@@ -646,6 +648,12 @@ static void unseize_task_and_threads(const struct pstree_item *item, int st) - dump_task_special_pages(item->threads[i].real); - } - -+ if (opts.mask_exit_notify && (st == TASK_DEAD)) { -+ ret = mask_task_exit_notify(item->threads[0].real, true); -+ if (ret) -+ pr_err("mask exit notify for %d fail.\n", item->threads[0].real); -+ } -+ - /* - * The st is the state we want to switch tasks into, - * the item->state is the state task was in when we seized one. --- -2.34.1 - diff --git a/0033-unix-socket-add-support-for-unix-stream-socket.patch b/0033-unix-socket-add-support-for-unix-stream-socket.patch deleted file mode 100644 index 2d619151c88d4d04be6952097fe992b594711a3b..0000000000000000000000000000000000000000 --- a/0033-unix-socket-add-support-for-unix-stream-socket.patch +++ /dev/null @@ -1,403 +0,0 @@ -From fe39f73462e84a1a59d9b2b81a97e26cd1f2d20c Mon Sep 17 00:00:00 2001 -From: Luo Longjun -Date: Mon, 7 Jun 2021 11:50:42 +0800 -Subject: [PATCH 33/72] unix socket: add support for unix stream socket - -When dump unix stream socket with external connections, -we will tell kernel to turn repair mode on for this sock. -And then kernel will keep this sock before restoring it. -In this process, the other socket which communicates with -this sock in repair mode will get EAGAIN or blocked. - -Signed-off-by: Luo Longjun - -fix unix socket dump and restore err -Fix name-less unix socket dump and restore problem. - -Signed-off-by: Jingxian He - -unix socket:ignore repair error from kernel -leave error for applications to deal with. - -Signed-off-by: Luo Longjun - -- enable this feature by check cmdline `unix_stream_restore_enable` -- don't set repair mode for non-external socket - -Signed-off-by: fu.lin ---- - criu/cr-dump.c | 1 + - criu/include/kerndat.h | 1 + - criu/include/sockets.h | 1 + - criu/kerndat.c | 33 +++++++++ - criu/sk-unix.c | 150 ++++++++++++++++++++++++++++++++++++++--- - images/sk-unix.proto | 1 + - 6 files changed, 178 insertions(+), 9 deletions(-) - -diff --git a/criu/cr-dump.c b/criu/cr-dump.c -index fd17413..e0e11cc 100644 ---- a/criu/cr-dump.c -+++ b/criu/cr-dump.c -@@ -2002,6 +2002,7 @@ static int cr_dump_finish(int ret) - - cr_plugin_fini(CR_PLUGIN_STAGE__DUMP, ret); - cgp_fini(); -+ unix_stream_unlock(ret); - - if (!ret) { - /* -diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h -index 05abeda..3979939 100644 ---- a/criu/include/kerndat.h -+++ b/criu/include/kerndat.h -@@ -76,6 +76,7 @@ struct kerndat_s { - bool has_nftables_concat; - bool has_rseq; - bool has_ptrace_get_rseq_conf; -+ bool has_unix_sk_repair; - }; - - extern struct kerndat_s kdat; -diff --git a/criu/include/sockets.h b/criu/include/sockets.h -index 2391b48..e43a760 100644 ---- a/criu/include/sockets.h -+++ b/criu/include/sockets.h -@@ -43,6 +43,7 @@ extern int add_fake_unix_queuers(void); - extern int fix_external_unix_sockets(void); - extern int prepare_scms(void); - extern int unix_note_scm_rights(int id_for, uint32_t *file_ids, int *fds, int n_ids); -+extern void unix_stream_unlock(int ret); - - extern struct collect_image_info netlink_sk_cinfo; - -diff --git a/criu/kerndat.c b/criu/kerndat.c -index af7113a..6d6aac1 100644 ---- a/criu/kerndat.c -+++ b/criu/kerndat.c -@@ -1259,6 +1259,36 @@ static int kerndat_has_nftables_concat(void) - #endif - } - -+#define UNIX_STREAM_RESTORE_ENABLE_FILE "/sys/module/kernel/parameters/unix_stream_restore_enable" -+ -+static void kerndat_has_unix_sk_repair(void) -+{ -+ FILE *fp; -+ char ch = 'N'; -+ -+ if (access(UNIX_STREAM_RESTORE_ENABLE_FILE, F_OK) < 0) { -+ pr_debug("C/R external unix stream socket is not support\n"); -+ return; -+ } -+ -+ fp = fopen(UNIX_STREAM_RESTORE_ENABLE_FILE, "r"); -+ if (fp == NULL) { -+ pr_err("failed to open '%s': %s\n", -+ UNIX_STREAM_RESTORE_ENABLE_FILE, strerror(errno)); -+ return; -+ } -+ -+ fscanf(fp, "%c", &ch); -+ if (ch == 'Y') { -+ pr_debug("enable C/R external unix stream socket support\n"); -+ kdat.has_unix_sk_repair = true; -+ } -+ -+ fclose(fp); -+ -+ return; -+} -+ - int kerndat_init(void) - { - int ret; -@@ -1419,6 +1449,9 @@ int kerndat_init(void) - pr_err("kerndat_has_ptrace_get_rseq_conf failed when initializing kerndat.\n"); - ret = -1; - } -+ -+ kerndat_has_unix_sk_repair(); -+ - kerndat_lsm(); - kerndat_mmap_min_addr(); - kerndat_files_stat(); -diff --git a/criu/sk-unix.c b/criu/sk-unix.c -index f3fe60c..86bfa18 100644 ---- a/criu/sk-unix.c -+++ b/criu/sk-unix.c -@@ -72,6 +72,7 @@ struct unix_sk_desc { - char *name; - unsigned int nr_icons; - unsigned int *icons; -+ int repair_ino; - - unsigned int vfs_dev; - unsigned int vfs_ino; -@@ -89,9 +90,18 @@ struct unix_sk_desc { - struct list_head peer_list; - struct list_head peer_node; - -+ struct list_head repair_list; -+ struct list_head repair_node; -+ struct unix_stream_extern_socket_desc *ext_node; -+ - UnixSkEntry *ue; - }; - -+struct unix_stream_extern_socket_desc { -+ struct list_head list; -+ int fd; -+}; -+ - /* - * The mutex_ghost is accessed from different tasks, - * so make sure it is in shared memory. -@@ -100,6 +110,7 @@ static mutex_t *mutex_ghost; - - static LIST_HEAD(unix_sockets); - static LIST_HEAD(unix_ghost_addr); -+static LIST_HEAD(unix_stream_external_sockets); - - static int unix_resolve_name(int lfd, uint32_t id, struct unix_sk_desc *d, UnixSkEntry *ue, const struct fd_parms *p); - -@@ -116,6 +127,26 @@ struct unix_sk_listen_icon { - - static struct unix_sk_listen_icon *unix_listen_icons[SK_HASH_SIZE]; - -+static int unix_stream_repair_on(int fd) -+{ -+ int ret, aux = 1; -+ ret = setsockopt(fd, SOL_TCP, TCP_REPAIR_OPTIONS, &aux, sizeof(aux)); -+ if (ret < 0) -+ pr_err("Can't turn repair mod for unix stream on. \n"); -+ -+ return ret; -+} -+ -+static int unix_stream_repair_off(int fd) -+{ -+ int ret, aux = 0; -+ ret = setsockopt(fd, SOL_TCP, TCP_REPAIR_OPTIONS, &aux, sizeof(aux)); -+ if (ret < 0) -+ pr_err("Can't turn repair mod for unix stream off. \n"); -+ -+ return ret; -+} -+ - static struct unix_sk_listen_icon *lookup_unix_listen_icons(unsigned int peer_ino) - { - struct unix_sk_listen_icon *ic; -@@ -331,6 +362,8 @@ static int dump_one_unix_fd(int lfd, uint32_t id, const struct fd_parms *p) - FilePermsEntry *perms; - FownEntry *fown; - void *m; -+ unsigned int len; -+ int ret; - - m = xmalloc(sizeof(UnixSkEntry) + sizeof(SkOptsEntry) + sizeof(FilePermsEntry) + sizeof(FownEntry)); - if (!m) -@@ -372,6 +405,7 @@ static int dump_one_unix_fd(int lfd, uint32_t id, const struct fd_parms *p) - ue->fown = fown; - ue->opts = skopts; - ue->uflags = 0; -+ ue->repair_ino = 0; - - if (unix_resolve_name(lfd, id, sk, ue, p)) - goto err; -@@ -419,6 +453,41 @@ static int dump_one_unix_fd(int lfd, uint32_t id, const struct fd_parms *p) - goto err; - } - -+ /* -+ * Don't handle non-external unix socket, criu will restore it. -+ * -+ * use `sk->name != NULL || peer->name != NULL` to prevent -+ * `socketpair()` sk condition. -+ */ -+ if (kdat.has_unix_sk_repair && !sk->sd.already_dumped -+ && (sk->name != NULL || peer->name != NULL) -+ && ue->type == SOCK_STREAM) { -+ struct unix_stream_extern_socket_desc *d; -+ -+ d = xzalloc(sizeof(*d)); -+ if (!d) -+ goto err; -+ -+ /* Attention: used for upgrade in the same machine -+ * May in conflict with original usage -+ */ -+ pr_info("set %d(fd %d) unix stream repair on \n", sk->sd.ino, lfd); -+ ret = unix_stream_repair_on(lfd); -+ if (ret < 0) -+ goto err; -+ -+ d->fd = dup(lfd); -+ pr_info("add %d into unix_stream_external_sockets\n", sk->sd.ino); -+ list_add_tail(&d->list, &unix_stream_external_sockets); -+ list_add(&sk->repair_node, &peer->repair_list); -+ sk->ext_node = d; -+ -+ len = sizeof(ue->repair_ino); -+ ret = getsockopt(lfd, SOL_TCP, TCP_REPAIR_OPTIONS, &ue->repair_ino, &len); -+ if (ret < 0) -+ goto err; -+ } -+ - /* - * Peer should have us as peer or have a name by which - * we can access one. -@@ -520,6 +589,26 @@ dump: - - sk->sd.already_dumped = 1; - -+ while (!list_empty(&sk->repair_list)) { -+ struct unix_sk_desc *psk; -+ struct unix_stream_extern_socket_desc *d; -+ -+ psk = list_first_entry(&sk->repair_list, struct unix_sk_desc, repair_node); -+ list_del_init(&psk->repair_node); -+ -+ pr_info("delete ino %d into unix_stream_external_sockets\n", psk->sd.ino); -+ -+ d = psk->ext_node; -+ list_del_init(&d->list); -+ psk->ext_node = NULL; -+ /* ino start from 1, using 0 to tag the non-repairing socket is safe. */ -+ psk->ue->repair_ino = 0; -+ -+ unix_stream_repair_off(d->fd); -+ close_safe(&d->fd); -+ xfree(d); -+ } -+ - while (!list_empty(&sk->peer_list)) { - struct unix_sk_desc *psk; - psk = list_first_entry(&sk->peer_list, struct unix_sk_desc, peer_node); -@@ -754,6 +843,8 @@ static int unix_collect_one(const struct unix_diag_msg *m, struct nlattr **tb, s - - INIT_LIST_HEAD(&d->peer_list); - INIT_LIST_HEAD(&d->peer_node); -+ INIT_LIST_HEAD(&d->repair_list); -+ INIT_LIST_HEAD(&d->repair_node); - d->fd = -1; - - if (tb[UNIX_DIAG_SHUTDOWN]) -@@ -866,16 +957,18 @@ static int __dump_external_socket(struct unix_sk_desc *sk, struct unix_sk_desc * - return -1; - } - -- if (peer->type != SOCK_DGRAM) { -- show_one_unix("Ext stream not supported", peer); -- pr_err("Can't dump half of stream unix connection.\n"); -+ if (peer->type != SOCK_DGRAM && -+ peer->type != SOCK_STREAM) { -+ show_one_unix("Ext unix type not supported", peer); -+ pr_err("Can't dump this kind of unix connection.\n"); - return -1; - } - -- if (!peer->name) { -+ /* part 1: prevent NULL pointer oops */ -+ if (!peer->name && !sk->name) { - show_one_unix("Ext dgram w/o name", peer); -+ show_one_unix("Ext dgram w/o name", sk); - pr_err("Can't dump name-less external socket.\n"); -- pr_err("%d\n", sk->fd); - return -1; - } - -@@ -921,7 +1014,7 @@ int fix_external_unix_sockets(void) - - fd_id_generate_special(NULL, &e.id); - e.ino = sk->sd.ino; -- e.type = SOCK_DGRAM; -+ e.type = sk->type; - e.state = TCP_LISTEN; - e.name.data = (void *)sk->name; - e.name.len = (size_t)sk->namelen; -@@ -948,6 +1041,20 @@ err: - return -1; - } - -+void unix_stream_unlock(int ret) -+{ -+ struct unix_stream_extern_socket_desc *d; -+ pr_debug("Unlocking unix stream sockets\n"); -+ -+ list_for_each_entry(d, &unix_stream_external_sockets, list) { -+ if (ret) { -+ pr_debug("unlock fd %d \n", d->fd); -+ unix_stream_repair_off(d->fd); -+ } -+ close_safe(&d->fd); -+ } -+} -+ - struct unix_sk_info { - UnixSkEntry *ue; - struct list_head list; -@@ -1335,6 +1442,7 @@ static int post_open_standalone(struct file_desc *d, int fd) - struct unix_sk_info *peer; - struct sockaddr_un addr; - int cwd_fd = -1, root_fd = -1, ns_fd = -1; -+ int ret, value; - - ui = container_of(d, struct unix_sk_info, d); - BUG_ON((ui->flags & (USK_PAIR_MASTER | USK_PAIR_SLAVE)) || (ui->ue->uflags & (USK_CALLBACK | USK_INHERIT))); -@@ -1391,7 +1499,28 @@ static int post_open_standalone(struct file_desc *d, int fd) - * while we're connecting in sake of ghost sockets. - */ - mutex_lock(mutex_ghost); -- if (connect(fd, (struct sockaddr *)&addr, sizeof(addr.sun_family) + len) < 0) { -+ -+ /* we handle unix stream with external connections here. -+ * -+ * use `sk->name != NULL || peer->name != NULL` to prevent -+ * `socketpair()` sk condition. -+ */ -+ if (kdat.has_unix_sk_repair && peer->name -+ && (ui->name != NULL || peer->name != NULL) -+ && ui->ue->type == SOCK_STREAM && ui->ue->repair_ino != 0) { -+ value = ui->ue->repair_ino; -+ ret = setsockopt(fd, SOL_TCP, TCP_REPAIR, &value, sizeof(value)); -+ if (ret < 0) { -+ /* permit the unix sk resume successfully when the peer has been -+ * closed, just warn here */ -+ pr_warn("Can't repair %d socket\n", value); -+ } -+ -+ ret = unix_stream_repair_off(fd); -+ if (ret < 0) { -+ goto err_revert_and_exit; -+ } -+ } else if (connect(fd, (struct sockaddr *)&addr, sizeof(addr.sun_family) + len) < 0) { - pr_perror("Can't connect %d socket", ui->ue->ino); - goto err_revert_and_exit; - } -@@ -2068,8 +2197,11 @@ static int init_unix_sk_info(struct unix_sk_info *ui, UnixSkEntry *ue) - } - - ui->name = (void *)ue->name.data; -- } else -- ui->name = NULL; -+ } else { -+ /* part 2: prevent NULL pointer oops */ -+ ui->name = ""; -+ } -+ - ui->name_dir = (void *)ue->name_dir; - - ui->flags = 0; -diff --git a/images/sk-unix.proto b/images/sk-unix.proto -index 8ddbccd..3f77718 100644 ---- a/images/sk-unix.proto -+++ b/images/sk-unix.proto -@@ -54,4 +54,5 @@ message unix_sk_entry { - optional uint32 ns_id = 16; - optional sint32 mnt_id = 17 [default = -1]; - /* Please, don't use field with number 18. */ -+ required sint32 repair_ino = 19; - } --- -2.34.1 - diff --git a/0034-netlink-add-repair-modes-and-clear-resource-when-fai.patch b/0034-netlink-add-repair-modes-and-clear-resource-when-fai.patch deleted file mode 100644 index 122df5b5796737c9e8e2429127f9c083441e9aa8..0000000000000000000000000000000000000000 --- a/0034-netlink-add-repair-modes-and-clear-resource-when-fai.patch +++ /dev/null @@ -1,104 +0,0 @@ -From 9b556899d67d7b20c64422fbde6292528772094d Mon Sep 17 00:00:00 2001 -From: Xiaoguang Li -Date: Mon, 29 Mar 2021 20:58:28 -0400 -Subject: [PATCH 34/72] netlink: add repair modes and clear resource when - failure - -Signed-off-by: Jingxian He ---- - criu/cr-dump.c | 3 +++ - criu/include/net.h | 1 + - criu/sk-netlink.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ - 3 files changed, 49 insertions(+) - -diff --git a/criu/cr-dump.c b/criu/cr-dump.c -index e0e11cc..b7e0214 100644 ---- a/criu/cr-dump.c -+++ b/criu/cr-dump.c -@@ -2073,6 +2073,9 @@ static int cr_dump_finish(int ret) - } else if (ret != 0 && opts.pin_memory) { - pr_info("clear pin mem info\n"); - clear_pin_mem(0); -+ } else if (ret != 0 && opts.with_notifier_kup) { -+ pr_info("repair off netlink fd\n"); -+ netlink_repair_off(); - } - - if (ret != 0 && opts.with_notifier_kup) { -diff --git a/criu/include/net.h b/criu/include/net.h -index 0da4cad..718cc45 100644 ---- a/criu/include/net.h -+++ b/criu/include/net.h -@@ -55,5 +55,6 @@ extern void check_has_netns_ioc(int fd, bool *kdat_val, const char *name); - extern int net_set_ext(struct ns_id *ns); - extern struct ns_id *get_root_netns(void); - extern int read_net_ns_img(void); -+extern int netlink_repair_off(void); - - #endif /* __CR_NET_H__ */ -diff --git a/criu/sk-netlink.c b/criu/sk-netlink.c -index 754eed9..d4b3b7b 100644 ---- a/criu/sk-netlink.c -+++ b/criu/sk-netlink.c -@@ -68,6 +68,47 @@ int netlink_receive_one(struct nlmsghdr *hdr, struct ns_id *ns, void *arg) - return sk_collect_one(m->ndiag_ino, PF_NETLINK, &sd->sd, ns); - } - -+struct netlink_repair_fd { -+ int netlink_fd; -+ struct list_head nlist; -+}; -+ -+static LIST_HEAD(netlink_repair_fds); -+ -+static int netlink_repair_on(int fd) -+{ -+ int ret, aux = 1; -+ struct netlink_repair_fd *nrf; -+ -+ ret = setsockopt(fd, SOL_NETLINK, TCP_REPAIR, &aux, sizeof(aux)); -+ if (ret < 0) { -+ pr_err("Can't turn netlink repair mode ON, error: %d\n", ret); -+ return ret; -+ } -+ nrf = malloc(sizeof(*nrf)); -+ if (!nrf) -+ return -ENOMEM; -+ nrf->netlink_fd = dup(fd); -+ list_add_tail(&nrf->nlist, &netlink_repair_fds); -+ return ret; -+} -+ -+int netlink_repair_off(void) -+{ -+ int aux = 0, ret; -+ struct netlink_repair_fd *nrf, *n; -+ -+ list_for_each_entry_safe(nrf, n, &netlink_repair_fds, nlist) { -+ ret = setsockopt(nrf->netlink_fd, SOL_NETLINK, TCP_REPAIR, &aux, sizeof(aux)); -+ if (ret < 0) -+ pr_err("Failed to turn off repair mode on netlink\n"); -+ close(nrf->netlink_fd); -+ list_del(&nrf->nlist); -+ free(nrf); -+ } -+ return 0; -+} -+ - static bool can_dump_netlink_sk(int lfd) - { - int ret; -@@ -90,6 +131,10 @@ static int dump_one_netlink_fd(int lfd, u32 id, const struct fd_parms *p) - if (IS_ERR(sk)) - goto err; - -+ if (netlink_repair_on(lfd) < 0) { -+ goto err; -+ } -+ - ne.id = id; - ne.ino = p->stat.st_ino; - --- -2.34.1 - diff --git a/0035-sysvshm-add-dump-restore-sysv-shm-in-host-ipc-ns.patch b/0035-sysvshm-add-dump-restore-sysv-shm-in-host-ipc-ns.patch deleted file mode 100644 index 6bdbfc4731428dfd115852585e6a5eb02f207540..0000000000000000000000000000000000000000 --- a/0035-sysvshm-add-dump-restore-sysv-shm-in-host-ipc-ns.patch +++ /dev/null @@ -1,114 +0,0 @@ -From 852b4db35a06ed382e287d88cd055fdf20fc031f Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Sat, 26 Jun 2021 15:18:15 +0800 -Subject: [PATCH 35/72] sysvshm: add dump/restore sysv-shm in host ipc ns - -In original criu design, SysVIPC memory segment, which belongs -to host ipcns, shouldn't be dumped because criu requires the -whole ipcns to be dumped. During the restoring ipcns, the new -shared memory will be created, and fill the original page data -in it. - -This patch makes the shared-memory in host ipcns restore possible. -Idea: - The SysVIPC memory won't disappear after the task exit. The basic -information can be got from `/proc/sysvipc/shm` as long as the -system doesn't reboot. Compared with restoring the whole ipcns, -the processes of the shared memory creating and page data filling -are ignored. - -Reference: -- https://www.criu.org/What_cannot_be_checkpointed - -Conflict:NA -Reference:https://gitee.com/src-openeuler/criu/pulls/21 -Signed-off-by: fu.lin ---- - criu/cr-dump.c | 8 ++++---- - criu/cr-restore.c | 43 +++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 47 insertions(+), 4 deletions(-) - -diff --git a/criu/cr-dump.c b/criu/cr-dump.c -index b7e0214..e7b5787 100644 ---- a/criu/cr-dump.c -+++ b/criu/cr-dump.c -@@ -463,11 +463,11 @@ static int dump_filemap(struct vma_area *vma_area, int fd) - - static int check_sysvipc_map_dump(pid_t pid, VmaEntry *vma) - { -- if (root_ns_mask & CLONE_NEWIPC) -- return 0; -+ if (!(root_ns_mask & CLONE_NEWIPC)) -+ pr_info("Task %d with SysVIPC shmem map @%" PRIx64 " doesn't live in IPC ns\n", -+ pid, vma->start); - -- pr_err("Task %d with SysVIPC shmem map @%" PRIx64 " doesn't live in IPC ns\n", pid, vma->start); -- return -1; -+ return 0; - } - - static int get_task_auxv(pid_t pid, MmEntry *mm) -diff --git a/criu/cr-restore.c b/criu/cr-restore.c -index 09f135b..152bace 100644 ---- a/criu/cr-restore.c -+++ b/criu/cr-restore.c -@@ -1818,6 +1818,46 @@ static int create_children_and_session(void) - return 0; - } - -+static int prepare_rootns_sysv_shm(unsigned long clone_flags) -+{ -+ int retval = 0; -+ char *line = NULL; -+ size_t len = 0; -+ FILE *fp; -+ key_t key; -+ int shmid; -+ mode_t mode; -+ size_t size; -+ -+ /* This is completed by `prepare_namespace()` */ -+ if (!!(clone_flags & CLONE_NEWIPC)) -+ return 0; -+ -+ pr_info("Restoring SYSV shm in host namespace\n"); -+ -+ fp = fopen("/proc/sysvipc/shm", "r"); -+ if (fp == NULL) { -+ pr_err("Can't open '/proc/sysvipc/shm', errno(%d): %s\n", errno, strerror(errno)); -+ return -1; -+ } -+ -+ while (getline(&line, &len, fp) != -1) { -+ if (sscanf(line, "%d %d %o %lu", &key, &shmid, &mode, &size) != 4) -+ continue; -+ -+ pr_debug("sscanf key: %d shmid: %d mode %o size %lu\n", -+ key, shmid, mode, size); -+ -+ retval = collect_sysv_shmem(shmid, size); -+ if (retval != 0) -+ goto out; -+ } -+ -+out: -+ fclose(fp); -+ return retval; -+} -+ - static int restore_task_with_children(void *_arg) - { - struct cr_clone_arg *ca = _arg; -@@ -1924,6 +1964,9 @@ static int restore_task_with_children(void *_arg) - if (prepare_namespace(current, ca->clone_flags)) - goto err; - -+ if (prepare_rootns_sysv_shm(ca->clone_flags)) -+ goto err; -+ - if (restore_finish_ns_stage(CR_STATE_PREPARE_NAMESPACES, CR_STATE_FORKING) < 0) - goto err; - --- -2.34.1 - diff --git a/0036-add-O_REPAIR-flag-to-vma-fd.patch b/0036-add-O_REPAIR-flag-to-vma-fd.patch deleted file mode 100644 index 3e937f9bcd14bf33ed330447ec66551e1c9cefcc..0000000000000000000000000000000000000000 --- a/0036-add-O_REPAIR-flag-to-vma-fd.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 92fd13a21e52343b532eb1a163a159303107a6e2 Mon Sep 17 00:00:00 2001 -From: Jingxian He -Date: Thu, 24 Jun 2021 16:56:02 +0800 -Subject: [PATCH 36/72] add O_REPAIR flag to vma fd - -Add O_REPAIR flag when openning vma fd. - -Conflict:NA -Reference:https://gitee.com/src-openeuler/criu/pulls/21 -Signed-off-by: Jingxian He ---- - criu/files-reg.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/criu/files-reg.c b/criu/files-reg.c -index b9576a4..7bd8592 100644 ---- a/criu/files-reg.c -+++ b/criu/files-reg.c -@@ -2255,6 +2255,7 @@ void filemap_ctx_fini(void) - } - } - -+#define O_REPAIR 040000000 - static int open_filemap(int pid, struct vma_area *vma) - { - u32 flags; -@@ -2267,13 +2268,15 @@ static int open_filemap(int pid, struct vma_area *vma) - */ - - BUG_ON((vma->vmfd == NULL) || !vma->e->has_fdflags); -- flags = vma->e->fdflags; -+ flags = vma->e->fdflags | O_REPAIR; - - if (ctx.flags != flags || ctx.desc != vma->vmfd) { - if (vma->e->status & VMA_AREA_MEMFD) - ret = memfd_open(vma->vmfd, &flags); -- else -+ else { -+ - ret = open_path(vma->vmfd, do_open_reg_noseek_flags, &flags); -+ } - if (ret < 0) - return ret; - --- -2.34.1 - diff --git a/0037-looser-file-mode-and-size-check.patch b/0037-looser-file-mode-and-size-check.patch deleted file mode 100644 index 1948c60bfbc10502528370583cfb0c0672599739..0000000000000000000000000000000000000000 --- a/0037-looser-file-mode-and-size-check.patch +++ /dev/null @@ -1,90 +0,0 @@ -From bb60f8e71ec85dd11666bbb395508fac4403c251 Mon Sep 17 00:00:00 2001 -From: Jingxian He -Date: Sat, 26 Jun 2021 11:41:18 +0800 -Subject: [PATCH 37/72] looser file mode and size check - -When the file mode and size larger than dump data, -make the restoring process run success. - -Conflict:NA -Reference:https://gitee.com/src-openeuler/criu/pulls/21 -Signed-off-by: Jingxian He -Signed-off-by: fu.lin ---- - criu/config.c | 1 + - criu/crtools.c | 1 + - criu/files-reg.c | 14 +++++++++++--- - criu/include/cr_options.h | 1 + - 4 files changed, 14 insertions(+), 3 deletions(-) - -diff --git a/criu/config.c b/criu/config.c -index bd0f84d..a9eb699 100644 ---- a/criu/config.c -+++ b/criu/config.c -@@ -704,6 +704,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, - BOOL_OPT("dump-char-dev", &opts.dump_char_dev), - BOOL_OPT("with-fd-cred", &opts.with_fd_cred), - BOOL_OPT("mask-exit-notify", &opts.mask_exit_notify), -+ BOOL_OPT("weak-file-check", &opts.weak_file_check), - {}, - }; - -diff --git a/criu/crtools.c b/criu/crtools.c -index 1a41be4..e1afeca 100644 ---- a/criu/crtools.c -+++ b/criu/crtools.c -@@ -455,6 +455,7 @@ usage: - \ - " --with-fd-cred Allow to make the restored process has the same cred\n" - " --mask-exit-notify Mask task exit notify during dump and restore\n" -+ " --weak-file-check Allow file size and mod larger than dumping value\n" - "\n" - "Check options:\n" - " Without options, \"criu check\" checks availability of absolutely required\n" -diff --git a/criu/files-reg.c b/criu/files-reg.c -index 7bd8592..1a3b836 100644 ---- a/criu/files-reg.c -+++ b/criu/files-reg.c -@@ -1991,7 +1991,10 @@ static bool validate_file(const int fd, const struct stat *fd_status, const stru - { - int result = 1; - -- if (rfi->rfe->has_size && (fd_status->st_size != rfi->rfe->size)) { -+ /* NOTICE: customize for the storage module upgrade feature */ -+ if (rfi->rfe->has_size -+ && ((!opts.weak_file_check && fd_status->st_size != rfi->rfe->size) -+ || (fd_status->st_size < rfi->rfe->size))) { - pr_err("File %s has bad size %" PRIu64 " (expect %" PRIu64 ")\n", rfi->path, fd_status->st_size, - rfi->rfe->size); - return false; -@@ -2102,8 +2105,13 @@ ext: - if (!validate_file(tmp, &st, rfi)) - return -1; - -- if (rfi->rfe->has_mode && (st.st_mode != rfi->rfe->mode)) { -- pr_err("File %s has bad mode 0%o (expect 0%o)\n", rfi->path, (int)st.st_mode, rfi->rfe->mode); -+ /* NOTICE: customize for the storage module upgrade feature */ -+ if (rfi->rfe->has_mode -+ && ((!opts.weak_file_check && st.st_mode != rfi->rfe->mode) -+ || (st.st_mode < rfi->rfe->mode))) { -+ pr_err("File %s has bad mode 0%o (expect 0%o), weak check %d\n", -+ rfi->path, (int)st.st_mode, rfi->rfe->mode, -+ opts.weak_file_check); - return -1; - } - -diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h -index 26ae5b6..dec0082 100644 ---- a/criu/include/cr_options.h -+++ b/criu/include/cr_options.h -@@ -196,6 +196,7 @@ struct cr_options { - int dump_char_dev; - int with_fd_cred; - int mask_exit_notify; -+ int weak_file_check; - }; - - extern struct cr_options opts; --- -2.34.1 - diff --git a/0038-file-lock-add-repair-mode-to-dump-file-locks.patch b/0038-file-lock-add-repair-mode-to-dump-file-locks.patch deleted file mode 100644 index b1f5ccd5902c7de2c8c4abb9a0d2a858d75a1801..0000000000000000000000000000000000000000 --- a/0038-file-lock-add-repair-mode-to-dump-file-locks.patch +++ /dev/null @@ -1,308 +0,0 @@ -From 61ca95f5434573e89151d3557185c517cd69447a Mon Sep 17 00:00:00 2001 -From: Sang Yan -Date: Thu, 8 Jul 2021 14:12:42 +0800 -Subject: [PATCH 38/72] file-lock: add repair mode to dump file locks - -Add new options "--file-locks-repair" to enable repair mode -while dumping file locks. -Repair mode keeps locks locked while process were killed in -dumping operation. Then resume the locks from repair mode at -process resuming. - -Signed-off-by: Sang Yan -Signed-off-by: fu.lin ---- - criu/config.c | 1 + - criu/cr-dump.c | 8 ++++++ - criu/crtools.c | 1 + - criu/file-lock.c | 10 +++++++ - criu/include/cr_options.h | 1 + - criu/include/fcntl.h | 16 +++++++++++ - criu/include/parasite-syscall.h | 2 ++ - criu/include/parasite.h | 10 +++++++ - criu/parasite-syscall.c | 33 +++++++++++++++++++++++ - criu/pie/parasite.c | 48 +++++++++++++++++++++++++++++++++ - 10 files changed, 130 insertions(+) - -diff --git a/criu/config.c b/criu/config.c -index a9eb699..0a0623a 100644 ---- a/criu/config.c -+++ b/criu/config.c -@@ -705,6 +705,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, - BOOL_OPT("with-fd-cred", &opts.with_fd_cred), - BOOL_OPT("mask-exit-notify", &opts.mask_exit_notify), - BOOL_OPT("weak-file-check", &opts.weak_file_check), -+ BOOL_OPT("file-locks-repair", &opts.file_locks_repair), - {}, - }; - -diff --git a/criu/cr-dump.c b/criu/cr-dump.c -index e7b5787..607eac2 100644 ---- a/criu/cr-dump.c -+++ b/criu/cr-dump.c -@@ -1679,6 +1679,14 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) - goto err_cure; - } - -+ if (opts.file_locks_repair) { -+ ret = parasite_dump_file_locks(parasite_ctl, pid); -+ if (ret) { -+ pr_err("Can't parasite dump file locks (pid: %d)\n", pid); -+ goto err_cure; -+ } -+ } -+ - ret = dump_task_core_all(parasite_ctl, item, &pps_buf, cr_imgset, &misc); - if (ret) { - pr_err("Dump core (pid: %d) failed with %d\n", pid, ret); -diff --git a/criu/crtools.c b/criu/crtools.c -index e1afeca..7358918 100644 ---- a/criu/crtools.c -+++ b/criu/crtools.c -@@ -456,6 +456,7 @@ usage: - " --with-fd-cred Allow to make the restored process has the same cred\n" - " --mask-exit-notify Mask task exit notify during dump and restore\n" - " --weak-file-check Allow file size and mod larger than dumping value\n" -+ " --file-locks-repair Use repair mode to dump and restore file locks\n" - "\n" - "Check options:\n" - " Without options, \"criu check\" checks availability of absolutely required\n" -diff --git a/criu/file-lock.c b/criu/file-lock.c -index 6334462..c893083 100644 ---- a/criu/file-lock.c -+++ b/criu/file-lock.c -@@ -424,6 +424,8 @@ void discard_dup_locks_tail(pid_t pid, int fd) - list_for_each_entry_safe_reverse(fl, p, &file_lock_list, list) { - if (fl->owners_fd != fd || pid != fl->fl_holder) - break; -+ if (fl->fl_kind == FL_POSIX) -+ continue; - - list_del(&fl->list); - xfree(fl); -@@ -611,8 +613,12 @@ static int restore_file_lock(FileLockEntry *fle) - cmd = fle->type; - } else if (fle->type == F_RDLCK) { - cmd = LOCK_SH; -+ if (opts.file_locks_repair) -+ cmd = LOCK_REPAIR; - } else if (fle->type == F_WRLCK) { - cmd = LOCK_EX; -+ if (opts.file_locks_repair) -+ cmd = LOCK_REPAIR; - } else if (fle->type == F_UNLCK) { - cmd = LOCK_UN; - } else { -@@ -638,6 +644,10 @@ static int restore_file_lock(FileLockEntry *fle) - flk.l_pid = fle->pid; - flk.l_type = fle->type; - -+ if (opts.file_locks_repair -+ && (fle->type == F_RDLCK || fle->type == F_WRLCK)) -+ flk.l_type = F_REPAIR; -+ - pr_info("(posix)flag: %d, type: %d, pid: %d, fd: %d, " - "start: %8" PRIx64 ", len: %8" PRIx64 "\n", - fle->flag, fle->type, fle->pid, fle->fd, fle->start, fle->len); -diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h -index dec0082..9ec8034 100644 ---- a/criu/include/cr_options.h -+++ b/criu/include/cr_options.h -@@ -197,6 +197,7 @@ struct cr_options { - int with_fd_cred; - int mask_exit_notify; - int weak_file_check; -+ int file_locks_repair; - }; - - extern struct cr_options opts; -diff --git a/criu/include/fcntl.h b/criu/include/fcntl.h -index 568977c..0627818 100644 ---- a/criu/include/fcntl.h -+++ b/criu/include/fcntl.h -@@ -23,6 +23,22 @@ struct f_owner_ex { - #define F_SETCRED 18 - #endif - -+#ifndef F_NEED_REPAIR -+#define F_NEED_REPAIR 16 -+#endif -+ -+#ifndef F_REPAIR -+#define F_REPAIR 32 -+#endif -+ -+#ifndef LOCK_NEED_REPAIR -+#define LOCK_NEED_REPAIR 256 /* REPAIRING lock */ -+#endif -+ -+#ifndef LOCK_REPAIR -+#define LOCK_REPAIR 512 /* REPAIR lock */ -+#endif -+ - /* - * These things are required to compile on CentOS-6 - */ -diff --git a/criu/include/parasite-syscall.h b/criu/include/parasite-syscall.h -index 4540e11..9f2d3e0 100644 ---- a/criu/include/parasite-syscall.h -+++ b/criu/include/parasite-syscall.h -@@ -48,4 +48,6 @@ extern int parasite_dump_cgroup(struct parasite_ctl *ctl, struct parasite_dump_c - - extern struct parasite_tty_args *parasite_dump_tty(struct parasite_ctl *ctl, int fd, int type); - -+extern int parasite_dump_file_locks(struct parasite_ctl *ctl, int pid); -+ - #endif /* __CR_PARASITE_SYSCALL_H__ */ -diff --git a/criu/include/parasite.h b/criu/include/parasite.h -index d2a0688..230c453 100644 ---- a/criu/include/parasite.h -+++ b/criu/include/parasite.h -@@ -37,6 +37,7 @@ enum { - PARASITE_CMD_CHECK_VDSO_MARK, - PARASITE_CMD_CHECK_AIOS, - PARASITE_CMD_DUMP_CGROUP, -+ PARASITE_CMD_DUMP_FILELOCKS, - - PARASITE_CMD_MAX, - }; -@@ -244,6 +245,15 @@ struct parasite_dump_cgroup_args { - char contents[1 << 12]; - }; - -+struct parasite_dump_filelocks_args { -+ short kind; -+ short type; -+ long start; -+ long len; -+ int pid; -+ int fd; -+}; -+ - #endif /* !__ASSEMBLY__ */ - - #endif /* __CR_PARASITE_H__ */ -diff --git a/criu/parasite-syscall.c b/criu/parasite-syscall.c -index ee4fa86..c57f854 100644 ---- a/criu/parasite-syscall.c -+++ b/criu/parasite-syscall.c -@@ -32,6 +32,7 @@ - #include - #include "signal.h" - #include "sigframe.h" -+#include "file-lock.h" - - #include - #include -@@ -654,3 +655,35 @@ struct parasite_ctl *parasite_infect_seized(pid_t pid, struct pstree_item *item, - - return ctl; - } -+ -+int parasite_dump_file_locks(struct parasite_ctl *ctl, int pid) -+{ -+ struct parasite_dump_filelocks_args *args; -+ struct file_lock *fl; -+ int ret; -+ -+ args = compel_parasite_args(ctl, struct parasite_dump_filelocks_args); -+ -+ list_for_each_entry(fl, &file_lock_list, list) { -+ if (fl->real_owner != pid) -+ continue; -+ -+ args->pid = fl->real_owner; -+ args->fd = fl->owners_fd; -+ args->kind = fl->fl_kind; -+ args->type = fl->fl_ltype; -+ args->start = fl->start; -+ if (!strncmp(fl->end, "EOF", 3)) -+ args->len = 0; -+ else -+ args->len = (atoll(fl->end) + 1) - fl->start; -+ -+ ret = compel_rpc_call_sync(PARASITE_CMD_DUMP_FILELOCKS, ctl); -+ if (ret < 0) { -+ pr_err("Parasite dump file lock failed! (pid: %d)\n", pid); -+ return ret; -+ } -+ } -+ -+ return 0; -+} -diff --git a/criu/pie/parasite.c b/criu/pie/parasite.c -index e49958b..c781303 100644 ---- a/criu/pie/parasite.c -+++ b/criu/pie/parasite.c -@@ -22,6 +22,7 @@ - #include "criu-log.h" - #include "tty.h" - #include "aio.h" -+#include "file-lock.h" - - #include "asm/parasite.h" - #include "restorer.h" -@@ -769,6 +770,50 @@ static int parasite_dump_cgroup(struct parasite_dump_cgroup_args *args) - return 0; - } - -+static int set_filelocks_needrepair(struct parasite_dump_filelocks_args *args) -+{ -+ int ret; -+ -+ if (args->kind == FL_FLOCK) { -+ if (args->type == F_RDLCK || args->type == F_WRLCK) { -+ int cmd = LOCK_NEED_REPAIR; -+ -+ pr_info("Need Repair flock kind: %d, type: %d, cmd: %d, pid: %d, fd: %d\n", -+ args->kind, args->type, cmd, args->pid, args->fd); -+ -+ ret = sys_flock(args->fd, cmd); -+ if (ret < 0) { -+ pr_err("Can not set NEED_REPAIR flock!\n"); -+ return ret; -+ } -+ } -+ } else if (args->kind == FL_POSIX) { -+ if (args->type == F_RDLCK || args->type == F_WRLCK) { -+ struct flock flk; -+ memset(&flk, 0, sizeof(flk)); -+ -+ flk.l_whence = SEEK_SET; -+ flk.l_start = args->start; -+ flk.l_len = args->len; -+ flk.l_pid = args->pid; -+ flk.l_type = F_NEED_REPAIR; -+ -+ pr_info("Need Repair posix lock kind: %d, type: %d, cmd: %d, pid: %d, fd: %d, " -+ "start: %8"PRIx64", len: %8"PRIx64"\n", -+ args->kind, args->type, flk.l_type, args->pid, args->fd, -+ args->start, args->len); -+ -+ ret = sys_fcntl(args->fd, F_SETLKW, (long)&flk); -+ if (ret < 0) { -+ pr_err("Can not set NEED_REPAIR posix lock!\n"); -+ return ret; -+ } -+ } -+ } -+ -+ return 0; -+} -+ - void parasite_cleanup(void) - { - if (mprotect_args) { -@@ -821,6 +866,9 @@ int parasite_daemon_cmd(int cmd, void *args) - case PARASITE_CMD_DUMP_CGROUP: - ret = parasite_dump_cgroup(args); - break; -+ case PARASITE_CMD_DUMP_FILELOCKS: -+ ret = set_filelocks_needrepair(args); -+ break; - default: - pr_err("Unknown command in parasite daemon thread leader: %d\n", cmd); - ret = -1; --- -2.34.1 - diff --git a/0039-unlock-network-when-restore-fails.patch b/0039-unlock-network-when-restore-fails.patch deleted file mode 100644 index 43f6c8b916d23fdf184232b2737262f64c428ea2..0000000000000000000000000000000000000000 --- a/0039-unlock-network-when-restore-fails.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 5421245cf87bac71cbe999f257ba5b3a96c8733b Mon Sep 17 00:00:00 2001 -From: Liu Chao -Date: Fri, 9 Jul 2021 07:32:20 +0000 -Subject: [PATCH 39/72] unlock network when restore fails - -Conflict:NA -Reference:https://gitee.com/src-openeuler/criu/pulls/21 -Signed-off-by: fu.lin ---- - criu/cr-restore.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - -diff --git a/criu/cr-restore.c b/criu/cr-restore.c -index 152bace..d19768d 100644 ---- a/criu/cr-restore.c -+++ b/criu/cr-restore.c -@@ -115,6 +115,9 @@ - #endif - - struct pstree_item *current; -+#define NETWORK_COLLECTED 0x1 -+#define NETWORK_UNLOCK 0x2 -+static int network_status = 0; - - static int restore_task_with_children(void *); - static int sigreturn_restore(pid_t pid, struct task_restore_args *ta, unsigned long alen, CoreEntry *core); -@@ -249,6 +252,7 @@ static int crtools_prepare_shared(void) - /* Connections are unlocked from criu */ - if (!files_collected() && collect_image(&inet_sk_cinfo)) - return -1; -+ network_status |= NETWORK_COLLECTED; - - if (collect_binfmt_misc()) - return -1; -@@ -2525,6 +2529,7 @@ skip_ns_bouncing: - - /* Unlock network before disabling repair mode on sockets */ - network_unlock(); -+ network_status |= NETWORK_UNLOCK; - - /* - * Stop getting sigchld, after we resume the tasks they -@@ -2734,6 +2739,14 @@ clean_cgroup: - fini_cgroup(); - err: - cr_plugin_fini(CR_PLUGIN_STAGE__RESTORE, ret); -+ if (ret < 0) { -+ if (!!(network_status & NETWORK_COLLECTED) -+ && !files_collected() && collect_image(&inet_sk_cinfo)) -+ pr_err("collect inet sk cinfo fail\n"); -+ -+ if (!!(network_status & NETWORK_UNLOCK)) -+ network_unlock(); -+ } - return ret; - } - --- -2.34.1 - diff --git a/0040-net-add-shared-socket-recover-method-for-criu.patch b/0040-net-add-shared-socket-recover-method-for-criu.patch deleted file mode 100644 index 8de2a889d5b979ea83dfa4c9ac1885678bf3a248..0000000000000000000000000000000000000000 --- a/0040-net-add-shared-socket-recover-method-for-criu.patch +++ /dev/null @@ -1,332 +0,0 @@ -From a22542173083d2eeb5dde627c47452ea641c98c1 Mon Sep 17 00:00:00 2001 -From: Sang Yan -Date: Mon, 12 Jul 2021 16:14:45 +0800 -Subject: [PATCH 40/72] net: add shared socket recover method for criu - -When the socket file is shared with another process, -it will not be freed during dumping process. -We can repair the socket file by installing it to -the old fd number. - -Add new options: "--share-dst-ports" and "--share-src-ports" -for user to tell criu which socket ports are shared. - -Conflict:NA -Reference:https://gitee.com/src-openeuler/criu/pulls/21 -Signed-off-by: Jingxian He ---- - criu/config.c | 8 ++ - criu/crtools.c | 3 + - criu/files.c | 18 ++++- - criu/include/cr_options.h | 2 + - criu/include/files.h | 4 + - criu/include/net.h | 1 + - criu/include/sk-inet.h | 3 + - criu/sk-inet.c | 151 ++++++++++++++++++++++++++++++++++++++ - 8 files changed, 189 insertions(+), 1 deletion(-) - -diff --git a/criu/config.c b/criu/config.c -index 0a0623a..7e92731 100644 ---- a/criu/config.c -+++ b/criu/config.c -@@ -706,6 +706,8 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, - BOOL_OPT("mask-exit-notify", &opts.mask_exit_notify), - BOOL_OPT("weak-file-check", &opts.weak_file_check), - BOOL_OPT("file-locks-repair", &opts.file_locks_repair), -+ { "share-dst-ports", required_argument, 0, 2000 }, -+ { "share-src-ports", required_argument, 0, 2001 }, - {}, - }; - -@@ -1041,6 +1043,12 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, - return 1; - } - break; -+ case 2000: -+ SET_CHAR_OPTS(share_dst_ports, optarg); -+ break; -+ case 2001: -+ SET_CHAR_OPTS(share_src_ports, optarg); -+ break; - case 'V': - pr_msg("Version: %s\n", CRIU_VERSION); - if (strcmp(CRIU_GITID, "0")) -diff --git a/criu/crtools.c b/criu/crtools.c -index 7358918..cfa149a 100644 ---- a/criu/crtools.c -+++ b/criu/crtools.c -@@ -104,6 +104,9 @@ int main(int argc, char *argv[], char *envp[]) - goto usage; - } - -+ if (parse_share_ports()) -+ goto usage; -+ - log_set_loglevel(opts.log_level); - - if (optind < argc && !strcmp(argv[optind], "swrk")) { -diff --git a/criu/files.c b/criu/files.c -index 1ec5281..1c52cf4 100644 ---- a/criu/files.c -+++ b/criu/files.c -@@ -705,6 +705,8 @@ int dump_my_file(int lfd, u32 *id, int *type) - return 0; - } - -+int dst_pid; -+ - int dump_task_files_seized(struct parasite_ctl *ctl, struct pstree_item *item, struct parasite_drain_fd *dfds) - { - int *lfds = NULL; -@@ -728,7 +730,7 @@ int dump_task_files_seized(struct parasite_ctl *ctl, struct pstree_item *item, s - img = open_image(CR_FD_FDINFO, O_DUMP, item->ids->files_id); - if (!img) - goto err; -- -+ dst_pid = item->pid->real; - ret = 0; /* Don't fail if nr_fds == 0 */ - for (off = 0; ret == 0 && off < dfds->nr_fds; off += nr_fds) { - if (nr_fds + off > dfds->nr_fds) -@@ -1237,6 +1239,20 @@ static int open_fd(struct fdinfo_list_entry *fle) - goto out; - } - -+ if (d->ops->type == FD_TYPES__INETSK) { -+ if (check_need_repair(d)) { -+ ret = repair_share_socket(d->id); -+ if (!ret) { -+ new_fd = get_share_socket(); -+ pr_info("get share socket:%d\n", new_fd); -+ if (new_fd <= 0 || setup_and_serve_out(fle, new_fd) < 0) -+ return -1; -+ fle->stage = FLE_RESTORED; -+ return 0; -+ } -+ } -+ } -+ - /* - * Open method returns the following values: - * 0 -- restore is successfully finished; -diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h -index 9ec8034..b7c1e34 100644 ---- a/criu/include/cr_options.h -+++ b/criu/include/cr_options.h -@@ -198,6 +198,8 @@ struct cr_options { - int mask_exit_notify; - int weak_file_check; - int file_locks_repair; -+ char *share_dst_ports; -+ char *share_src_ports; - }; - - extern struct cr_options opts; -diff --git a/criu/include/files.h b/criu/include/files.h -index 1d979a9..0521c7e 100644 ---- a/criu/include/files.h -+++ b/criu/include/files.h -@@ -201,4 +201,8 @@ extern int open_transport_socket(void); - extern int set_fds_event(pid_t virt); - extern void wait_fds_event(void); - -+extern int repair_share_socket(int id); -+extern int check_need_repair(struct file_desc *d); -+extern int get_share_socket(void); -+ - #endif /* __CR_FILES_H__ */ -diff --git a/criu/include/net.h b/criu/include/net.h -index 718cc45..ec47b61 100644 ---- a/criu/include/net.h -+++ b/criu/include/net.h -@@ -16,6 +16,7 @@ extern int dump_net_ns(struct ns_id *ns); - extern int prepare_net_namespaces(void); - extern void fini_net_namespaces(void); - extern int netns_keep_nsfd(void); -+extern int parse_share_ports(void); - - struct pstree_item; - extern int restore_task_net_ns(struct pstree_item *current); -diff --git a/criu/include/sk-inet.h b/criu/include/sk-inet.h -index c832d63..27deceb 100644 ---- a/criu/include/sk-inet.h -+++ b/criu/include/sk-inet.h -@@ -101,4 +101,7 @@ struct rst_tcp_sock { - union libsoccr_addr; - int restore_sockaddr(union libsoccr_addr *sa, int family, u32 pb_port, u32 *pb_addr, u32 ifindex); - -+#define MAX_SHARE_PORT_NUM 64 -+extern int dst_pid; -+ - #endif /* __CR_SK_INET_H__ */ -diff --git a/criu/sk-inet.c b/criu/sk-inet.c -index 05048c8..c7de793 100644 ---- a/criu/sk-inet.c -+++ b/criu/sk-inet.c -@@ -431,6 +431,152 @@ static bool needs_scope_id(uint32_t *src_addr) - return false; - } - -+#define ADD_SHARE_SOCKET_PATH "/sys/kernel/add_share_socket" -+#define REPAIR_SHARE_SOCKET_PATH "/sys/kernel/repair_share_socket" -+#define SHARE_SOCKET_PATH "/sys/kernel/share_socket" -+ -+int add_share_socket(u32 id, int fd, int pid, int port) -+{ -+ int retval; -+ char buf[256] = {0}; -+ -+ retval = snprintf(buf, 256, "%u,%d,%d,%d", id, fd, pid, port); -+ if (retval <= 0) -+ return -EFAULT; -+ -+ fd = open(ADD_SHARE_SOCKET_PATH, O_WRONLY, 0); -+ if (fd < 0) { -+ pr_err("open file:%s fail\n", ADD_SHARE_SOCKET_PATH); -+ return fd; -+ } -+ -+ retval = write(fd, buf, strlen(buf)); -+ close(fd); -+ return retval < 0 ? -1 : 0; -+} -+ -+ -+int repair_share_socket(int id) -+{ -+ int retval, fd; -+ char buf[256] = {0}; -+ -+ retval = snprintf(buf, 256, "%u", id); -+ if (retval <= 0) -+ return -EFAULT; -+ -+ fd = open(REPAIR_SHARE_SOCKET_PATH, O_WRONLY, 0); -+ if (fd < 0) { -+ pr_err("open file:%s fail\n", REPAIR_SHARE_SOCKET_PATH); -+ return fd; -+ } -+ retval = write(fd, buf, strlen(buf)); -+ -+ close(fd); -+ return retval < 0 ? -1 : 0; -+} -+ -+int get_share_socket(void) -+{ -+ int fd; -+ ssize_t count; -+ int retval = -1; -+ char buf[32] = {0}; -+ -+ fd = open(SHARE_SOCKET_PATH, O_RDONLY, 0); -+ if (fd < 0) { -+ pr_err("open file:%s fail\n", SHARE_SOCKET_PATH); -+ return fd; -+ } -+ -+ count = read(fd, buf, sizeof(buf)); -+ if (count > 0) -+ retval = atoi(buf); -+ -+ close(fd); -+ return retval; -+} -+ -+int g_share_dst_ports[MAX_SHARE_PORT_NUM]; -+int g_share_dst_port_num; -+int g_share_src_ports[MAX_SHARE_PORT_NUM]; -+int g_share_src_port_num; -+ -+int parse_share_ports(void) -+{ -+ char *save, *p; -+ -+ if (opts.share_dst_ports) { -+ p = strtok_r(opts.share_dst_ports, ",", &save); -+ while (p != NULL) { -+ if (g_share_dst_port_num >= MAX_SHARE_PORT_NUM) -+ return -1; -+ g_share_dst_ports[g_share_dst_port_num] = atoi(p); -+ if (!g_share_dst_ports[g_share_dst_port_num]) -+ return -1; -+ g_share_dst_port_num++; -+ p = strtok_r(NULL, ",", &save); -+ } -+ } -+ -+ if (opts.share_src_ports) { -+ p = strtok_r(opts.share_src_ports, ",", &save); -+ while (p != NULL) { -+ if (g_share_src_port_num >= MAX_SHARE_PORT_NUM) -+ return -1; -+ g_share_src_ports[g_share_src_port_num] = atoi(p); -+ if (!g_share_src_ports[g_share_src_port_num]) -+ return -1; -+ g_share_src_port_num++; -+ p = strtok_r(NULL, ",", &save); -+ } -+ } -+ return 0; -+} -+ -+int check_share_dst_port(int dst_port) -+{ -+ int i; -+ int ret = 0; -+ -+ for (i = 0; i < g_share_dst_port_num; i++) { -+ if (dst_port == g_share_dst_ports[i]) { -+ ret = 1; -+ break; -+ } -+ } -+ return ret; -+} -+ -+int check_share_src_port(int src_port) -+{ -+ int i; -+ int ret = 0; -+ -+ for (i = 0; i < g_share_src_port_num; i++) { -+ if (src_port == g_share_src_ports[i]) { -+ ret = 1; -+ break; -+ } -+ } -+ -+ return ret; -+} -+ -+int check_need_repair(struct file_desc *d) -+{ -+ struct inet_sk_info *ii; -+ InetSkEntry *ie; -+ -+ ii = container_of(d, struct inet_sk_info, d); -+ ie = ii->ie; -+ if (check_share_dst_port(ie->dst_port) || -+ check_share_src_port(ie->src_port)) -+ return 1; -+ else -+ return 0; -+} -+ - static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int family) - { - struct inet_sk_desc *sk; -@@ -488,6 +634,11 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa - - BUG_ON(sk->sd.already_dumped); - -+ if (check_share_dst_port(sk->dst_port) || check_share_src_port(sk->src_port)) { -+ pr_info("Start add share prot:%d src %d\n", sk->dst_port, sk->src_port); -+ add_share_socket(id, lfd, dst_pid, sk->src_port); -+ } -+ - ie.id = id; - ie.ino = sk->sd.ino; - if (sk->sd.sk_ns) { --- -2.34.1 - diff --git a/0041-tcp-save-src-ports-to-ip_local_reserved_ports-when-d.patch b/0041-tcp-save-src-ports-to-ip_local_reserved_ports-when-d.patch deleted file mode 100644 index 1159098e6313d0850b5bd3f906ac85d734f77d78..0000000000000000000000000000000000000000 --- a/0041-tcp-save-src-ports-to-ip_local_reserved_ports-when-d.patch +++ /dev/null @@ -1,273 +0,0 @@ -From aac63cee766bb6840326d008ed1b1993bb7c629a Mon Sep 17 00:00:00 2001 -From: Liu Chao -Date: Mon, 19 Jul 2021 03:19:30 +0000 -Subject: [PATCH 41/72] tcp: save src ports to ip_local_reserved_ports when - dump tasks and retore it when restore tasks - -Signed-off-by: Liu Chao -Signed-off-by: fu.lin ---- - criu/Makefile.crtools | 1 + - criu/config.c | 8 ++- - criu/cr-dump.c | 4 ++ - criu/crtools.c | 1 + - criu/include/cr_options.h | 1 + - criu/include/reserved-ports.h | 10 ++++ - criu/net.c | 6 +++ - criu/reserved-ports.c | 98 +++++++++++++++++++++++++++++++++++ - criu/sk-tcp.c | 2 +- - 9 files changed, 129 insertions(+), 2 deletions(-) - create mode 100644 criu/include/reserved-ports.h - create mode 100644 criu/reserved-ports.c - -diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools -index 65cc215..3e522b4 100644 ---- a/criu/Makefile.crtools -+++ b/criu/Makefile.crtools -@@ -94,6 +94,7 @@ obj-y += pin-mem.o - obj-y += devname.o - obj-y += files-chr.o - obj-y += exit-notify.o -+obj-y += reserved-ports.o - obj-$(CONFIG_HAS_LIBBPF) += bpfmap.o - obj-$(CONFIG_COMPAT) += pie-util-vdso-elf32.o - CFLAGS_pie-util-vdso-elf32.o += -DCONFIG_VDSO_32 -diff --git a/criu/config.c b/criu/config.c -index 7e92731..ae5f81e 100644 ---- a/criu/config.c -+++ b/criu/config.c -@@ -615,7 +615,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, - "no-" OPT_NAME, no_argument, SAVE_TO, false \ - } - -- static const char short_opts[] = "dSsRt:hD:o:v::x::Vr:jJ:lW:L:M:"; -+ static const char short_opts[] = "dSsRt:hD:o:v::x::Vr:jJ:lW:L:M:P:"; - static struct option long_opts[] = { - { "tree", required_argument, 0, 't' }, - { "leave-stopped", no_argument, 0, 's' }, -@@ -708,6 +708,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, - BOOL_OPT("file-locks-repair", &opts.file_locks_repair), - { "share-dst-ports", required_argument, 0, 2000 }, - { "share-src-ports", required_argument, 0, 2001 }, -+ { "reserve-ports", required_argument, 0, 'P' }, - {}, - }; - -@@ -1057,6 +1058,11 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, - case 'h': - *usage_error = false; - return 2; -+ case 'P': -+ opts.reserve_ports = atoi(optarg); -+ if (opts.reserve_ports < 0) -+ goto bad_arg; -+ break; - default: - return 2; - } -diff --git a/criu/cr-dump.c b/criu/cr-dump.c -index 607eac2..a8ab61e 100644 ---- a/criu/cr-dump.c -+++ b/criu/cr-dump.c -@@ -89,6 +89,7 @@ - #include "pin-mem.h" - #include "notifier.h" - #include "files-chr.h" -+#include "reserved-ports.h" - - /* - * Architectures can overwrite this function to restore register sets that -@@ -2223,6 +2224,9 @@ int cr_dump_tasks(pid_t pid) - goto err; - } - -+ if (opts.reserve_ports > 0) -+ set_reserved_ports(); -+ - if (parent_ie) { - inventory_entry__free_unpacked(parent_ie, NULL); - parent_ie = NULL; -diff --git a/criu/crtools.c b/criu/crtools.c -index cfa149a..ae858e8 100644 ---- a/criu/crtools.c -+++ b/criu/crtools.c -@@ -460,6 +460,7 @@ usage: - " --mask-exit-notify Mask task exit notify during dump and restore\n" - " --weak-file-check Allow file size and mod larger than dumping value\n" - " --file-locks-repair Use repair mode to dump and restore file locks\n" -+ " --reserve-ports Reserve src ports in kernel\n" - "\n" - "Check options:\n" - " Without options, \"criu check\" checks availability of absolutely required\n" -diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h -index b7c1e34..3b61c6b 100644 ---- a/criu/include/cr_options.h -+++ b/criu/include/cr_options.h -@@ -200,6 +200,7 @@ struct cr_options { - int file_locks_repair; - char *share_dst_ports; - char *share_src_ports; -+ int reserve_ports; - }; - - extern struct cr_options opts; -diff --git a/criu/include/reserved-ports.h b/criu/include/reserved-ports.h -new file mode 100644 -index 0000000..b614482 ---- /dev/null -+++ b/criu/include/reserved-ports.h -@@ -0,0 +1,10 @@ -+#ifndef __CRIU_RESERVED_PORTS_H__ -+#define __CRIU_RESERVED_PORTS_H__ -+ -+#define RESERVED_PORTS_PATH "/proc/sys/net/ipv4/ip_local_reserved_ports" -+ -+extern void read_reserved_ports(char *path); -+extern void write_reserved_ports(char *path); -+extern void set_reserved_ports(void); -+ -+#endif /* __CRIU_RESERVED_PORTS_H__ */ -diff --git a/criu/net.c b/criu/net.c -index 7b45f06..fff4c85 100644 ---- a/criu/net.c -+++ b/criu/net.c -@@ -46,6 +46,7 @@ - #include "external.h" - #include "fdstore.h" - #include "netfilter.h" -+#include "reserved-ports.h" - - #include "protobuf.h" - #include "images/netdev.pb-c.h" -@@ -3193,6 +3194,11 @@ void network_unlock(void) - { - pr_info("Unlock network\n"); - -+ if (opts.reserve_ports) { -+ read_reserved_ports("ip_local_reserved_ports"); -+ write_reserved_ports(RESERVED_PORTS_PATH); -+ } -+ - cpt_unlock_tcp_connections(); - rst_unlock_tcp_connections(); - -diff --git a/criu/reserved-ports.c b/criu/reserved-ports.c -new file mode 100644 -index 0000000..b4996ab ---- /dev/null -+++ b/criu/reserved-ports.c -@@ -0,0 +1,98 @@ -+#include -+#include -+#include -+#include -+#include -+ -+#include "log.h" -+#include "cr_options.h" -+#include "util.h" -+#include "sk-inet.h" -+#include "reserved-ports.h" -+ -+#include "common/list.h" -+ -+static char* reserved_ports; -+static int reserved_ports_num; -+extern struct list_head cpt_tcp_repair_sockets; -+ -+void read_reserved_ports(char *path) -+{ -+ FILE *file = NULL; -+ char *ch = NULL; -+ size_t size = 0; -+ -+ if (reserved_ports) { -+ free(reserved_ports); -+ reserved_ports = NULL; -+ } -+ -+ file = fopen(path, "r"); -+ if (!file) { -+ pr_err("Cannot fopen %s\n", path); -+ return; -+ } -+ -+ if (getline(&reserved_ports, &size, file) <= 0) -+ pr_err("Cannot getline from %s\n", path); -+ fclose(file); -+ -+ if (!reserved_ports) -+ return; -+ -+ ch = strstr(reserved_ports, "\n"); -+ if (ch) -+ *ch = '\0'; -+} -+ -+void write_reserved_ports(char *path) -+{ -+ int fd = -1; -+ char buf[PATH_MAX]; -+ -+ fd = open(path, O_RDWR | O_CREAT, 0640); -+ if (fd < 0) { -+ pr_err("Cannot open %s ret %d cwd: %s\n", path, fd, buf); -+ return; -+ } -+ -+ cr_system(-1, fd, -1, "/usr/bin/echo", -+ (char *[]) { "echo", reserved_ports, NULL}, 0); -+ close(fd); -+} -+ -+static int add_reserved_ports(struct inet_sk_desc *sk) -+{ -+ if (reserved_ports_num >= opts.reserve_ports) -+ return -1; -+ -+ if (strlen(reserved_ports) == 0) -+ snprintf(reserved_ports, 6, "%u", sk->src_port); -+ else -+ snprintf(reserved_ports + strlen(reserved_ports), 7, ",%u", sk->src_port); -+ reserved_ports_num++; -+ -+ return 0; -+} -+ -+void set_reserved_ports(void) -+{ -+ struct inet_sk_desc *sk = NULL; -+ size_t size = 0; -+ -+ read_reserved_ports(RESERVED_PORTS_PATH); -+ -+ write_reserved_ports("ip_local_reserved_ports"); -+ -+ size = strlen(reserved_ports) + 6 * opts.reserve_ports + 1; -+ if (xrealloc_safe(&reserved_ports, size)) -+ exit(1); -+ -+ list_for_each_entry(sk, &cpt_tcp_repair_sockets, rlist) -+ add_reserved_ports(sk); -+ -+ write_reserved_ports(RESERVED_PORTS_PATH); -+ -+ free(reserved_ports); -+ reserved_ports = NULL; -+} -diff --git a/criu/sk-tcp.c b/criu/sk-tcp.c -index 0afecd2..38889d7 100644 ---- a/criu/sk-tcp.c -+++ b/criu/sk-tcp.c -@@ -30,7 +30,7 @@ - #undef LOG_PREFIX - #define LOG_PREFIX "tcp: " - --static LIST_HEAD(cpt_tcp_repair_sockets); -+LIST_HEAD(cpt_tcp_repair_sockets); - static LIST_HEAD(rst_tcp_repair_sockets); - - static int lock_connection(struct inet_sk_desc *sk) --- -2.34.1 - diff --git a/0042-reg-file-fix-dump-fail-problem-with-null-seek-op.patch b/0042-reg-file-fix-dump-fail-problem-with-null-seek-op.patch deleted file mode 100644 index 76aeb091b4b8875bfeca72a454c825523de2c5ed..0000000000000000000000000000000000000000 --- a/0042-reg-file-fix-dump-fail-problem-with-null-seek-op.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 06a0277c2aab1442c724217957fd5f915ace2753 Mon Sep 17 00:00:00 2001 -From: Zhuling -Date: Thu, 22 Jul 2021 10:15:15 +0800 -Subject: [PATCH 42/72] reg-file: fix dump fail problem with null seek op - -Some customizing `struct file_operations` implementation has -no `llseek`, therefore ignore the no-implementation errno. - -Fix file dumping fail problem when the file seek op is null. - -Signed-off-by: Jingxian He -Signed-off-by: fu.lin ---- - criu/files-reg.c | 15 ++++++++++++--- - 1 file changed, 12 insertions(+), 3 deletions(-) - -diff --git a/criu/files-reg.c b/criu/files-reg.c -index 1a3b836..6dc8745 100644 ---- a/criu/files-reg.c -+++ b/criu/files-reg.c -@@ -2176,9 +2176,18 @@ static int do_open_reg(int ns_root_fd, struct reg_file_info *rfi, void *arg) - */ - if (!(rfi->rfe->flags & O_PATH)) { - if (rfi->rfe->pos != -1ULL && lseek(fd, rfi->rfe->pos, SEEK_SET) < 0) { -- pr_perror("Can't restore file pos"); -- close(fd); -- return -1; -+ /* -+ * Some customizing `struct file_operations` -+ * implementation has no `llseek`, therefore -+ * ignore the no-implementation errno. -+ */ -+ if (errno == ESPIPE) { -+ pr_warn("No ability to restore file ops\n"); -+ } else { -+ pr_perror("Can't restore file pos"); -+ close(fd); -+ return -1; -+ } - } - } - --- -2.34.1 - diff --git a/0043-fix-dump-fail-problem-with-no-access-to-get-socket-f.patch b/0043-fix-dump-fail-problem-with-no-access-to-get-socket-f.patch deleted file mode 100644 index 754ee2b62eec697e909c735a9d2e1c53d77c4628..0000000000000000000000000000000000000000 --- a/0043-fix-dump-fail-problem-with-no-access-to-get-socket-f.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 88274e29aaaec4a53df996ae84c37ad20f36395f Mon Sep 17 00:00:00 2001 -From: Zhuling -Date: Sat, 24 Jul 2021 16:37:17 +0800 -Subject: [PATCH 43/72] fix dump fail problem with no access to get socket - filter - -Someone uses bpf hook by writing the kernel function instead -of the bpf code, it causes the error here. - -Fix socket dumping fail problem when user space has no access -to getting socket filter. - -Signed-off-by: Jingxian He -Signed-off-by: fu.lin ---- - criu/sockets.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/criu/sockets.c b/criu/sockets.c -index 2ddf85e..e412a1d 100644 ---- a/criu/sockets.c -+++ b/criu/sockets.c -@@ -355,7 +355,12 @@ static int dump_socket_filter(int sk, SkOptsEntry *soe) - - ret = getsockopt(sk, SOL_SOCKET, SO_GET_FILTER, NULL, &len); - if (ret) { -- pr_perror("Can't get socket filter len"); -+ pr_warn("Can't get socket filter len"); -+ /* Someone uses bpf hook by writing the kernel function -+ * instead of the bpf code, it causes the error here. -+ */ -+ if (errno == EACCES) -+ return 0; - return ret; - } - --- -2.34.1 - diff --git a/0044-proc-parse-fix-vma-offset-value-for-the-sysfs-file-o.patch b/0044-proc-parse-fix-vma-offset-value-for-the-sysfs-file-o.patch deleted file mode 100644 index b363ba7310e7a588fd6746d275c99ee3e22fc29c..0000000000000000000000000000000000000000 --- a/0044-proc-parse-fix-vma-offset-value-for-the-sysfs-file-o.patch +++ /dev/null @@ -1,139 +0,0 @@ -From c7f9888e234a626a4d7bf31b89d66b91607f9785 Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Tue, 27 Jul 2021 11:40:34 +0800 -Subject: [PATCH 44/72] proc parse: fix vma offset value for the sysfs file of - pci devices - -Some pci devices create bin sysfs file which permit to use `mmap()` -syscall, the 6th parameter `offset` is always 0 when those kinds of -files create file mapping. The value of `offset` will be assign to -`vma->vm_pgoff` in kernel. However, it will be changed to pci address -automically during mmap callback function `pci_mmap_resource_range()`, -and the offset in `/proc//maps` will show non-zero. It will result -criu restore fails. - -There are many of those files. Just retry the mmap action. - -NOTICE: the stragy is try best, not whitelist. - -Signed-off-by: He Jingxian -Signed-off-by: fu.lin -Signed-off-by: fu.lin ---- - criu/include/image.h | 1 + - criu/pie/restorer.c | 22 +++++++++++++++++++--- - criu/proc_parse.c | 32 ++++++++++++++++++++++++++++++++ - 3 files changed, 52 insertions(+), 3 deletions(-) - -diff --git a/criu/include/image.h b/criu/include/image.h -index 66492c0..0156314 100644 ---- a/criu/include/image.h -+++ b/criu/include/image.h -@@ -86,6 +86,7 @@ - #define VMA_AREA_MEMFD (1 << 14) - #define VMA_AREA_ANON_INODE (1 << 15) - #define VMA_AREA_CHR (1 << 16) -+#define VMA_AREA_DEV_SHARE (1 << 17) - - #define VMA_CLOSE (1 << 28) - #define VMA_NO_PROT_WRITE (1 << 29) -diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c -index fde6e30..67b0d4c 100644 ---- a/criu/pie/restorer.c -+++ b/criu/pie/restorer.c -@@ -883,8 +883,9 @@ static unsigned long restore_mapping(VmaEntry *vma_entry) - * that mechanism as it causes the process to be charged for memory - * immediately upon mmap, not later upon preadv(). - */ -- pr_debug("\tmmap(%" PRIx64 " -> %" PRIx64 ", %x %x %d)\n", vma_entry->start, vma_entry->end, prot, flags, -- (int)vma_entry->fd); -+ pr_debug("\tmmap(%" PRIx64 " -> %" PRIx64 ", %x %x %d %lx)\n", -+ vma_entry->start, vma_entry->end, prot, flags, -+ (int)vma_entry->fd, vma_entry->pgoff); - /* - * Should map memory here. Note we map them as - * writable since we're going to restore page -@@ -892,6 +893,20 @@ static unsigned long restore_mapping(VmaEntry *vma_entry) - */ - addr = sys_mmap(decode_pointer(vma_entry->start), vma_entry_len(vma_entry), prot, flags, vma_entry->fd, - vma_entry->pgoff); -+ /* Some drivers implements its own mmap callback, the `mmap()` argument -+ * `offset` has the differet semantic with POSIX standard. Therefore, -+ * try to re-mmap with offset 0. -+ * -+ * NOTICE: the stragy is try best, not whitelist. -+ */ -+ if (addr == -EINVAL && vma_entry->pgoff != 0) { -+ pr_info("try mmap with offset 0\n"); -+ addr = sys_mmap(decode_pointer(vma_entry->start), -+ vma_entry_len(vma_entry), -+ prot, flags, -+ vma_entry->fd, -+ 0); -+ } - - if ((vma_entry->fd != -1) && (vma_entry->status & VMA_CLOSE)) - sys_close(vma_entry->fd); -@@ -1979,7 +1994,8 @@ long __export_restore_task(struct task_restore_args *args) - if (!vma_entry->has_madv || !vma_entry->madv) - continue; - -- if (vma_entry_is(vma_entry, VMA_AREA_ANON_INODE)) -+ if (vma_entry_is(vma_entry, VMA_AREA_ANON_INODE) || -+ vma_entry_is(vma_entry, VMA_AREA_DEV_SHARE)) - continue; - - for (m = 0; m < sizeof(vma_entry->madv) * 8; m++) { -diff --git a/criu/proc_parse.c b/criu/proc_parse.c -index d13589c..282a2e9 100644 ---- a/criu/proc_parse.c -+++ b/criu/proc_parse.c -@@ -552,6 +552,35 @@ static inline int handle_vvar_vma(struct vma_area *vma) - return 0; - } - -+static bool is_sysfs_resource(const char *path) -+{ -+ char *sub = NULL; -+ const char *prefix = "resource"; -+ const char *suffix = "_wc"; -+ -+ if (strstr(path, "devices/") == NULL) -+ return false; -+ -+ sub = rindex(path, '/'); -+ if (sub == NULL) -+ return false; -+ -+ sub += 1; -+ if (strncmp(sub, prefix, strlen(prefix)) != 0) -+ return false; -+ -+ sub += strlen(prefix); -+ while (*sub != '\0' && (*sub >= '0' && *sub <= '9')) -+ sub += 1; -+ -+ if (*sub == '\0') -+ return true; -+ if (!strcmp(sub, suffix)) -+ return true; -+ else -+ return false; -+} -+ - static int handle_vma(pid_t pid, struct vma_area *vma_area, const char *file_path, DIR *map_files_dir, - struct vma_file_info *vfi, struct vma_file_info *prev_vfi, int *vm_file_fd) - { -@@ -571,6 +600,9 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, const char *file_pat - goto err; - } else if (!strcmp(file_path, "[heap]")) { - vma_area->e->status |= VMA_AREA_REGULAR | VMA_AREA_HEAP; -+ } else if (is_sysfs_resource(file_path)) { -+ pr_info("find sys device module share memory\n"); -+ vma_area->e->status |= VMA_AREA_REGULAR | VMA_AREA_DEV_SHARE; - } else { - vma_area->e->status = VMA_AREA_REGULAR; - } --- -2.34.1 - diff --git a/0045-add-reuse-file-method-for-recover-deleted-file-state.patch b/0045-add-reuse-file-method-for-recover-deleted-file-state.patch deleted file mode 100644 index 1d8130e4c04ad24a3462a52c7fcd0be6faaf18af..0000000000000000000000000000000000000000 --- a/0045-add-reuse-file-method-for-recover-deleted-file-state.patch +++ /dev/null @@ -1,244 +0,0 @@ -From 1328e32ee05c59f7168039211c9d96176ff22791 Mon Sep 17 00:00:00 2001 -From: Jingxian He -Date: Sat, 14 Aug 2021 16:45:40 +0800 -Subject: [PATCH 45/72] add reuse file method for recover deleted file state - -Orphan inode maybe exist in checkpoint process. Sometimes it can't be -re-linked by `linkat()` syscall, e.g. sysfs. - -Therefore, add reuse file method for recover file state of deleted -files. - -Signed-off-by: Jingxian He -Signed-off-by: fu.lin ---- - criu/Makefile.crtools | 1 + - criu/files-reg.c | 10 ++++-- - criu/files.c | 22 +++++++++++- - criu/include/orphan-inode.h | 16 +++++++++ - criu/orphan-inode.c | 71 +++++++++++++++++++++++++++++++++++++ - 5 files changed, 116 insertions(+), 4 deletions(-) - create mode 100644 criu/include/orphan-inode.h - create mode 100644 criu/orphan-inode.c - -diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools -index 3e522b4..7fee749 100644 ---- a/criu/Makefile.crtools -+++ b/criu/Makefile.crtools -@@ -95,6 +95,7 @@ obj-y += devname.o - obj-y += files-chr.o - obj-y += exit-notify.o - obj-y += reserved-ports.o -+obj-y += orphan-inode.o - obj-$(CONFIG_HAS_LIBBPF) += bpfmap.o - obj-$(CONFIG_COMPAT) += pie-util-vdso-elf32.o - CFLAGS_pie-util-vdso-elf32.o += -DCONFIG_VDSO_32 -diff --git a/criu/files-reg.c b/criu/files-reg.c -index 6dc8745..ed46764 100644 ---- a/criu/files-reg.c -+++ b/criu/files-reg.c -@@ -46,6 +46,7 @@ - #include "external.h" - #include "memfd.h" - #include "files-chr.h" -+#include "orphan-inode.h" - - #include "protobuf.h" - #include "util.h" -@@ -1260,8 +1261,10 @@ static int check_path_remap(struct fd_link *link, const struct fd_parms *parms, - */ - - if (errno == ENOENT) { -- link_strip_deleted(link); -- return dump_linked_remap(rpath + 1, plen - 1, ost, lfd, id, nsid); -+ pr_info("Start add no exist file: %s\n", rpath+1); -+ add_reuse_file(id, lfd, dst_pid); -+ need_reuse_flag = O_REUSE; -+ return 0; - } - - pr_perror("Can't stat path"); -@@ -1663,7 +1666,8 @@ ext: - rfe.has_mode = true; - rfe.mode = p->stat.st_mode; - -- if (S_ISREG(p->stat.st_mode) && should_check_size(rfe.flags) && !store_validation_data(&rfe, p, lfd)) -+ if (S_ISREG(p->stat.st_mode) && should_check_size(rfe.flags) -+ && (need_reuse_flag != O_REUSE) && !store_validation_data(&rfe, p, lfd)) - return -1; - - fe.type = FD_TYPES__REG; -diff --git a/criu/files.c b/criu/files.c -index 1c52cf4..e79052e 100644 ---- a/criu/files.c -+++ b/criu/files.c -@@ -50,6 +50,7 @@ - #include "fdstore.h" - #include "bpfmap.h" - #include "files-chr.h" -+#include "orphan-inode.h" - - #include "protobuf.h" - #include "util.h" -@@ -706,6 +707,7 @@ int dump_my_file(int lfd, u32 *id, int *type) - } - - int dst_pid; -+int need_reuse_flag; - - int dump_task_files_seized(struct parasite_ctl *ctl, struct pstree_item *item, struct parasite_drain_fd *dfds) - { -@@ -743,10 +745,13 @@ int dump_task_files_seized(struct parasite_ctl *ctl, struct pstree_item *item, s - for (i = 0; i < nr_fds; i++) { - FdinfoEntry e = FDINFO_ENTRY__INIT; - -+ need_reuse_flag = 0; - ret = dump_one_file(item->pid, dfds->fds[i + off], lfds[i], opts + i, ctl, &e, dfds); - if (ret) - break; - -+ e.flags |= need_reuse_flag; -+ pr_info("write fdinfoEntry fd=%d id=%d\n", (&e)->fd, (&e)->id); - ret = pb_write_one(img, &e, PB_FDINFO); - if (ret) - break; -@@ -939,7 +944,8 @@ int collect_fd(int pid, FdinfoEntry *e, struct rst_info *rst_info, bool fake) - { - struct file_desc *fdesc; - -- pr_info("Collect fdinfo pid=%d fd=%d id=%#x\n", pid, e->fd, e->id); -+ pr_info("Collect fdinfo pid=%d fd=%d id=%#x flags: %#x\n", -+ pid, e->fd, e->id, e->flags); - - fdesc = find_file_desc(e); - if (fdesc == NULL) { -@@ -1230,6 +1236,7 @@ static int open_fd(struct fdinfo_list_entry *fle) - int new_fd = -1, ret; - struct chrfile_info *ci; - -+ pr_info("open file flags: %#x\n", fle->fe->flags); - flem = file_master(d); - if (fle != flem) { - BUG_ON(fle->stage != FLE_INITIALIZED); -@@ -1251,6 +1258,19 @@ static int open_fd(struct fdinfo_list_entry *fle) - return 0; - } - } -+ } else if (fle->fe->flags & O_REUSE) { -+ pr_info("find reuse file:%d\n", d->id); -+ ret = repair_reuse_file(d->id); -+ if (!ret) { -+ new_fd = get_reuse_file(); -+ pr_info("get reuse file:%d\n", new_fd); -+ if (new_fd <= 0 || setup_and_serve_out(fle, new_fd) < 0) { -+ pr_err("setup reuse file fail\n"); -+ return -1; -+ } -+ fle->stage = FLE_RESTORED; -+ return 0; -+ } - } - - /* -diff --git a/criu/include/orphan-inode.h b/criu/include/orphan-inode.h -new file mode 100644 -index 0000000..bc3b6ae ---- /dev/null -+++ b/criu/include/orphan-inode.h -@@ -0,0 +1,16 @@ -+#ifndef __CRIU_ORPHAN_INODE_H__ -+#define __CRIU_ORPHAN_INODE_H__ -+ -+#define ADD_REUSE_FILE_PATH "/sys/kernel/add_reuse_file" -+#define REPAIR_REUSE_FILE_PATH "/sys/kernel/repair_reuse_file" -+#define REUSE_FILE_PATH "/sys/kernel/reuse_file" -+#define O_REUSE 0100000000 -+ -+extern int dst_pid; -+extern int need_reuse_flag; -+ -+int add_reuse_file(u32 id, int fd, int pid); -+int repair_reuse_file(int id); -+int get_reuse_file(void); -+ -+#endif /* __CRIU_ORPHAN_INODE_H__ */ -diff --git a/criu/orphan-inode.c b/criu/orphan-inode.c -new file mode 100644 -index 0000000..c4e38dc ---- /dev/null -+++ b/criu/orphan-inode.c -@@ -0,0 +1,71 @@ -+#include -+#include -+#include -+#include -+#include -+ -+#include "int.h" -+#include "log.h" -+#include "orphan-inode.h" -+ -+int add_reuse_file(u32 id, int fd, int pid) -+{ -+ int retval; -+ char buf[256] = {0}; -+ -+ retval = snprintf(buf, 256, "%u,%d,%d", id, fd, pid); -+ if (retval <= 0) -+ return -EFAULT; -+ -+ fd = open(ADD_REUSE_FILE_PATH, O_WRONLY, 0); -+ if (fd < 0) { -+ pr_err("open file:%s fail\n", ADD_REUSE_FILE_PATH); -+ return fd; -+ } -+ -+ retval = write(fd, buf, strlen(buf)); -+ close(fd); -+ -+ return retval < 0 ? -1 : 0; -+} -+ -+int repair_reuse_file(int id) -+{ -+ int retval, fd; -+ char buf[256] = {0}; -+ -+ retval = snprintf(buf, 256, "%u", id); -+ if (retval <= 0) -+ return -EFAULT; -+ -+ fd = open(REPAIR_REUSE_FILE_PATH, O_WRONLY, 0); -+ if (fd < 0) { -+ pr_err("open file:%s fail\n", REPAIR_REUSE_FILE_PATH); -+ return fd; -+ } -+ retval = write(fd, buf, strlen(buf)); -+ -+ close(fd); -+ return retval < 0 ? -1 : 0; -+} -+ -+int get_reuse_file(void) -+{ -+ int fd; -+ ssize_t count; -+ int retval = -1; -+ char buf[32] = {0}; -+ -+ fd = open(REUSE_FILE_PATH, O_RDONLY , 0); -+ if (fd < 0) { -+ pr_err("open file:%s fail\n", REUSE_FILE_PATH); -+ return fd; -+ } -+ -+ count = read(fd, buf, sizeof(buf)); -+ if (count > 0) -+ retval = atoi(buf); -+ -+ close(fd); -+ return retval; -+} --- -2.34.1 - diff --git a/0046-sk-fix-share-sockets-repair-problem.patch b/0046-sk-fix-share-sockets-repair-problem.patch deleted file mode 100644 index e1e666a39f7d0904f7b585791d0eaec2a09a3845..0000000000000000000000000000000000000000 --- a/0046-sk-fix-share-sockets-repair-problem.patch +++ /dev/null @@ -1,133 +0,0 @@ -From 8b1856d5c72c6870c04a87158718d2df62591a6c Mon Sep 17 00:00:00 2001 -From: Jingxian He -Date: Wed, 11 Aug 2021 15:01:27 +0800 -Subject: [PATCH 46/72] sk: fix share sockets repair problem - -Repair off the share sockets after reusing them -to recover the share socket state. - -Signed-off-by: Jingxian He -Signed-off-by: fu.lin ---- - criu/files.c | 33 ++++++++++++++++++++++++++++++++- - criu/sk-inet.c | 7 +++++-- - criu/sk-netlink.c | 5 +++-- - 3 files changed, 40 insertions(+), 5 deletions(-) - -diff --git a/criu/files.c b/criu/files.c -index e79052e..24ed219 100644 ---- a/criu/files.c -+++ b/criu/files.c -@@ -51,6 +51,7 @@ - #include "bpfmap.h" - #include "files-chr.h" - #include "orphan-inode.h" -+#include "sk-inet.h" - - #include "protobuf.h" - #include "util.h" -@@ -1215,7 +1216,7 @@ int setup_and_serve_out(struct fdinfo_list_entry *fle, int new_fd) - if (reopen_fd_as(fle->fe->fd, new_fd)) - return -1; - -- pr_info("*******flags: %d",fle->fe->flags); -+ pr_info("*******flags: %d\n",fle->fe->flags); - if (fcntl(fle->fe->fd, F_SETFD, fle->fe->flags) == -1) { - pr_perror("Unable to set file descriptor flags"); - return -1; -@@ -1229,6 +1230,30 @@ int setup_and_serve_out(struct fdinfo_list_entry *fle, int new_fd) - return 0; - } - -+#define MAX_SHARE_SOCKETS_NUM 25000 -+int repair_share_sockets[MAX_SHARE_SOCKETS_NUM]; -+int repair_share_num; -+ -+int add_repair_share_socket(int fd) -+{ -+ if (repair_share_num >= MAX_SHARE_SOCKETS_NUM) -+ return -1; -+ repair_share_sockets[repair_share_num] = fd; -+ repair_share_num++; -+ return 0; -+} -+ -+void repair_off_share_sockets(void) -+{ -+ int i; -+ -+ for (i = 0; i < repair_share_num; i++) { -+ tcp_repair_off(repair_share_sockets[i]); -+ pr_info("repair off socket:%d\n", repair_share_sockets[i]); -+ } -+ repair_share_num = 0; -+} -+ - static int open_fd(struct fdinfo_list_entry *fle) - { - struct file_desc *d = fle->desc; -@@ -1248,6 +1273,7 @@ static int open_fd(struct fdinfo_list_entry *fle) - - if (d->ops->type == FD_TYPES__INETSK) { - if (check_need_repair(d)) { -+ pr_info("start repair for:%d\n", d->id); - ret = repair_share_socket(d->id); - if (!ret) { - new_fd = get_share_socket(); -@@ -1255,6 +1281,10 @@ static int open_fd(struct fdinfo_list_entry *fle) - if (new_fd <= 0 || setup_and_serve_out(fle, new_fd) < 0) - return -1; - fle->stage = FLE_RESTORED; -+ if (add_repair_share_socket(fle->fe->fd)) { -+ pr_perror("add repair share socket fail\n"); -+ return -1; -+ } - return 0; - } - } -@@ -1379,6 +1409,7 @@ static int open_fdinfos(struct pstree_item *me) - wait_fds_event(); - } while (again || progress); - -+ repair_off_share_sockets(); - BUG_ON(!list_empty(list)); - /* - * Fake fles may be used for restore other -diff --git a/criu/sk-inet.c b/criu/sk-inet.c -index c7de793..c0251db 100644 ---- a/criu/sk-inet.c -+++ b/criu/sk-inet.c -@@ -635,8 +635,11 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa - BUG_ON(sk->sd.already_dumped); - - if (check_share_dst_port(sk->dst_port) || check_share_src_port(sk->src_port)) { -- pr_info("Start add share prot:%d src %d\n", sk->dst_port, sk->src_port); -- add_share_socket(id, lfd, dst_pid, sk->src_port); -+ pr_info("Start add share prot:%d-%d dst_pid %d id %d\n", -+ sk->dst_port, sk->src_port, dst_pid, id); -+ ret = add_share_socket(id, lfd, dst_pid, sk->src_port); -+ if (ret) -+ pr_warn("add share socket ret %d\n", ret); - } - - ie.id = id; -diff --git a/criu/sk-netlink.c b/criu/sk-netlink.c -index d4b3b7b..2832060 100644 ---- a/criu/sk-netlink.c -+++ b/criu/sk-netlink.c -@@ -115,9 +115,10 @@ static bool can_dump_netlink_sk(int lfd) - - ret = fd_has_data(lfd); - if (ret == 1) -- pr_err("The socket has data to read\n"); -+ pr_warn("The socket has data to read\n"); - -- return ret == 0; -+ /* ignore netlink socket data */ -+ return true; - } - - static int dump_one_netlink_fd(int lfd, u32 id, const struct fd_parms *p) --- -2.34.1 - diff --git a/0047-mm-add-clear-pin-mem-and-init-page-map-option.patch b/0047-mm-add-clear-pin-mem-and-init-page-map-option.patch deleted file mode 100644 index 3ea17c5fcd7debdf5a7ca09151b8bf1aa5b0b7c1..0000000000000000000000000000000000000000 --- a/0047-mm-add-clear-pin-mem-and-init-page-map-option.patch +++ /dev/null @@ -1,107 +0,0 @@ -From 1cb92fe0a930cf862f8a3ecd9a812d5b2e3aea60 Mon Sep 17 00:00:00 2001 -From: root -Date: Wed, 8 Sep 2021 08:23:11 +0000 -Subject: [PATCH 47/72] mm: add clear pin mem and init page map option - -Add 'clear-pin-mem' option for clearing pin memory data, -and 'init-page-map' option for initializationing buffer for -reading page map info. - -Signed-off-by: Jingxian He -Signed-off-by: fu.lin ---- - criu/crtools.c | 13 ++++++++++++- - criu/include/pin-mem.h | 4 ++++ - criu/pin-mem.c | 20 ++++++++++++++++++++ - 3 files changed, 36 insertions(+), 1 deletion(-) - -diff --git a/criu/crtools.c b/criu/crtools.c -index ae858e8..cc0a18f 100644 ---- a/criu/crtools.c -+++ b/criu/crtools.c -@@ -43,6 +43,7 @@ - #include "fault-injection.h" - #include "proc_parse.h" - #include "kerndat.h" -+#include "pin-mem.h" - - #include "setproctitle.h" - #include "sysctl.h" -@@ -169,6 +170,14 @@ int main(int argc, char *argv[], char *envp[]) - goto usage; - } - -+ if (!strcmp(argv[optind], "clear-pin-memory")) { -+ return clear_pin_mem(0); -+ } -+ -+ if (!strcmp(argv[optind], "init-pagemap-read")) { -+ return init_pagemap_read(0); -+ } -+ - /* We must not open imgs dir, if service is called */ - if (strcmp(argv[optind], "service")) { - ret = open_image_dir(opts.imgs_dir, image_dir_mode(argv, optind)); -@@ -320,7 +329,9 @@ usage: - " service launch service\n" - " dedup remove duplicates in memory dump\n" - " cpuinfo dump writes cpu information into image file\n" -- " cpuinfo check validates cpu information read from image file\n"); -+ " cpuinfo check validates cpu information read from image file\n" -+ " clear-pin-memory clear pin memory manage data\n" -+ " init-pagemap-read init data buffer for reading page map info\n"); - - if (usage_error) { - pr_msg("\nTry -h|--help for more info\n"); -diff --git a/criu/include/pin-mem.h b/criu/include/pin-mem.h -index 2b54996..b28ef3d 100644 ---- a/criu/include/pin-mem.h -+++ b/criu/include/pin-mem.h -@@ -39,6 +39,9 @@ struct pin_mem_area_set { - #define _SET_FORK_PID 8 - #define SET_FORK_PID _IOW(PIN_MEM_MAGIC, _SET_FORK_PID, int) - -+#define _INIT_PAGEMAP_READ 5 -+#define INIT_PAGEMAP_READ _IOW(PIN_MEM_MAGIC, _INIT_PAGEMAP_READ, int) -+ - #endif /* __has_include("linux/pin_memory.h") */ - - #define PIN_MEM_FILE "/dev/pinmem" -@@ -49,5 +52,6 @@ int pin_vmae(VmaEntry *vmae, struct pstree_item *item); - int dump_task_special_pages(int pid); - int restore_task_special_pages(int pid); - int clear_pin_mem(int pid); -+int init_pagemap_read(int para); - - #endif /* __CRIU_PIN_MEM_H__ */ -diff --git a/criu/pin-mem.c b/criu/pin-mem.c -index b18db97..96ca2c5 100644 ---- a/criu/pin-mem.c -+++ b/criu/pin-mem.c -@@ -144,3 +144,23 @@ int clear_pin_mem(int pid) - close(fd); - return ret; - } -+ -+int init_pagemap_read(int para) -+{ -+ int fd, ret; -+ -+ fd = open(PIN_MEM_FILE, O_RDWR, 0); -+ if (fd < 0) { -+ pr_warn("error open file: %s\n", PIN_MEM_FILE); -+ return -1; -+ } -+ -+ ret = ioctl(fd, INIT_PAGEMAP_READ, (unsigned long) ¶); -+ if (ret < 0) { -+ pr_warn("Init pagemap read fail, errno: %s\n", strerror(errno)); -+ } -+ -+ close(fd); -+ return ret; -+} -+ --- -2.34.1 - diff --git a/0048-fds-fix-fds-list-restore.patch b/0048-fds-fix-fds-list-restore.patch deleted file mode 100644 index 11de7f41ced0ade1571c022fdea264acb3f43b27..0000000000000000000000000000000000000000 --- a/0048-fds-fix-fds-list-restore.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 803ee02298e0a71b07cf611eee68e23f702d259e Mon Sep 17 00:00:00 2001 -From: Jingxian He -Date: Thu, 16 Sep 2021 13:50:46 +0000 -Subject: [PATCH 48/72] fds: fix fds list restore - -When there exist multi processes need to dump, the child process may -have the same fds as parent process. During the restore processing, -criu choose the process which has the min pid value to be the master -process to recover fds. However, choosing the parent process as the -master process is more suitable. - -Signed-off-by: Jingxian He ---- - criu/files.c | 7 +------ - 1 file changed, 1 insertion(+), 6 deletions(-) - -diff --git a/criu/files.c b/criu/files.c -index 24ed219..6d8b812 100644 ---- a/criu/files.c -+++ b/criu/files.c -@@ -906,12 +906,7 @@ static struct fdinfo_list_entry *alloc_fle(int pid, FdinfoEntry *fe) - - static void __collect_desc_fle(struct fdinfo_list_entry *new_le, struct file_desc *fdesc) - { -- struct fdinfo_list_entry *le; -- -- list_for_each_entry_reverse(le, &fdesc->fd_info_head, desc_list) -- if (pid_rst_prio_eq(le->pid, new_le->pid)) -- break; -- list_add(&new_le->desc_list, &le->desc_list); -+ list_add(&new_le->desc_list, &fdesc->fd_info_head); - } - - static void collect_desc_fle(struct fdinfo_list_entry *new_le, struct file_desc *fdesc, bool force_master) --- -2.34.1 - diff --git a/0049-log-print-error-log-to-dev-kmsg.patch b/0049-log-print-error-log-to-dev-kmsg.patch deleted file mode 100644 index 688bd88f93b485b3aea7c1d02c7652c781dc7f38..0000000000000000000000000000000000000000 --- a/0049-log-print-error-log-to-dev-kmsg.patch +++ /dev/null @@ -1,88 +0,0 @@ -From bec1445fd5dcfffb24918d725163f3be35f8b634 Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Tue, 19 Oct 2021 20:53:19 +0800 -Subject: [PATCH 49/72] log: print error log to /dev/kmsg - -The criu log can't be flushed to disk when OS crash in storage -environment, therefore, output high level msg to /dev/kmsg. - -Signed-off-by: fu.lin ---- - criu/Makefile.crtools | 1 + - criu/include/log.h | 3 +++ - criu/kmsg.c | 16 ++++++++++++++++ - criu/log.c | 4 ++++ - 4 files changed, 24 insertions(+) - create mode 100644 criu/kmsg.c - -diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools -index 7fee749..3bb7c19 100644 ---- a/criu/Makefile.crtools -+++ b/criu/Makefile.crtools -@@ -96,6 +96,7 @@ obj-y += files-chr.o - obj-y += exit-notify.o - obj-y += reserved-ports.o - obj-y += orphan-inode.o -+obj-y += kmsg.o - obj-$(CONFIG_HAS_LIBBPF) += bpfmap.o - obj-$(CONFIG_COMPAT) += pie-util-vdso-elf32.o - CFLAGS_pie-util-vdso-elf32.o += -DCONFIG_VDSO_32 -diff --git a/criu/include/log.h b/criu/include/log.h -index 85e6dc2..aafea95 100644 ---- a/criu/include/log.h -+++ b/criu/include/log.h -@@ -2,6 +2,7 @@ - #define __CR_LOG_H__ - - #include -+#include - - #ifndef CR_NOGLIBC - -@@ -62,4 +63,6 @@ void flush_early_log_buffer(int fd); - - #endif /* CR_NOGLIBC */ - -+void write_kmsg(const void *buf, size_t count); -+ - #endif /* __CR_LOG_H__ */ -diff --git a/criu/kmsg.c b/criu/kmsg.c -new file mode 100644 -index 0000000..c956dfb ---- /dev/null -+++ b/criu/kmsg.c -@@ -0,0 +1,16 @@ -+#include -+#include -+ -+#define SYSLOG_DEV "/dev/kmsg" -+ -+void write_kmsg(const void *buf, size_t count) -+{ -+ int fd; -+ -+ fd = open(SYSLOG_DEV, O_CLOEXEC | O_WRONLY); -+ if (fd < 0) -+ return; -+ -+ write(fd, buf, count); -+ close(fd); -+} -diff --git a/criu/log.c b/criu/log.c -index c4ce90e..ba208f7 100644 ---- a/criu/log.c -+++ b/criu/log.c -@@ -373,6 +373,10 @@ static void vprint_on_level(unsigned int loglevel, const char *format, va_list p - size += buf_off; - - while (off < size) { -+ if (loglevel <= LOG_WARN) { -+ write_kmsg(buffer + off, size - off); -+ } -+ - ret = write(fd, buffer + off, size - off); - if (ret <= 0) - break; --- -2.34.1 - diff --git a/0050-unix-sk-improve-dgram-robustness.patch b/0050-unix-sk-improve-dgram-robustness.patch deleted file mode 100644 index b20170284a7df73a1d0a7dbac20d2cce41d79b34..0000000000000000000000000000000000000000 --- a/0050-unix-sk-improve-dgram-robustness.patch +++ /dev/null @@ -1,83 +0,0 @@ -From 6dde331da8e28e129010aee391e7ef3d757490cd Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Tue, 26 Oct 2021 11:13:27 +0800 -Subject: [PATCH 50/72] unix sk: improve dgram robustness - -We should try out best to ensure the success of criu. As for unix dgram -socket, criu use re-connect instead of repair instead of unix stream -socket. Therefore, this patch does the following things: - -- detect unix dgram unix sock file when criu dumps unix dgram socket -- add the fault tolerance of unix dgram socket connecting (focus on the - condition of `/dev/log` disappearance when rsyslog restart) - -Conflict:NA -Reference:https://gitee.com/src-openeuler/criu/pulls/21 -Signed-off-by: fu.lin ---- - criu/sk-unix.c | 35 +++++++++++++++++++++++++++++++++-- - 1 file changed, 33 insertions(+), 2 deletions(-) - -diff --git a/criu/sk-unix.c b/criu/sk-unix.c -index 86bfa18..de75425 100644 ---- a/criu/sk-unix.c -+++ b/criu/sk-unix.c -@@ -11,6 +11,7 @@ - #include - #include - #include -+#include - - #include "libnetlink.h" - #include "cr_options.h" -@@ -1435,6 +1436,33 @@ err: - return -1; - } - -+/* -+ * Sometimes, `/dev/log` will disappear because of the restart of rsyslog when -+ * rotating, criu try to connect `/dev/log` will report error at this time. We -+ * should try our best to ensure the success of criu restoration. Therefore, -+ * retry three times here. -+ */ -+static int unix_dgram_reconnect(int fd, struct sockaddr_un *addr, int len) -+{ -+ int retval = 0; -+ struct timespec tim = { -+ .tv_sec = 0, -+ .tv_nsec = 5e+8, -+ }; -+ -+ for (int i = 0; i < 3; i++) { -+ nanosleep(&tim, NULL); -+ pr_warn("Can't connect unix socket(%s), %d retry\n", -+ addr->sun_path, i); -+ retval = connect(fd, (struct sockaddr *)addr, -+ sizeof(addr->sun_family) + len); -+ if (retval == 0) -+ break; -+ } -+ -+ return retval; -+} -+ - static int post_open_standalone(struct file_desc *d, int fd) - { - int fdstore_fd = -1, procfs_self_dir = -1, len; -@@ -1521,8 +1549,11 @@ static int post_open_standalone(struct file_desc *d, int fd) - goto err_revert_and_exit; - } - } else if (connect(fd, (struct sockaddr *)&addr, sizeof(addr.sun_family) + len) < 0) { -- pr_perror("Can't connect %d socket", ui->ue->ino); -- goto err_revert_and_exit; -+ if (ui->ue->type != SOCK_DGRAM || errno != ENOENT -+ || unix_dgram_reconnect(fd, &addr, len) != 0) { -+ pr_perror("Can't connect %d socket", ui->ue->ino); -+ goto err_revert_and_exit; -+ } - } - mutex_unlock(mutex_ghost); - --- -2.34.1 - diff --git a/0051-sk-ignore-the-bind-error-for-icmp-socket.patch b/0051-sk-ignore-the-bind-error-for-icmp-socket.patch deleted file mode 100644 index 25a071f04ca71367dcbb4b448bf29b2145cf414e..0000000000000000000000000000000000000000 --- a/0051-sk-ignore-the-bind-error-for-icmp-socket.patch +++ /dev/null @@ -1,46 +0,0 @@ -From a7d5401953c548c9479c386b52fffcba6b49c0e3 Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Wed, 27 Oct 2021 11:57:43 +0800 -Subject: [PATCH 51/72] sk: ignore the bind error for icmp socket - -Conflict:NA -Reference:https://gitee.com/src-openeuler/criu/pulls/21 -Signed-off-by: fu.lin ---- - criu/sk-inet.c | 20 ++++++++++++++++++-- - 1 file changed, 18 insertions(+), 2 deletions(-) - -diff --git a/criu/sk-inet.c b/criu/sk-inet.c -index c0251db..96c2d09 100644 ---- a/criu/sk-inet.c -+++ b/criu/sk-inet.c -@@ -1160,8 +1160,24 @@ int inet_bind(int sk, struct inet_sk_info *ii) - } - - if (bind(sk, (struct sockaddr *)&addr, addr_size) == -1) { -- pr_perror("Can't bind inet socket (id %d)", ii->ie->id); -- return -1; -+ InetSkEntry *ie = ii->ie; -+ -+ /* -+ * Sometimes the ping-like program restoration may appear -+ * `bind()` error when it is specified the address. In view -+ * of the principle that we should try our best to restore the -+ * process, and ping-like program works abnormal can tolerate, -+ * just warn here instead of report error. -+ */ -+ if (ie->proto == IPPROTO_ICMP || ie->proto == IPPROTO_ICMPV6) { -+ pr_warn("Can't bind inet socket (id %d) proto %s\n", -+ ie->id, -+ ie->proto == IPPROTO_ICMP ? -+ "IPPROTO_ICMP" : "IPPROTO_ICMPV6"); -+ } else { -+ pr_perror("Can't bind inet socket (id %d)", ii->ie->id); -+ return -1; -+ } - } - - if (rst_freebind) { --- -2.34.1 - diff --git a/0052-optimization-parallel-collecting-vmas.patch b/0052-optimization-parallel-collecting-vmas.patch deleted file mode 100644 index 3cdf13c5513e9060589d7e7dd2f097290bdecac3..0000000000000000000000000000000000000000 --- a/0052-optimization-parallel-collecting-vmas.patch +++ /dev/null @@ -1,505 +0,0 @@ -From ade879e6ccdc4c74a1c153f0750d2cd87ec8a4ec Mon Sep 17 00:00:00 2001 -From: Jingxian He -Date: Tue, 30 Nov 2021 10:26:10 +0800 -Subject: [PATCH 52/72] optimization: parallel collecting vmas - -In order to improve criu dump performance, make the collecting vmas -operation parallel run with the other collecting operations. - -In order to prevent the concurrency problem by `find_unused_fd`, only -the main root task will parallel. - -Usage: - criu --parallel - -Note: - Ensure criu can use multi-core, otherwise the performance will -deterioration. - -Signed-off-by: fu.lin -Signed-off-by: hewenliang -Signed-off-by: Jingxian He ---- - criu/Makefile.crtools | 1 + - criu/Makefile.packages | 1 + - criu/config.c | 1 + - criu/cr-dump.c | 53 +++++++++++----- - criu/crtools.c | 1 + - criu/include/cr_options.h | 1 + - criu/include/pstree.h | 3 + - criu/include/taskqueue.h | 50 +++++++++++++++ - criu/namespaces.c | 9 ++- - criu/taskqueue.c | 124 ++++++++++++++++++++++++++++++++++++++ - 10 files changed, 228 insertions(+), 16 deletions(-) - create mode 100644 criu/include/taskqueue.h - create mode 100644 criu/taskqueue.c - -diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools -index 3bb7c19..2ad0207 100644 ---- a/criu/Makefile.crtools -+++ b/criu/Makefile.crtools -@@ -97,6 +97,7 @@ obj-y += exit-notify.o - obj-y += reserved-ports.o - obj-y += orphan-inode.o - obj-y += kmsg.o -+obj-y += taskqueue.o - obj-$(CONFIG_HAS_LIBBPF) += bpfmap.o - obj-$(CONFIG_COMPAT) += pie-util-vdso-elf32.o - CFLAGS_pie-util-vdso-elf32.o += -DCONFIG_VDSO_32 -diff --git a/criu/Makefile.packages b/criu/Makefile.packages -index 13c346f..851489b 100644 ---- a/criu/Makefile.packages -+++ b/criu/Makefile.packages -@@ -31,6 +31,7 @@ REQ-RPM-PKG-TEST-NAMES += $(PYTHON)-pyyaml - endif - - export LIBS += -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/ -lnet -+export LIBS += -lpthread - - check-packages-failed: - $(warning Can not find some of the required libraries) -diff --git a/criu/config.c b/criu/config.c -index ae5f81e..fdbc5eb 100644 ---- a/criu/config.c -+++ b/criu/config.c -@@ -709,6 +709,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, - { "share-dst-ports", required_argument, 0, 2000 }, - { "share-src-ports", required_argument, 0, 2001 }, - { "reserve-ports", required_argument, 0, 'P' }, -+ BOOL_OPT("parallel", &opts.parallel), - {}, - }; - -diff --git a/criu/cr-dump.c b/criu/cr-dump.c -index a8ab61e..ee826c0 100644 ---- a/criu/cr-dump.c -+++ b/criu/cr-dump.c -@@ -17,6 +17,7 @@ - - #include - #include -+#include - - #include "types.h" - #include "protobuf.h" -@@ -90,6 +91,7 @@ - #include "notifier.h" - #include "files-chr.h" - #include "reserved-ports.h" -+#include "taskqueue.h" - - /* - * Architectures can overwrite this function to restore register sets that -@@ -424,7 +426,7 @@ static int dump_pid_misc(pid_t pid, TaskCoreEntry *tc) - return 0; - } - --static int dump_filemap(struct vma_area *vma_area, int fd) -+int dump_filemap(struct vma_area *vma_area, int fd) - { - struct fd_parms p = FD_PARMS_INIT; - VmaEntry *vma = vma_area->e; -@@ -1504,7 +1506,7 @@ err_cure: - static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) - { - pid_t pid = item->pid->real; -- struct vm_area_list vmas; -+ struct vm_area_list *vmas = NULL; - struct parasite_ctl *parasite_ctl; - int ret, exit_code = -1; - struct parasite_dump_misc misc; -@@ -1513,8 +1515,6 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) - struct proc_posix_timers_stat proc_args; - struct mem_dump_ctl mdc; - -- vm_area_list_init(&vmas); -- - pr_info("========================================\n"); - pr_info("Dumping task (pid: %d)\n", pid); - pr_info("========================================\n"); -@@ -1525,12 +1525,23 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) - */ - return 0; - -+ if (!opts.parallel || root_item->pid->real != item->pid->real ) { -+ vmas = xmalloc(sizeof(struct vm_area_list)); -+ if (vmas == NULL) { -+ pr_err("xmalloc no memory\n"); -+ return -1; -+ } -+ vm_area_list_init(vmas); -+ } else -+ vmas = item->maps_info.vmas; -+ - pr_info("Obtaining task stat ... \n"); - ret = parse_pid_stat(pid, &pps_buf); - if (ret < 0) - goto err; - -- ret = collect_mappings(pid, &vmas, dump_filemap); -+ ret = (opts.parallel && root_item->pid->real == item->pid->real) ? -+ 0 : collect_mappings(pid, vmas, dump_filemap); - if (ret) { - pr_err("Collect mappings (pid: %d) failed with %d\n", pid, ret); - goto err; -@@ -1570,7 +1581,10 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) - goto err; - } - -- parasite_ctl = parasite_infect_seized(pid, item, &vmas); -+ if (opts.parallel && end_collect_mappings_thread(item)) -+ goto err; -+ -+ parasite_ctl = parasite_infect_seized(pid, item, vmas); - if (!parasite_ctl) { - pr_err("Can't infect (pid: %d) with parasite\n", pid); - goto err; -@@ -1600,13 +1614,13 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) - goto err_cure_imgset; - } - -- ret = parasite_fixup_vdso(parasite_ctl, pid, &vmas); -+ ret = parasite_fixup_vdso(parasite_ctl, pid, vmas); - if (ret) { - pr_err("Can't fixup vdso VMAs (pid: %d)\n", pid); - goto err_cure_imgset; - } - -- ret = parasite_collect_aios(parasite_ctl, &vmas); /* FIXME -- merge with above */ -+ ret = parasite_collect_aios(parasite_ctl, vmas); /* FIXME -- merge with above */ - if (ret) { - pr_err("Failed to check aio rings (pid: %d)\n", pid); - goto err_cure_imgset; -@@ -1658,7 +1672,7 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) - mdc.stat = &pps_buf; - mdc.parent_ie = parent_ie; - -- ret = parasite_dump_pages_seized(item, &vmas, &mdc, parasite_ctl); -+ ret = parasite_dump_pages_seized(item, vmas, &mdc, parasite_ctl); - if (ret) - goto err_cure; - -@@ -1719,7 +1733,7 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) - goto err; - } - -- ret = dump_task_mm(pid, &pps_buf, &misc, &vmas, cr_imgset); -+ ret = dump_task_mm(pid, &pps_buf, &misc, vmas, cr_imgset); - if (ret) { - pr_err("Dump mappings (pid: %d) failed with %d\n", pid, ret); - goto err; -@@ -1735,7 +1749,8 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) - exit_code = 0; - err: - close_pid_proc(); -- free_mappings(&vmas); -+ free_mappings(vmas); -+ free(vmas); - xfree(dfds); - return exit_code; - -@@ -1893,6 +1908,9 @@ int cr_pre_dump_tasks(pid_t pid) - if (opts.dump_char_dev && parse_devname() < 0) - goto err; - -+ if (opts.parallel && init_parallel_env() != 0) -+ goto err; -+ - root_item = alloc_pstree_item(); - if (!root_item) - goto err; -@@ -2107,6 +2125,13 @@ static int cr_dump_finish(int ret) - write_stats(DUMP_STATS); - pr_info("Dumping finished successfully\n"); - } -+ -+ /* -+ * Don't care threads' status and ignore unfree resources, use -+ * `exit_group()` to ensure exit all threads. -+ */ -+ syscall(SYS_exit_group, post_dump_ret ? : (ret != 0)); -+ - return post_dump_ret ?: (ret != 0); - } - -@@ -2203,13 +2228,13 @@ int cr_dump_tasks(pid_t pid) - if (collect_file_locks()) - goto err; - -- if (collect_namespaces(true) < 0) -- goto err; -- - glob_imgset = cr_glob_imgset_open(O_DUMP); - if (!glob_imgset) - goto err; - -+ if (collect_namespaces(true) < 0) -+ goto err; -+ - if (seccomp_collect_dump_filters() < 0) - goto err; - -diff --git a/criu/crtools.c b/criu/crtools.c -index cc0a18f..c20b3b7 100644 ---- a/criu/crtools.c -+++ b/criu/crtools.c -@@ -472,6 +472,7 @@ usage: - " --weak-file-check Allow file size and mod larger than dumping value\n" - " --file-locks-repair Use repair mode to dump and restore file locks\n" - " --reserve-ports Reserve src ports in kernel\n" -+ " --parallel Collect smaps parallel to accellrate dumping speed\n" - "\n" - "Check options:\n" - " Without options, \"criu check\" checks availability of absolutely required\n" -diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h -index 3b61c6b..6478d4d 100644 ---- a/criu/include/cr_options.h -+++ b/criu/include/cr_options.h -@@ -201,6 +201,7 @@ struct cr_options { - char *share_dst_ports; - char *share_src_ports; - int reserve_ports; -+ int parallel; - }; - - extern struct cr_options opts; -diff --git a/criu/include/pstree.h b/criu/include/pstree.h -index 97bef11..87e4c47 100644 ---- a/criu/include/pstree.h -+++ b/criu/include/pstree.h -@@ -1,6 +1,8 @@ - #ifndef __CR_PSTREE_H__ - #define __CR_PSTREE_H__ - -+#include "taskqueue.h" -+ - #include "common/list.h" - #include "common/lock.h" - #include "pid.h" -@@ -31,6 +33,7 @@ struct pstree_item { - futex_t task_st; - unsigned long task_st_le_bits; - }; -+ struct mappings_info maps_info; - }; - - static inline pid_t vpid(const struct pstree_item *i) -diff --git a/criu/include/taskqueue.h b/criu/include/taskqueue.h -new file mode 100644 -index 0000000..16f9e3d ---- /dev/null -+++ b/criu/include/taskqueue.h -@@ -0,0 +1,50 @@ -+#ifndef __CR_TASKQUEUE_H__ -+#define __CR_TASKQUEUE_H__ -+ -+#include -+#include -+#include -+ -+#include "vma.h" -+#include "pstree.h" -+ -+#include "common/list.h" -+ -+#define TASKQUEUE_HASH_SIZE 8 -+ -+struct taskqueue { -+ pthread_t task; -+ void *(*routine)(void *); -+ void *arg; -+ int result; -+}; -+#define queue_task queue.task -+#define queue_routine queue.routine -+#define queue_arg queue.arg -+#define queue_result queue.result -+ -+int init_parallel_env(void); -+ -+static inline int taskqueue_create(struct taskqueue *queue) -+{ -+ return pthread_create(&queue->task, NULL, queue->routine, queue->arg); -+} -+ -+static inline int taskqueue_join(struct taskqueue *queue) -+{ -+ return pthread_join(queue->task, NULL); -+} -+ -+/* parallel collect smaps */ -+struct mappings_info { -+ struct hlist_node hash; -+ pid_t pid; -+ struct vm_area_list *vmas; -+ dump_filemap_t dump_file; -+ struct taskqueue queue; -+}; -+ -+int start_collect_mappings_thread(void); -+int end_collect_mappings_thread(struct pstree_item *item); -+ -+#endif /* __CR_TASKQUEUE_H__ */ -diff --git a/criu/namespaces.c b/criu/namespaces.c -index 7fa5868..05e6732 100644 ---- a/criu/namespaces.c -+++ b/criu/namespaces.c -@@ -28,6 +28,7 @@ - #include "cgroup.h" - #include "fdstore.h" - #include "kerndat.h" -+#include "taskqueue.h" - - #include "protobuf.h" - #include "util.h" -@@ -1607,11 +1608,15 @@ int collect_namespaces(bool for_dump) - { - int ret; - -- ret = collect_user_namespaces(for_dump); -+ ret = collect_mnt_namespaces(for_dump); - if (ret < 0) - return ret; - -- ret = collect_mnt_namespaces(for_dump); -+ /* need mnt info provided by `mntinfo` */ -+ if (opts.parallel && start_collect_mappings_thread()) -+ return -1; -+ -+ ret = collect_user_namespaces(for_dump); - if (ret < 0) - return ret; - -diff --git a/criu/taskqueue.c b/criu/taskqueue.c -new file mode 100644 -index 0000000..1196a5e ---- /dev/null -+++ b/criu/taskqueue.c -@@ -0,0 +1,124 @@ -+/* -+ * Target: -+ * parallel dump process -+ */ -+ -+#include -+#include -+#include -+#include -+ -+#include "pstree.h" -+#include "log.h" -+#include "taskqueue.h" -+ -+/* -+ * Sometimes, only one cpu can be used which is bad for parallel routine. -+ * Therefore, set cpu affinity for criu routine. -+ */ -+static int set_cpuaffinity(void) -+{ -+ cpu_set_t *set; -+ int num_cpus = get_nprocs_conf(); -+ size_t cpusetsize = CPU_ALLOC_SIZE(num_cpus); -+ int retval; -+ -+ set = CPU_ALLOC(num_cpus); -+ memset(set, 0xff, cpusetsize); -+ -+ retval = sched_setaffinity(getpid(), cpusetsize, set); -+ if (retval != 0) -+ pr_err("sched_setaffinity failed: %s\n", strerror(errno)); -+ -+ CPU_FREE(set); -+ -+ return retval; -+} -+ -+int init_parallel_env(void) -+{ -+ return set_cpuaffinity(); -+} -+ -+static void *collect_mappings_routine(void *_arg) -+{ -+ struct mappings_info *info = _arg; -+ -+ info->queue_result = collect_mappings(info->pid, info->vmas, info->dump_file); -+ return NULL; -+} -+ -+int dump_filemap(struct vma_area *vma_area, int fd); /* defined in criu/cr-dump.c */ -+ -+int start_collect_mappings_thread(void) -+{ -+ struct pstree_item *pi; -+ struct mappings_info *info; -+ -+ for_each_pstree_item(pi) { -+ /* disable parallel collect for non-root item because of the -+ * concurrence. -+ */ -+ if (pi->pid->real != root_item->pid->real) -+ continue; -+ -+ info = &pi->maps_info; -+ -+ info->vmas = xmalloc(sizeof(struct vm_area_list)); -+ if (info->vmas == NULL) { -+ pr_err("xzalloc vmas no memory\n"); -+ return -1; -+ } -+ vm_area_list_init(info->vmas); -+ -+ info->pid = pi->pid->real; -+ info->dump_file = dump_filemap; -+ info->queue_routine = collect_mappings_routine; -+ info->queue_arg = info; -+ -+ pr_info("Start thread to collect %d mappings\n", info->pid); -+ -+ if (taskqueue_create(&info->queue) < 0) { -+ pr_err("parallel_collect_mappings failed: %s\n", strerror(errno)); -+ free(info->vmas); -+ /* -+ * Don't care other threads status, use `exit_group()` -+ * to ensure all threads exit. -+ */ -+ return -1; -+ } -+ } -+ -+ return 0; -+} -+ -+int end_collect_mappings_thread(struct pstree_item *item) -+{ -+ struct mappings_info *info = &item->maps_info; -+ int retval; -+ -+ /* disable parallel collect for non-root item because of the -+ * concurrence. -+ */ -+ if (root_item->pid->real != item->pid->real) -+ return 0; -+ -+ retval = taskqueue_join(&info->queue); -+ if (retval != 0 || info->queue_result != 0) { -+ pr_err("taskqueue_join failed, retval %d(errno %d: %s)," -+ " queue_result: %d\n", -+ retval, -+ retval == 0 ? 0 : errno, -+ retval == 0 ? "nil" : strerror(errno), -+ info->queue_result); -+ retval = -1; -+ } -+ -+ pr_info("End thread to collect %d mappings\n", info->pid); -+ -+ /* -+ * Don't care other threads status, use `exit_group()` to ensure all -+ * threads exit. -+ */ -+ return retval; -+} --- -2.34.1 - diff --git a/0053-mm-add-exec-file-mapping-pin-method.patch b/0053-mm-add-exec-file-mapping-pin-method.patch deleted file mode 100644 index b6915d27ce0a184c13711b520d6a6437fed3e936..0000000000000000000000000000000000000000 --- a/0053-mm-add-exec-file-mapping-pin-method.patch +++ /dev/null @@ -1,120 +0,0 @@ -From 5acbfc773177797d954645e40ba8f7ed94a55d60 Mon Sep 17 00:00:00 2001 -From: Jingxian He -Date: Tue, 30 Nov 2021 11:38:18 +0800 -Subject: [PATCH 53/72] mm: add exec file mapping pin method - -In order to improve criu dump and restore performance, -enable pin method for exec file mapping. - -Signed-off-by: Jingxian He ---- - criu/config.c | 4 ++++ - criu/crtools.c | 1 + - criu/include/cr_options.h | 1 + - criu/mem.c | 12 +++++++++++- - criu/pin-mem.c | 4 ++++ - 5 files changed, 21 insertions(+), 1 deletion(-) - -diff --git a/criu/config.c b/criu/config.c -index fdbc5eb..c0358e5 100644 ---- a/criu/config.c -+++ b/criu/config.c -@@ -710,6 +710,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, - { "share-src-ports", required_argument, 0, 2001 }, - { "reserve-ports", required_argument, 0, 'P' }, - BOOL_OPT("parallel", &opts.parallel), -+ { "exec-pin-start", required_argument, 0, 2002 }, - {}, - }; - -@@ -1051,6 +1052,9 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, - case 2001: - SET_CHAR_OPTS(share_src_ports, optarg); - break; -+ case 2002: -+ opts.exec_pin_start = atoi(optarg); -+ break; - case 'V': - pr_msg("Version: %s\n", CRIU_VERSION); - if (strcmp(CRIU_GITID, "0")) -diff --git a/criu/crtools.c b/criu/crtools.c -index c20b3b7..40e2d51 100644 ---- a/criu/crtools.c -+++ b/criu/crtools.c -@@ -473,6 +473,7 @@ usage: - " --file-locks-repair Use repair mode to dump and restore file locks\n" - " --reserve-ports Reserve src ports in kernel\n" - " --parallel Collect smaps parallel to accellrate dumping speed\n" -+ " --exec-pin-start Exec file map's pin start index\n" - "\n" - "Check options:\n" - " Without options, \"criu check\" checks availability of absolutely required\n" -diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h -index 6478d4d..a64e977 100644 ---- a/criu/include/cr_options.h -+++ b/criu/include/cr_options.h -@@ -202,6 +202,7 @@ struct cr_options { - char *share_src_ports; - int reserve_ports; - int parallel; -+ int exec_pin_start; - }; - - extern struct cr_options opts; -diff --git a/criu/mem.c b/criu/mem.c -index b955d66..ccb6ae6 100644 ---- a/criu/mem.c -+++ b/criu/mem.c -@@ -448,6 +448,7 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, struct parasit - int possible_pid_reuse = 0; - bool has_parent; - int parent_predump_mode = -1; -+ int dump_iov; - - pr_info("\n"); - pr_info("Dumping pages (type: %d pid: %d)\n", CR_FD_PAGES, item->pid->real); -@@ -521,9 +522,18 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, struct parasit - if (mdc->parent_ie) - parent_predump_mode = mdc->parent_ie->pre_dump_mode; - -+ dump_iov = 0; - list_for_each_entry(vma_area, &vma_area_list->h, list) { - if (opts.pin_memory && should_pin_vmae(vma_area->e)) { -- continue; -+ if (opts.exec_pin_start -+ && vma_entry_is(vma_area->e, VMA_FILE_PRIVATE) -+ && ((vma_area->e->prot & PROT_WRITE) -+ || !(vma_area->e->prot & PROT_EXEC))) { -+ dump_iov += 1; -+ if (dump_iov > opts.exec_pin_start + 1) -+ continue; -+ } else -+ continue; - } - - if (vma_entry_is(vma_area->e, VMA_AREA_ANON_INODE)) -diff --git a/criu/pin-mem.c b/criu/pin-mem.c -index 96ca2c5..686217f 100644 ---- a/criu/pin-mem.c -+++ b/criu/pin-mem.c -@@ -2,6 +2,7 @@ - #include - #include - -+#include "cr_options.h" - #include "pstree.h" - #include "mem.h" - #include "vma.h" -@@ -30,6 +31,9 @@ bool should_pin_vmae(VmaEntry *vmae) - if (vma_entry_is(vmae, VMA_ANON_PRIVATE)) - return true; - -+ if (opts.exec_pin_start && vma_entry_is(vmae, VMA_FILE_PRIVATE)) -+ return true; -+ - return false; - } - --- -2.34.1 - diff --git a/0054-ptrace-trace-specific-syscall.patch b/0054-ptrace-trace-specific-syscall.patch deleted file mode 100644 index b94469e21751d763558131f970db2a68ff1f8b03..0000000000000000000000000000000000000000 --- a/0054-ptrace-trace-specific-syscall.patch +++ /dev/null @@ -1,774 +0,0 @@ -From 47412ba0d9ce6283071973387bf5b34bf876bb9a Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Wed, 1 Dec 2021 09:44:07 +0800 -Subject: [PATCH 54/72] ptrace: trace specific syscall - -criu use `ptrace(PTRACE_SYSCALL)` to watch whether the tracee steps in -correct status, it isn't necessory to stop tracee at every syscall. -Therefore, customizing `ptrace(PTRACE_SYSCALL_NR)` to make tracee stop at -the specific syscall can save time (1000 threads consume about 140ms). - -ptrace syntax: - long ptrace(PTRACE_SYSCALL_NR, pid_t pid, void *addr, void *data); - -The argument `addr` is unused in original `ptrace(PTRACE_SYSCALL)`, -Here `ptrace(PTRACE_SYSCALL_NR)` use `addr` parameter to give the -specific sysno which is wanted to trace. - -use `criu check` to generate `/run/criu.kdat` before the first usage of -criu, or auto-check during `criu {dump, restore}`. - -Conflict:NA -Reference:https://gitee.com/src-openeuler/criu/pulls/25 - -Signed-off-by: fu.lin ---- - compel/Makefile | 1 + - compel/include/uapi/bisect.h | 30 +++++++ - compel/include/uapi/infect.h | 15 +++- - compel/src/lib/bisect.c | 92 +++++++++++++++++++ - compel/src/lib/infect.c | 167 ++++++++++++++++++++++++++++++++--- - criu/cr-dump.c | 2 +- - criu/cr-restore.c | 97 +++++++++++++++++++- - criu/include/kerndat.h | 1 + - criu/kerndat.c | 61 +++++++++++++ - 9 files changed, 450 insertions(+), 16 deletions(-) - create mode 100644 compel/include/uapi/bisect.h - create mode 100644 compel/src/lib/bisect.c - -diff --git a/compel/Makefile b/compel/Makefile -index b79aee6..2168a26 100644 ---- a/compel/Makefile -+++ b/compel/Makefile -@@ -27,6 +27,7 @@ lib-y += src/lib/infect-rpc.o - lib-y += src/lib/infect-util.o - lib-y += src/lib/infect.o - lib-y += src/lib/ptrace.o -+lib-y += src/lib/bisect.o - - ifeq ($(ARCH),x86) - lib-y += arch/$(ARCH)/src/lib/thread_area.o -diff --git a/compel/include/uapi/bisect.h b/compel/include/uapi/bisect.h -new file mode 100644 -index 0000000..55ebcbd ---- /dev/null -+++ b/compel/include/uapi/bisect.h -@@ -0,0 +1,30 @@ -+#ifndef __COMPEL_BISECT_H__ -+#define __COMPEL_BISECT_H__ -+ -+#include -+ -+enum tf { -+ TRACE_INTERRUPT, -+ TRACE_SYSCALL_ENTER, -+ TRACE_SYSCALL_EXIT, -+}; -+ -+struct trace_flag { -+ pid_t key; -+ enum tf flag; -+}; -+ -+struct bisect_meta { -+ int size; -+ int used; -+ void *data; /* data pointer array */ -+ void *__data; /* data array */ -+}; -+ -+struct trace_flag *tf_bisect(struct bisect_meta *meta, pid_t key); -+struct trace_flag *tf_insert(struct bisect_meta *meta, pid_t key); -+int tf_create(struct bisect_meta *meta, int len); -+void tf_destroy(struct bisect_meta *meta); -+void tf_clear(struct bisect_meta *meta); -+ -+#endif /* __COMPEL_BISECT_H__ */ -diff --git a/compel/include/uapi/infect.h b/compel/include/uapi/infect.h -index 389878e..a23782e 100644 ---- a/compel/include/uapi/infect.h -+++ b/compel/include/uapi/infect.h -@@ -8,11 +8,16 @@ - #include - #include - #include -+#include - - #include "common/compiler.h" - - #define PARASITE_START_AREA_MIN (4096) - -+#ifndef PTRACE_SYSCALL_NR -+# define PTRACE_SYSCALL_NR 0xff00 -+#endif -+ - extern int __must_check compel_interrupt_task(int pid); - - struct seize_task_status { -@@ -41,7 +46,7 @@ extern int __must_check compel_infect(struct parasite_ctl *ctl, unsigned long nr - extern struct parasite_thread_ctl __must_check *compel_prepare_thread(struct parasite_ctl *ctl, int pid); - extern void compel_release_thread(struct parasite_thread_ctl *); - --extern int __must_check compel_stop_daemon(struct parasite_ctl *ctl); -+extern int __must_check compel_stop_daemon(struct parasite_ctl *ctl, bool customize); - extern int __must_check compel_cure_remote(struct parasite_ctl *ctl); - extern int __must_check compel_cure_local(struct parasite_ctl *ctl); - extern int __must_check compel_cure(struct parasite_ctl *ctl); -@@ -83,6 +88,14 @@ extern int __must_check compel_stop_pie(pid_t pid, void *addr, enum trace_flags - - extern int __must_check compel_unmap(struct parasite_ctl *ctl, unsigned long addr); - -+extern int __must_check compel_stop_on_syscall_customize(int tasks, -+ const int sys_nr, const int exit_sys_nr, struct bisect_meta *meta); -+ -+extern int __must_check compel_stop_pie_customize(pid_t pid, -+ const int sys_nr, struct trace_flag *tf); -+ -+extern int __must_check compel_unmap_customize(struct parasite_ctl *ctl, unsigned long addr); -+ - extern int compel_mode_native(struct parasite_ctl *ctl); - - extern k_rtsigset_t *compel_task_sigmask(struct parasite_ctl *ctl); -diff --git a/compel/src/lib/bisect.c b/compel/src/lib/bisect.c -new file mode 100644 -index 0000000..807a5a9 ---- /dev/null -+++ b/compel/src/lib/bisect.c -@@ -0,0 +1,92 @@ -+#include -+ -+#include "log.h" -+#include "common/xmalloc.h" -+#include "bisect.h" -+ -+struct trace_flag *tf_bisect(struct bisect_meta *meta, pid_t key) -+{ -+ struct trace_flag **tfs = meta->data; -+ int lo = 0, hi = meta->used, mid; -+ -+ if (meta->used <= 0) -+ return NULL; -+ -+ while (lo < hi) { -+ mid = (int)((lo + hi) / 2); -+ if (tfs[mid]->key == key) { -+ return tfs[mid]; -+ } else if (tfs[mid]->key > key) { -+ hi = mid; -+ } else { -+ lo = mid + 1; -+ } -+ } -+ -+ return NULL; -+} -+ -+/* used in cr-restore */ -+struct trace_flag *tf_insert(struct bisect_meta *meta, pid_t key) -+{ -+ struct trace_flag **tfs = meta->data; -+ struct trace_flag *tf = &((struct trace_flag *)meta->__data)[meta->used]; -+ int i = 0, j = 0; -+ -+ if (meta->used == meta->size) -+ return NULL; -+ -+ for (i = 0; i < meta->used; i++) { -+ if (tfs[i]->key >= key) /* impossible condition: `tfs[i]->key == key` */ -+ break; -+ } -+ -+ j = meta->used; -+ meta->used += 1; -+ -+ while (j > i) { -+ tfs[j] = tfs[j-1]; -+ j -= 1; -+ } -+ -+ tfs[i] = tf; -+ tf->key = key; -+ -+ return tf; -+} -+ -+int tf_create(struct bisect_meta *meta, int len) -+{ -+ struct trace_flag *tfs; -+ struct trace_flag **tfs_ptr; -+ -+ tfs = xzalloc(sizeof(*tfs) * len); -+ if (tfs == NULL) -+ return -1; -+ -+ tfs_ptr = xmalloc(sizeof(*tfs_ptr) * len); -+ if (tfs_ptr == NULL) -+ goto err; -+ -+ meta->size = len; -+ meta->used = 0; -+ meta->__data = tfs; -+ meta->data = tfs_ptr; -+ -+ return 0; -+err: -+ xfree(tfs); -+ return -1; -+} -+ -+void tf_destroy(struct bisect_meta *meta) -+{ -+ xfree(meta->__data); -+ xfree(meta->data); -+} -+ -+void tf_clear(struct bisect_meta *meta) -+{ -+ meta->used = 0; -+ __builtin_memset(meta->data, 0, sizeof(struct trace_flag **)*meta->size); -+} -diff --git a/compel/src/lib/infect.c b/compel/src/lib/infect.c -index 6a13cc1..f9b8832 100644 ---- a/compel/src/lib/infect.c -+++ b/compel/src/lib/infect.c -@@ -449,7 +449,7 @@ static int restore_child_handler(struct parasite_ctl *ctl) - } - - static int parasite_run(pid_t pid, int cmd, unsigned long ip, void *stack, user_regs_struct_t *regs, -- struct thread_ctx *octx) -+ struct thread_ctx *octx, void *addr) - { - k_rtsigset_t block; - -@@ -470,7 +470,7 @@ static int parasite_run(pid_t pid, int cmd, unsigned long ip, void *stack, user_ - goto err_regs; - } - -- if (ptrace(cmd, pid, NULL, NULL)) { -+ if (ptrace(cmd, pid, addr, NULL)) { - pr_perror("Can't run parasite at %d", pid); - goto err_cont; - } -@@ -575,7 +575,7 @@ int compel_execute_syscall(struct parasite_ctl *ctl, user_regs_struct_t *regs, c - return -1; - } - -- err = parasite_run(pid, PTRACE_CONT, ctl->ictx.syscall_ip, 0, regs, &ctl->orig); -+ err = parasite_run(pid, PTRACE_CONT, ctl->ictx.syscall_ip, 0, regs, &ctl->orig, NULL); - if (!err) - err = parasite_trap(ctl, pid, regs, &ctl->orig, false); - -@@ -592,7 +592,7 @@ int compel_run_at(struct parasite_ctl *ctl, unsigned long ip, user_regs_struct_t - user_regs_struct_t regs = ctl->orig.regs; - int ret; - -- ret = parasite_run(ctl->rpid, PTRACE_CONT, ip, 0, ®s, &ctl->orig); -+ ret = parasite_run(ctl->rpid, PTRACE_CONT, ip, 0, ®s, &ctl->orig, NULL); - if (!ret) - ret = parasite_trap(ctl, ctl->rpid, ret_regs ? ret_regs : ®s, &ctl->orig, false); - return ret; -@@ -641,7 +641,7 @@ static int parasite_init_daemon(struct parasite_ctl *ctl) - goto err; - - regs = ctl->orig.regs; -- if (parasite_run(pid, PTRACE_CONT, ctl->parasite_ip, ctl->rstack, ®s, &ctl->orig)) -+ if (parasite_run(pid, PTRACE_CONT, ctl->parasite_ip, ctl->rstack, ®s, &ctl->orig, NULL)) - goto err; - - futex_wait_while_eq(&args->daemon_connected, 0); -@@ -1303,7 +1303,7 @@ static bool task_in_parasite(struct parasite_ctl *ctl, user_regs_struct_t *regs) - return addr >= ctl->remote_map && addr < ctl->remote_map + ctl->map_length; - } - --static int parasite_fini_seized(struct parasite_ctl *ctl) -+static int parasite_fini_seized(struct parasite_ctl *ctl, bool customize) - { - pid_t pid = ctl->rpid; - user_regs_struct_t regs; -@@ -1348,6 +1348,34 @@ static int parasite_fini_seized(struct parasite_ctl *ctl) - if (ret) - return -1; - -+ /* use customize ptrace */ -+ if (customize) { -+ struct trace_flag tf = { .key = pid, .flag = TRACE_SYSCALL_ENTER }; -+ struct trace_flag *tf_ptr[] = { &tf }; -+ struct bisect_meta meta = { -+ .size = 1, -+ .used = 1, -+ .__data = &tf, -+ .data = tf_ptr, -+ }; -+ -+ ret = compel_stop_pie_customize(pid, __NR(rt_sigreturn, 0), &tf); -+ if (ret < 0) -+ return ret; -+ -+ /* The process is going to execute the required syscall, the -+ * original syscall should be forgot(set `-1`) in -+ * `syscall_trace_enter()` handler in kernel when no other -+ * else operation in tracer. -+ * -+ * Note: -1 means NO_SYSCALL which is defined in -+ * `arch/arm64/include/asm/ptrace.h`. -+ */ -+ return compel_stop_on_syscall_customize(1, -+ __NR(rt_sigreturn, 0), -+ -1, &meta); -+ } -+ - /* Go to sigreturn as closer as we can */ - ret = compel_stop_pie(pid, ctl->sigreturn_addr, &flag, ctl->ictx.flags & INFECT_NO_BREAKPOINTS); - if (ret < 0) -@@ -1368,7 +1396,7 @@ static int parasite_fini_seized(struct parasite_ctl *ctl) - return 0; - } - --int compel_stop_daemon(struct parasite_ctl *ctl) -+int compel_stop_daemon(struct parasite_ctl *ctl, bool customize) - { - if (ctl->daemonized) { - /* -@@ -1378,7 +1406,7 @@ int compel_stop_daemon(struct parasite_ctl *ctl) - if (ctl->tsock < 0) - return -1; - -- if (parasite_fini_seized(ctl)) { -+ if (parasite_fini_seized(ctl, customize)) { - close_safe(&ctl->tsock); - return -1; - } -@@ -1394,7 +1422,7 @@ int compel_cure_remote(struct parasite_ctl *ctl) - long ret; - int err; - -- if (compel_stop_daemon(ctl)) -+ if (compel_stop_daemon(ctl, false)) - return -1; - - if (!ctl->remote_map) -@@ -1461,7 +1489,7 @@ int compel_run_in_thread(struct parasite_thread_ctl *tctl, unsigned int cmd) - - *ctl->cmd = cmd; - -- ret = parasite_run(pid, PTRACE_CONT, ctl->parasite_ip, stack, ®s, octx); -+ ret = parasite_run(pid, PTRACE_CONT, ctl->parasite_ip, stack, ®s, octx, NULL); - if (ret == 0) - ret = parasite_trap(ctl, pid, ®s, octx, true); - if (ret == 0) -@@ -1484,7 +1512,7 @@ int compel_unmap(struct parasite_ctl *ctl, unsigned long addr) - pid_t pid = ctl->rpid; - int ret = -1; - -- ret = parasite_run(pid, PTRACE_SYSCALL, addr, ctl->rstack, ®s, &ctl->orig); -+ ret = parasite_run(pid, PTRACE_SYSCALL, addr, ctl->rstack, ®s, &ctl->orig, NULL); - if (ret) - goto err; - -@@ -1500,6 +1528,45 @@ err: - return ret; - } - -+int compel_unmap_customize(struct parasite_ctl *ctl, unsigned long addr) -+{ -+ user_regs_struct_t regs = ctl->orig.regs; -+ pid_t pid = ctl->rpid; -+ int ret = -1; -+ struct trace_flag tf = { .key = pid, .flag = TRACE_SYSCALL_ENTER }; -+ struct trace_flag *tf_ptr[] = { &tf }; -+ struct bisect_meta meta = { -+ .size = 1, -+ .used = 1, -+ .__data = &tf, -+ .data = tf_ptr, -+ }; -+ -+ /* -+ * Here it parasite code. Unlike trap code `compel_stop_pie()`, it -+ * won't let tracee forget the original syscall. In such way, tracer -+ * just trace the syscall called by tracee. The log likes the following -+ * if tracee forget syscall: -+ * -+ * [ 817.638332] set pid 1877 ptrace sysno 215 -+ * [ 817.638343] syscall_trace_enter: pid 1877 ptrace_sysno 0 current_sysno 215 -+ * [ 817.638363] (00.006280) Error (compel/src/lib/infect.c:1582): 1877 (native) is going to execute the syscall 215, required is 215 -+ * [ 817.638368] set pid 1877 ptrace sysno 0 -+ * [ 817.638402] syscall_trace_exit: pid 1877 ptrace_sysno 0 current_sysno 215 -+ */ -+ ret = parasite_run(pid, PTRACE_SYSCALL_NR, addr, ctl->rstack, ®s, -+ &ctl->orig, (void *)(long)__NR(munmap, 0)); -+ if (ret) -+ goto err; -+ -+ ret = compel_stop_on_syscall_customize(1, __NR(munmap, 0), 0, &meta); -+ -+ if (restore_thread_ctx(pid, &ctl->orig, false)) -+ ret = -1; -+err: -+ return ret; -+} -+ - int compel_stop_pie(pid_t pid, void *addr, enum trace_flags *tf, bool no_bp) - { - int ret; -@@ -1535,6 +1602,17 @@ int compel_stop_pie(pid_t pid, void *addr, enum trace_flags *tf, bool no_bp) - return 0; - } - -+int compel_stop_pie_customize(pid_t pid, const int sys_nr, struct trace_flag *tf) -+{ -+ if (ptrace(PTRACE_SYSCALL_NR, pid, sys_nr, NULL)) { -+ pr_perror("Unable to restart the %d process", pid); -+ return -1; -+ } -+ -+ tf->flag = TRACE_SYSCALL_ENTER; -+ return 0; -+} -+ - static bool task_is_trapped(int status, pid_t pid) - { - if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) -@@ -1642,6 +1720,73 @@ int compel_stop_on_syscall(int tasks, const int sys_nr, const int sys_nr_compat, - return 0; - } - -+int compel_stop_on_syscall_customize(int tasks, const int sys_nr, -+ const int exit_sys_nr, struct bisect_meta *meta) -+{ -+ struct trace_flag *tf; -+ user_regs_struct_t regs; -+ int status, ret; -+ pid_t pid; -+ -+ while (tasks) { -+ pid = wait4(-1, &status, __WALL, NULL); -+ if (pid == -1) { -+ pr_perror("wait4 failed"); -+ return -1; -+ } -+ -+ tf = tf_bisect(meta, pid); -+ if (tf == NULL) { -+ pr_warn("Unexpected task %d, state %d signal %d: %s\n", -+ pid, WEXITSTATUS(status), -+ WTERMSIG(status), strsignal(WTERMSIG(status))); -+ continue; -+ } -+ -+ if (!task_is_trapped(status, pid)) -+ return -1; -+ -+ switch (tf->flag) { -+ case TRACE_SYSCALL_ENTER: -+ pr_debug("%d was trapped\n", pid); -+ pr_debug("`- Expecting exit\n"); -+ -+ ret = ptrace_get_regs(pid, ®s); -+ if (ret) { -+ pr_perror("ptrace"); -+ return -1; -+ } -+ -+ if (is_required_syscall(®s, pid, sys_nr, sys_nr)) { -+ ret = ptrace(PTRACE_SYSCALL_NR, pid, exit_sys_nr, NULL); -+ if (ret) { -+ pr_perror("ptrace"); -+ return -1; -+ } -+ tf->flag = TRACE_SYSCALL_EXIT; -+ } else { -+ pr_warn("Impossible condition, check the system, try our best to restore...\n"); -+ ret = ptrace(PTRACE_SYSCALL_NR, pid, sys_nr, NULL); -+ if (ret) { -+ pr_perror("ptrace"); -+ return -1; -+ } -+ } -+ break; -+ case TRACE_SYSCALL_EXIT: -+ pr_debug("%d was stopped\n", pid); -+ tasks--; -+ break; -+ -+ default: -+ pr_err("pid %d invalid status: %d\n", pid, tf->flag); -+ return -1; -+ } -+ } -+ -+ return 0; -+} -+ - int compel_mode_native(struct parasite_ctl *ctl) - { - return user_regs_native(&ctl->orig.regs); -diff --git a/criu/cr-dump.c b/criu/cr-dump.c -index ee826c0..9253e91 100644 ---- a/criu/cr-dump.c -+++ b/criu/cr-dump.c -@@ -1708,7 +1708,7 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) - goto err_cure; - } - -- ret = compel_stop_daemon(parasite_ctl); -+ ret = compel_stop_daemon(parasite_ctl, kdat.has_customize_ptrace); - if (ret) { - pr_err("Can't stop daemon in parasite (pid: %d)\n", pid); - goto err_cure; -diff --git a/criu/cr-restore.c b/criu/cr-restore.c -index d19768d..b0b3d30 100644 ---- a/criu/cr-restore.c -+++ b/criu/cr-restore.c -@@ -2181,6 +2181,64 @@ static int catch_tasks(bool root_seized, enum trace_flags *flag) - return 0; - } - -+static int cache_tasks_customize(bool root_seized, struct bisect_meta *meta) -+{ -+ struct pstree_item *item; -+ struct trace_flag *tf; -+ -+ for_each_pstree_item(item) { -+ int status, i, ret; -+ pid_t pid; -+ -+ if (!task_alive(item)) -+ continue; -+ -+ if (item->nr_threads == 1) { -+ item->threads[0].real = item->pid->real; -+ } else { -+ if (parse_threads(item->pid->real, &item->threads, &item->nr_threads)) -+ return -1; -+ } -+ -+ for (i = 0; i < item->nr_threads; i++) { -+ pid = item->threads[i].real; -+ -+ if (ptrace(PTRACE_INTERRUPT, pid, 0, 0)) { -+ pr_perror("Can't interrupt the %d task", pid); -+ return -1; -+ } -+ -+ tf = tf_insert(meta, pid); -+ if (tf == NULL) { -+ pr_err("Can't find trace flag for %d, used %d\n", -+ pid, meta->used); -+ return -1; -+ } -+ tf->flag = TRACE_INTERRUPT; -+ } -+ -+ for (i = 0; i < item->nr_threads; i++) { -+ pid = wait4(-1, &status, __WALL, NULL); -+ -+ tf = tf_bisect(meta, pid); -+ if (tf == NULL) { -+ pr_err("Can't find trace flag for %d, used %d\n", -+ pid, meta->used); -+ return -1; -+ } -+ -+ ret = compel_stop_pie_customize(pid, -+ __NR(rt_sigreturn, 0), -+ tf); -+ if (ret < 0) -+ return -1; -+ -+ } -+ } -+ -+ return 0; -+} -+ - static int clear_breakpoints(void) - { - struct pstree_item *item; -@@ -2207,6 +2265,7 @@ static void finalize_restore(void) - pid_t pid = item->pid->real; - struct parasite_ctl *ctl; - unsigned long restorer_addr; -+ int retval; - - if (!task_alive(item)) - continue; -@@ -2217,7 +2276,12 @@ static void finalize_restore(void) - continue; - - restorer_addr = (unsigned long)rsti(item)->munmap_restorer; -- if (compel_unmap(ctl, restorer_addr)) -+ if (!kdat.has_customize_ptrace) -+ retval = compel_unmap(ctl, restorer_addr); -+ else -+ retval = compel_unmap_customize(ctl, restorer_addr); -+ -+ if (retval) - pr_err("Failed to unmap restorer from %d\n", pid); - - xfree(ctl); -@@ -2333,11 +2397,18 @@ static void reap_zombies(void) - - static int restore_root_task(struct pstree_item *init) - { -+ struct bisect_meta tfs_meta; - enum trace_flags flag = TRACE_ALL; - int ret, fd, mnt_ns_fd = -1; - int root_seized = 0; - struct pstree_item *item; - -+ if (kdat.has_customize_ptrace -+ && tf_create(&tfs_meta, task_entries->nr_threads) != 0) { -+ pr_err("Can't alloc memory, tf_create failed\n"); -+ return -1; -+ } -+ - ret = run_scripts(ACT_PRE_RESTORE); - if (ret != 0) { - pr_err("Aborting restore due to pre-restore script ret code %d\n", ret); -@@ -2551,7 +2622,12 @@ skip_ns_bouncing: - - timing_stop(TIME_RESTORE); - -- if (catch_tasks(root_seized, &flag)) { -+ if (!kdat.has_customize_ptrace) -+ ret = catch_tasks(root_seized, &flag); -+ else -+ ret = cache_tasks_customize(root_seized, &tfs_meta); -+ -+ if (ret) { - pr_err("Can't catch all tasks\n"); - goto out_kill_network_unlocked; - } -@@ -2561,7 +2637,15 @@ skip_ns_bouncing: - - __restore_switch_stage(CR_STATE_COMPLETE); - -- ret = compel_stop_on_syscall(task_entries->nr_threads, __NR(rt_sigreturn, 0), __NR(rt_sigreturn, 1), flag); -+ if (!kdat.has_customize_ptrace) { -+ ret = compel_stop_on_syscall(task_entries->nr_threads, -+ __NR(rt_sigreturn, 0), -+ __NR(rt_sigreturn, 1), flag); -+ } else { -+ ret = compel_stop_on_syscall_customize(task_entries->nr_threads, -+ __NR(rt_sigreturn, 0), -+ -1, &tfs_meta); -+ } - if (ret) { - pr_err("Can't stop all tasks on rt_sigreturn\n"); - goto out_kill_network_unlocked; -@@ -2600,6 +2684,9 @@ skip_ns_bouncing: - reap_zombies(); - } - -+ if (kdat.has_customize_ptrace) -+ tf_destroy(&tfs_meta); -+ - return 0; - - out_kill_network_unlocked: -@@ -2631,6 +2718,10 @@ out: - stop_usernsd(); - __restore_switch_stage(CR_STATE_FAIL); - pr_err("Restoring FAILED.\n"); -+ -+ if (kdat.has_customize_ptrace) -+ tf_destroy(&tfs_meta); -+ - return -1; - } - -diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h -index 3979939..8034db9 100644 ---- a/criu/include/kerndat.h -+++ b/criu/include/kerndat.h -@@ -77,6 +77,7 @@ struct kerndat_s { - bool has_rseq; - bool has_ptrace_get_rseq_conf; - bool has_unix_sk_repair; -+ bool has_customize_ptrace; - }; - - extern struct kerndat_s kdat; -diff --git a/criu/kerndat.c b/criu/kerndat.c -index 6d6aac1..630814e 100644 ---- a/criu/kerndat.c -+++ b/criu/kerndat.c -@@ -1289,6 +1289,66 @@ static void kerndat_has_unix_sk_repair(void) - return; - } - -+static void kerndat_has_customize_ptrace(void) -+{ -+ pid_t tracee = fork(); -+ int status; -+ int retval; -+ -+ if (tracee == 0) { -+ /* ensure */ -+ prctl(PR_SET_PDEATHSIG, SIGKILL); -+ -+ while (true) -+ sleep(1); -+ } else if (tracee > 0) { -+ pr_debug("fork task %d as tracee\n", tracee); -+ retval = ptrace(PTRACE_ATTACH, tracee, 0, 0); -+ if (retval < 0) { -+ pr_perror("Unexpect error from ptrace(PTRACE_ATTACH)"); -+ return; -+ } -+ -+ retval = wait4(-1, &status, __WALL, NULL); -+ if (retval == -1) -+ pr_perror("Unexpect error from wait"); -+ else if (retval != tracee || !(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP)) -+ pr_err("Task %d (expect %d) is unexpect, status: %d," -+ " stoped: %d signal: %d(%s)\n", -+ retval, tracee, status, -+ WIFSTOPPED(status), WSTOPSIG(status), -+ strsignal(WTERMSIG(status))); -+ else { -+ retval = ptrace(PTRACE_SYSCALL_NR, tracee, 0, 0); -+ if (retval == 0) -+ kdat.has_customize_ptrace = true; -+ else -+ pr_perror("Unexpect error from ptrace(PTRACE_SYSCALL_NR)"); -+ } -+ -+ if (kill(tracee, SIGKILL) != 0) { -+ pr_perror("kill tracee %d failed", tracee); -+ return; -+ } -+ -+ /* -+ * To prevent wait4 unexpect task when criu.kdat is generated -+ * in dump process. -+ */ -+ retval = waitpid(tracee, &status, 0); -+ if (retval == -1) -+ pr_err("waitpid() failed"); -+ else -+ pr_debug("tracee %d exited, status %d, signal %d(%s)\n", -+ WEXITSTATUS(status), WTERMSIG(status), -+ WTERMSIG(status), strsignal(WTERMSIG(status))); -+ } else { -+ pr_perror("Unexpected error from fork\n"); -+ } -+ -+ return; -+} -+ - int kerndat_init(void) - { - int ret; -@@ -1451,6 +1511,7 @@ int kerndat_init(void) - } - - kerndat_has_unix_sk_repair(); -+ kerndat_has_customize_ptrace(); - - kerndat_lsm(); - kerndat_mmap_min_addr(); --- -2.34.1 - diff --git a/0055-notifier-rollback-when-open-img-failed.patch b/0055-notifier-rollback-when-open-img-failed.patch deleted file mode 100644 index 5784aa3b87b38860261c97be4caa37596011b32d..0000000000000000000000000000000000000000 --- a/0055-notifier-rollback-when-open-img-failed.patch +++ /dev/null @@ -1,150 +0,0 @@ -From c79a274b378173ac64d42d1c72df1ec594085d66 Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Mon, 27 Dec 2021 21:34:39 +0800 -Subject: [PATCH 55/72] notifier: rollback when open img failed - -Conflict:NA -Reference:https://gitee.com/src-openeuler/criu/pulls/26 - -Signed-off-by: fu.lin ---- - criu/cr-restore.c | 69 +++++++++++++++++++++++++++++++++++++++++++ - criu/include/pstree.h | 1 + - criu/pstree.c | 8 +++++ - 3 files changed, 78 insertions(+) - -diff --git a/criu/cr-restore.c b/criu/cr-restore.c -index b0b3d30..13f0a93 100644 ---- a/criu/cr-restore.c -+++ b/criu/cr-restore.c -@@ -1542,6 +1542,9 @@ static inline int fork_with_pid(struct pstree_item *item) - goto err_unlock; - } - -+ /* disable criu rollback capability. */ -+ criu_roll = false; -+ - if (item == root_item) { - item->pid->real = ret; - pr_debug("PID: real %d virt %d\n", item->pid->real, vpid(item)); -@@ -2757,6 +2760,71 @@ int prepare_dummy_task_state(struct pstree_item *pi) - return 0; - } - -+static int criu_rollback_internal(void *_arg) -+{ -+ bool unmask = *(int *)_arg; -+ pid_t pid = getpid(); -+ -+ if (unmask && mask_task_exit_notify(pid, false) != 0) -+ pr_err("unmask exit notify failed for %d\n", pid); -+ -+ do_notifier_rollback(true, POST_UPDATE_KERNEL_COMPLETE); -+ return 0; -+} -+ -+static void criu_rollback(void) -+{ -+ pid_t pid; -+ unsigned long clone_flags; -+ int retval = 0; -+ -+ if (!criu_roll || !opts.with_notifier_kup) -+ return; -+ -+ pid = vpid(root_item); -+ clone_flags = rsti(root_item)->clone_flags; -+ -+ pr_info("do criu rollback\n"); -+ -+ /* Some rollback notifier must be call in the specific task context. */ -+ if (opts.use_fork_pid) -+ retval = write_fork_pid(vpid(root_item)); -+ else if (!kdat.has_clone3_set_tid) -+ retval = set_next_pid((void *)&pid); -+ -+ if (retval < 0) { -+ pr_err("set next pid %d failed, can't do rollback.", pid); -+ return; -+ } -+ -+ if (!kdat.has_clone3_set_tid) { -+ retval = clone_noasan(criu_rollback_internal, -+ clone_flags | SIGCHLD, -+ &opts.mask_exit_notify); -+ } else { -+ retval = clone3_with_pid_noasan(criu_rollback_internal, -+ &opts.mask_exit_notify, -+ clone_flags, -+ SIGCHLD, pid); -+ } -+ -+ if (retval < 0) { -+ pr_err("Can't fork for %d to do rollback: %s.\n", -+ pid, strerror(errno)); -+ } else { -+ int status; -+ -+ if (retval != pid) -+ pr_err("clone pid %d isn't equal with %d\n", -+ retval, pid); -+ -+ if (waitpid(pid, &status, 0) < 0) { -+ pr_warn("Unable to wait %d: %s\n", -+ pid, strerror(errno)); -+ } -+ } -+} -+ - int cr_restore_tasks(void) - { - int ret = -1; -@@ -2831,6 +2899,7 @@ clean_cgroup: - err: - cr_plugin_fini(CR_PLUGIN_STAGE__RESTORE, ret); - if (ret < 0) { -+ criu_rollback(); - if (!!(network_status & NETWORK_COLLECTED) - && !files_collected() && collect_image(&inet_sk_cinfo)) - pr_err("collect inet sk cinfo fail\n"); -diff --git a/criu/include/pstree.h b/criu/include/pstree.h -index 87e4c47..6c0765b 100644 ---- a/criu/include/pstree.h -+++ b/criu/include/pstree.h -@@ -46,6 +46,7 @@ enum { - }; - #define FDS_EVENT (1 << FDS_EVENT_BIT) - -+extern bool criu_roll; - extern struct pstree_item *current; - - struct rst_info; -diff --git a/criu/pstree.c b/criu/pstree.c -index 778c884..8992155 100644 ---- a/criu/pstree.c -+++ b/criu/pstree.c -@@ -20,6 +20,11 @@ - #include "images/pstree.pb-c.h" - #include "crtools.h" - -+/* -+ * Sometimes, img may be broken, set flag here to enable roll capibility -+ * before forking restorer. -+ */ -+bool criu_roll; - struct pstree_item *root_item; - static struct rb_root pid_root_rb; - -@@ -638,6 +643,9 @@ static int read_pstree_image(pid_t *pid_max) - if (!img) - return -1; - -+ /* enable rollback capibility when opening img successfully. */ -+ criu_roll = true; -+ - do { - ret = read_one_pstree_item(img, pid_max); - } while (ret > 0); --- -2.34.1 - diff --git a/0056-detach-don-t-kill-task-when-ptrace-PTRACE_DETACH-ret.patch b/0056-detach-don-t-kill-task-when-ptrace-PTRACE_DETACH-ret.patch deleted file mode 100644 index bc57061b46ffd7063f8ca98828555f6624700146..0000000000000000000000000000000000000000 --- a/0056-detach-don-t-kill-task-when-ptrace-PTRACE_DETACH-ret.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 389a410ddfbca241bf724a4e4751fa96499ff6f1 Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Thu, 30 Dec 2021 10:45:16 +0800 -Subject: [PATCH 56/72] detach: don't kill task when `ptrace(PTRACE_DETACH)` - return ESRCH - -Conflict:NA -Reference:https://gitee.com/src-openeuler/criu/pulls/26 - -Signed-off-by: fu.lin ---- - criu/cr-restore.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/criu/cr-restore.c b/criu/cr-restore.c -index 13f0a93..c3ff65d 100644 ---- a/criu/cr-restore.c -+++ b/criu/cr-restore.c -@@ -2317,6 +2317,16 @@ static int finalize_restore_detach(void) - return -1; - } - if (ptrace(PTRACE_DETACH, pid, NULL, 0)) { -+ /* -+ * There is delta between task resume and -+ * `ptrace(PTRACE_DETACH)`, task maybe exit -+ * initiative during this time. -+ */ -+ if (errno == ESRCH) { -+ pr_warn("Unable to detach %d, task has dead\n", pid); -+ continue; -+ } -+ - pr_perror("Unable to detach %d", pid); - return -1; - } --- -2.34.1 - diff --git a/0057-build-add-secure-compilation-options.patch b/0057-build-add-secure-compilation-options.patch deleted file mode 100644 index 97c7544ddef9d693d81e262b6656bdb0c85ad1d2..0000000000000000000000000000000000000000 --- a/0057-build-add-secure-compilation-options.patch +++ /dev/null @@ -1,114 +0,0 @@ -From 4a3b351a69083567392a70bfb8d91c3f666e0aff Mon Sep 17 00:00:00 2001 -From: Jingxian He -Date: Wed, 19 May 2021 22:49:57 +0800 -Subject: [PATCH 57/72] build: add secure compilation options - -Add secure compilation options: --fstack-protector -fstack-protector-all --Wl,-z,relro,-z,now,-z,noexecstack - -Conflict:NA -Reference:https://gitee.com/src-openeuler/criu/pulls/21 -Signed-off-by: Fu Lin ---- - Makefile | 4 ++++ - criu/Makefile | 2 +- - criu/pie/Makefile | 1 + - criu/pie/Makefile.library | 2 ++ - lib/Makefile | 1 + - lib/c/Makefile | 2 +- - scripts/nmk/scripts/build.mk | 5 +++-- - 7 files changed, 13 insertions(+), 4 deletions(-) - -diff --git a/Makefile b/Makefile -index 08761ef..c1eafdd 100644 ---- a/Makefile -+++ b/Makefile -@@ -80,6 +80,10 @@ ifeq ($(ARCH),mips) - DEFINES := -DCONFIG_MIPS - endif - -+# secure compilation options -+CFLAGS += -fstack-protector-all -fPIE -+LDFLAGS += -pie -+ - # - # CFLAGS_PIE: - # -diff --git a/criu/Makefile b/criu/Makefile -index db4e9d8..3b4d69f 100644 ---- a/criu/Makefile -+++ b/criu/Makefile -@@ -85,7 +85,7 @@ $(obj)/%: pie - - $(obj)/criu: $(PROGRAM-BUILTINS) - $(call msg-link, $@) -- $(Q) $(CC) $(CFLAGS) $^ $(LIBS) $(WRAPFLAGS) $(LDFLAGS) $(GMONLDOPT) -rdynamic -o $@ -+ $(Q) $(CC) $(CFLAGS) $^ $(LIBS) $(WRAPFLAGS) $(LDFLAGS) $(GMONLDOPT) -rdynamic -o $@ -Wl,-z,relro,-z,now,-z,noexecstack -fPIE -pie - - UNIT-BUILTINS += $(obj)/config.o - UNIT-BUILTINS += $(obj)/log.o -diff --git a/criu/pie/Makefile b/criu/pie/Makefile -index 265dcf8..40b5804 100644 ---- a/criu/pie/Makefile -+++ b/criu/pie/Makefile -@@ -6,6 +6,7 @@ target := parasite restorer - - CFLAGS := $(filter-out -pg $(CFLAGS-GCOV) $(CFLAGS-ASAN),$(CFLAGS)) - CFLAGS += $(CFLAGS_PIE) -+CFLAGS := $(filter-out -fstack-protector -fstack-protector-all,$(CFLAGS)) - ccflags-y += -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 - ccflags-y += -Wp,-U_FORTIFY_SOURCE -Wp,-D_FORTIFY_SOURCE=0 - -diff --git a/criu/pie/Makefile.library b/criu/pie/Makefile.library -index da2a2fa..c022d06 100644 ---- a/criu/pie/Makefile.library -+++ b/criu/pie/Makefile.library -@@ -27,3 +27,5 @@ CFLAGS += $(CFLAGS_PIE) - ifeq ($(ARCH),mips) - CFLAGS += -fno-stack-protector -DCR_NOGLIBC -mno-abicalls -fno-pic - endif -+ -+CFLAGS := $(filter-out -fstack-protector -fstack-protector-all,$(CFLAGS)) -diff --git a/lib/Makefile b/lib/Makefile -index 575a7ba..729c298 100644 ---- a/lib/Makefile -+++ b/lib/Makefile -@@ -14,6 +14,7 @@ lib/c/Makefile: ; - lib/c/%: .FORCE - $(Q) $(MAKE) $(build)=lib/c $@ - -+CFLAGS := $(filter-out -fPIE,$(CFLAGS)) - cflags-so += $(CFLAGS) -rdynamic -Wl,-soname,$(CRIU_SO).$(CRIU_SO_VERSION_MAJOR) - ldflags-so += -lprotobuf-c - -diff --git a/lib/c/Makefile b/lib/c/Makefile -index af01467..d7f6491 100644 ---- a/lib/c/Makefile -+++ b/lib/c/Makefile -@@ -4,5 +4,5 @@ obj-y += ./images/rpc.pb-c.o - ccflags-y += -iquote criu/$(ARCH_DIR)/include - ccflags-y += -iquote criu/include - ccflags-y += -iquote images --ccflags-y += -fPIC -fno-stack-protector -+ccflags-y += -fPIC - ldflags-y += -r -z noexecstack -diff --git a/scripts/nmk/scripts/build.mk b/scripts/nmk/scripts/build.mk -index d01d2b7..6f366d7 100644 ---- a/scripts/nmk/scripts/build.mk -+++ b/scripts/nmk/scripts/build.mk -@@ -15,8 +15,9 @@ lib-name := - lib-target := - hostprogs-y := - libso-y := --ld_flags := --ldflags-so := -+ld_flags := -Wl,-z,relro,-z,now,-z,noexecstack -+ldflags-so := -Wl,-z,relro,-z,now,-z,noexecstack -+ldflags-y := -z relro -z now -z noexecstack - arflags-y := - target := - deps-y := --- -2.34.1 - diff --git a/0058-nftables-add-mnl-api.patch b/0058-nftables-add-mnl-api.patch deleted file mode 100644 index 4445acd96ac838ddf0161bc44f85819c2acb4ecb..0000000000000000000000000000000000000000 --- a/0058-nftables-add-mnl-api.patch +++ /dev/null @@ -1,283 +0,0 @@ -From e6dea32c64dfae3a6d06512b45f66416fc974556 Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Wed, 11 Aug 2021 16:50:49 +0800 -Subject: [PATCH 58/72] nftables: add mnl api - -libmnl provides the communication between userspace and kernelspace for -netfilter netlink. I abstract here for the next usage. - -Signed-off-by: fu.lin ---- - criu/Makefile | 2 + - criu/Makefile.crtools | 1 + - criu/Makefile.packages | 6 ++ - criu/include/nftables.h | 28 +++++++ - criu/mnl.c | 165 ++++++++++++++++++++++++++++++++++++++++ - 5 files changed, 202 insertions(+) - create mode 100644 criu/include/nftables.h - create mode 100644 criu/mnl.c - -diff --git a/criu/Makefile b/criu/Makefile -index 3b4d69f..8d11bd5 100644 ---- a/criu/Makefile -+++ b/criu/Makefile -@@ -28,6 +28,8 @@ CFLAGS += -iquote images - CFLAGS += -iquote $(ARCH_DIR)/include - CFLAGS += -iquote . - CFLAGS += $(shell $(PKG_CONFIG) --cflags libnl-3.0) -+CFLAGS += $(shell $(PKG_CONFIG) --cflags libnftnl) -+CFLAGS += $(shell $(PKG_CONFIG) --cflags libmnl) - CFLAGS += $(CONFIG-DEFINES) - - ifeq ($(GMON),1) -diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools -index 2ad0207..a132810 100644 ---- a/criu/Makefile.crtools -+++ b/criu/Makefile.crtools -@@ -98,6 +98,7 @@ obj-y += reserved-ports.o - obj-y += orphan-inode.o - obj-y += kmsg.o - obj-y += taskqueue.o -+obj-y += mnl.o - obj-$(CONFIG_HAS_LIBBPF) += bpfmap.o - obj-$(CONFIG_COMPAT) += pie-util-vdso-elf32.o - CFLAGS_pie-util-vdso-elf32.o += -DCONFIG_VDSO_32 -diff --git a/criu/Makefile.packages b/criu/Makefile.packages -index 851489b..76e59ca 100644 ---- a/criu/Makefile.packages -+++ b/criu/Makefile.packages -@@ -7,6 +7,8 @@ REQ-RPM-PKG-NAMES += protobuf-python - REQ-RPM-PKG-NAMES += libnl3-devel - REQ-RPM-PKG-NAMES += libcap-devel - REQ-RPM-PKG-NAMES += $(PYTHON)-future -+REQ-RPM-PKG-NAMES += libmnl-devel -+REQ-RPM-PKG-NAMES += libnftnl-devel - - REQ-RPM-PKG-TEST-NAMES += libaio-devel - -@@ -18,6 +20,8 @@ REQ-DEB-PKG-NAMES += $(PYTHON)-protobuf - REQ-DEB-PKG-NAMES += $(PYTHON)-future - REQ-DEB-PKG-NAMES += libnl-3-dev - REQ-DEB-PKG-NAMES += libcap-dev -+REQ-DEB-PKG-NAMES += libmnl-dev -+REQ-DEB-PKG-NAMES += libnftnl-dev - - REQ-DEB-PKG-TEST-NAMES += $(PYTHON)-yaml - REQ-DEB-PKG-TEST-NAMES += libaio-dev -@@ -32,6 +36,8 @@ endif - - export LIBS += -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/ -lnet - export LIBS += -lpthread -+export LIBS += $(shell $(PKG_CONFIG) --libs libmnl) -+export LIBS += $(shell $(PKG_CONFIG) --libs libnftnl) - - check-packages-failed: - $(warning Can not find some of the required libraries) -diff --git a/criu/include/nftables.h b/criu/include/nftables.h -new file mode 100644 -index 0000000..0bdab31 ---- /dev/null -+++ b/criu/include/nftables.h -@@ -0,0 +1,28 @@ -+#ifndef __CR_NFTABLES_H__ -+#define __CR_NFTABLES_H__ -+ -+#include -+ -+struct mnl_params { -+ struct mnl_socket *nl; -+ char *buf; -+ struct mnl_nlmsg_batch *batch; -+ uint32_t seq; -+}; -+ -+typedef struct nlmsghdr * (*buf_func_t)(struct mnl_params *mnl_params, void *args); -+typedef int (*batch_func_t)(struct mnl_params *mnl_params, void *args); -+typedef int (*mnl_func_t)(struct mnl_params *mnl, batch_func_t cb, void *args); -+ -+struct mnl_cb_params { -+ pid_t tree_id; -+ bool create; -+ bool ipv6; -+}; -+ -+int mnl_sendmsg(batch_func_t batch_cb, void *args); -+int mnl_common(mnl_func_t mnl_cb, void *arg1, void *arg2); -+int mnl_batch_send_and_recv(struct mnl_params *mnl_params, batch_func_t cb, void *args, int *result); -+int mnl_buf_send_and_recv(struct mnl_params *mnl_params, buf_func_t cb, void *args, int *result); -+ -+#endif /* __CR_NFTABLES_H__ */ -diff --git a/criu/mnl.c b/criu/mnl.c -new file mode 100644 -index 0000000..3a03202 ---- /dev/null -+++ b/criu/mnl.c -@@ -0,0 +1,165 @@ -+#include -+#include -+#include -+ -+#include -+ -+#include "nftables.h" -+#include "log.h" -+ -+int mnl_common(mnl_func_t mnl_cb, void *arg1, void *arg2) -+{ -+ char buf[MNL_SOCKET_BUFFER_SIZE]; -+ struct mnl_params mnl = { -+ .seq = time(NULL), -+ }; -+ int retval = -1; -+ -+ mnl.nl = mnl_socket_open(NETLINK_NETFILTER); -+ if (mnl.nl == NULL) { -+ pr_err("mnl_socket_open failed with %d: %s\n", errno, strerror(errno)); -+ return -1; -+ } -+ -+ if (mnl_socket_bind(mnl.nl, 0, MNL_SOCKET_AUTOPID) < 0) { -+ pr_err("mnl_socket_bind wailed with %d: %s\n", errno, strerror(errno)); -+ goto err_mnl; -+ } -+ -+ mnl.buf = buf; -+ mnl.batch = mnl_nlmsg_batch_start(buf, sizeof(buf)); -+ if (mnl.batch == NULL) -+ goto err_mnl; -+ -+ if (mnl_cb(&mnl, arg1, arg2) < 0) -+ goto err_batch; -+ -+ retval = 0; -+ -+err_batch: -+ mnl_nlmsg_batch_stop(mnl.batch); -+err_mnl: -+ mnl_socket_close(mnl.nl); -+ -+ return retval; -+} -+ -+static int mnl_sendmsg_internal(struct mnl_params *mnl, batch_func_t cb, void *args) -+{ -+ int retval = -1; -+ -+ nftnl_batch_begin(mnl_nlmsg_batch_current(mnl->batch), mnl->seq++); -+ mnl_nlmsg_batch_next(mnl->batch); -+ -+ if (cb(mnl, args) < 0) -+ goto err_batch; -+ -+ nftnl_batch_end(mnl_nlmsg_batch_current(mnl->batch), mnl->seq++); -+ mnl_nlmsg_batch_next(mnl->batch); -+ -+ if (mnl_socket_sendto(mnl->nl, mnl_nlmsg_batch_head(mnl->batch), -+ mnl_nlmsg_batch_size(mnl->batch)) < 0) { -+ pr_err("%s: mnl_socket_sendto failed with %d: %s\n", -+ __func__, errno, strerror(errno)); -+ goto err_batch; -+ } -+ -+ retval = 0; -+ -+err_batch: -+ return retval; -+} -+ -+int mnl_sendmsg(batch_func_t batch_cb, void *args) -+{ -+ return mnl_common(mnl_sendmsg_internal, batch_cb, args); -+} -+ -+int mnl_batch_send_and_recv(struct mnl_params *mnl_params, batch_func_t cb, -+ void *args, int *result) -+{ -+ struct mnl_socket *nl = mnl_params->nl; -+ struct mnl_nlmsg_batch *batch = mnl_params->batch; -+ uint32_t *seq = &mnl_params->seq; -+ char buf[MNL_SOCKET_BUFFER_SIZE]; -+ int retval; -+ -+ mnl_nlmsg_batch_reset(batch); -+ nftnl_batch_begin(mnl_nlmsg_batch_current(batch), (*seq)++); -+ mnl_nlmsg_batch_next(batch); -+ -+ if (cb(mnl_params, args) < 0) -+ return -1; -+ -+ nftnl_batch_end(mnl_nlmsg_batch_current(batch), (*seq)++); -+ mnl_nlmsg_batch_next(batch); -+ -+ if (mnl_socket_sendto(nl, mnl_nlmsg_batch_head(batch), -+ mnl_nlmsg_batch_size(batch)) < 0) { -+ pr_err("%s: mnl_socket_sendto failed with %d: %s\n", -+ __func__, errno, strerror(errno)); -+ return -1; -+ } -+ -+ /* don't care the netlink retval, and nlmsg hdr flags has no `NLM_F_ACK` */ -+ if (result == NULL) -+ return 0; -+ -+ retval = mnl_socket_recvfrom(nl, buf, sizeof(buf)); -+ while (retval > 0) { -+ retval = mnl_cb_run(buf, retval, 0, mnl_socket_get_portid(nl), NULL, NULL); -+ if (retval <= 0) -+ break; -+ retval = mnl_socket_recvfrom(nl, buf, sizeof(buf)); -+ } -+ -+ if (retval < 0) { -+ pr_err("%s: mnl batch socket recv errno with %d: %s\n", -+ __func__, errno, strerror(errno)); -+ *result = errno; -+ return -1; -+ } -+ -+ *result = 0; -+ return 0; -+} -+ -+int mnl_buf_send_and_recv(struct mnl_params *mnl_params, buf_func_t cb, -+ void *args, int *result) -+{ -+ struct mnl_socket *nl = mnl_params->nl; -+ char buf[MNL_SOCKET_BUFFER_SIZE]; -+ struct nlmsghdr *nlh; -+ int retval = 0; -+ -+ if ((nlh = cb(mnl_params, args)) == NULL) -+ return -1; -+ -+ if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { -+ pr_err("%s: mnl_socket_sendto failed with %d: %s\n", -+ __func__, errno, strerror(errno)); -+ return -1; -+ } -+ -+ /* don't care the netlink retval, and nlmsg hdr flags has no `NLM_F_ACK` */ -+ if (result == NULL) -+ return 0; -+ -+ retval = mnl_socket_recvfrom(nl, buf, sizeof(buf)); -+ while (retval > 0) { -+ retval = mnl_cb_run(buf, retval, 0, mnl_socket_get_portid(nl), NULL, NULL); -+ if (retval <= 0) -+ break; -+ retval = mnl_socket_recvfrom(nl, buf, sizeof(buf)); -+ } -+ -+ if (retval < 0) { -+ pr_info("%s: mnl buf socket recv errno with %d: %s\n", -+ __func__, errno, strerror(errno)); -+ *result = errno; -+ return -1; -+ } -+ -+ *result = 0; -+ return 0; -+} --- -2.34.1 - diff --git a/0059-nftables-implement-nft-api-for-tcp.patch b/0059-nftables-implement-nft-api-for-tcp.patch deleted file mode 100644 index c26320044853885de9fd44d97b44206fef6c4a75..0000000000000000000000000000000000000000 --- a/0059-nftables-implement-nft-api-for-tcp.patch +++ /dev/null @@ -1,1011 +0,0 @@ -From 099fe7c10a7eaac7df82d268d4d6bd831a68d44b Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Wed, 11 Aug 2021 16:50:49 +0800 -Subject: [PATCH 59/72] nftables: implement nft api for tcp - -Signed-off-by: fu.lin ---- - criu/Makefile.crtools | 1 + - criu/include/nftables.h | 138 +++++++ - criu/nftables.c | 823 ++++++++++++++++++++++++++++++++++++++++ - 3 files changed, 962 insertions(+) - create mode 100644 criu/nftables.c - -diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools -index a132810..b2a7641 100644 ---- a/criu/Makefile.crtools -+++ b/criu/Makefile.crtools -@@ -99,6 +99,7 @@ obj-y += orphan-inode.o - obj-y += kmsg.o - obj-y += taskqueue.o - obj-y += mnl.o -+obj-y += nftables.o - obj-$(CONFIG_HAS_LIBBPF) += bpfmap.o - obj-$(CONFIG_COMPAT) += pie-util-vdso-elf32.o - CFLAGS_pie-util-vdso-elf32.o += -DCONFIG_VDSO_32 -diff --git a/criu/include/nftables.h b/criu/include/nftables.h -index 0bdab31..3b51a3d 100644 ---- a/criu/include/nftables.h -+++ b/criu/include/nftables.h -@@ -3,6 +3,99 @@ - - #include - -+#include -+#include -+#include -+#include -+#include -+ -+#define construct_buf(buf, type, family, flags, seq, payload, cb_prefix) \ -+ ({ \ -+ struct nlmsghdr *_nlh; \ -+ \ -+ _nlh = nftnl_##cb_prefix##_nlmsg_build_hdr((buf), \ -+ (type), (family), (flags), (seq)); \ -+ nftnl_##cb_prefix##_nlmsg_build_payload(_nlh, (payload)); \ -+ nftnl_##cb_prefix##_free((payload)); \ -+ _nlh; \ -+ }) -+ -+#define construct_table_buf(buf, type, family, flags, seq, payload) \ -+ construct_buf((buf), (type), (family), (flags), (seq), \ -+ (payload), table) -+ -+#define construct_chain_buf(buf, type, family, flags, seq, payload) \ -+ construct_buf((buf), (type), (family), (flags), (seq), \ -+ (payload), chain) -+ -+#define construct_batch(batch, type, family, flags, seq, payload, cb_prefix) \ -+ { \ -+ struct nlmsghdr *_nlh; \ -+ \ -+ _nlh = nftnl_##cb_prefix##_nlmsg_build_hdr( \ -+ mnl_nlmsg_batch_current(batch), \ -+ (type), (family), (flags), (seq)); \ -+ nftnl_##cb_prefix##_nlmsg_build_payload(_nlh, (payload)); \ -+ nftnl_##cb_prefix##_free((payload)); \ -+ mnl_nlmsg_batch_next((batch)); \ -+ } -+ -+#define construct_table_batch(batch, type, family, flags, seq, payload) \ -+ construct_batch((batch), (type), (family), (flags), (seq), \ -+ (payload), table) -+ -+#define construct_chain_batch(batch, type, family, flags, seq, payload) \ -+ construct_batch((batch), (type), (family), (flags), (seq), \ -+ (payload), chain) -+ -+#define construct_set_batch(batch, type, family, flags, seq, payload) \ -+ construct_batch((batch), (type), (family), (flags), (seq), \ -+ (payload), set) -+ -+#define construct_rule_batch(batch, type, family, flags, seq, payload) \ -+ construct_batch((batch), (type), (family), (flags), (seq), \ -+ (payload), rule) -+ -+#define construct_set_elems_batch(batch, type, family, flags, seq, payload) \ -+ { \ -+ struct nlmsghdr *_nlh; \ -+ \ -+ _nlh = nftnl_nlmsg_build_hdr( \ -+ mnl_nlmsg_batch_current(batch), \ -+ (type), (family), (flags), (seq)); \ -+ nftnl_set_elems_nlmsg_build_payload(_nlh, (payload)); \ -+ nftnl_set_free((payload)); \ -+ mnl_nlmsg_batch_next((batch)); \ -+ } -+ -+#define TABLE_NAME "filter" -+#define INPUT_CHAIN_NAME "criu-input" -+#define OUTPUT_CHAIN_NAME "criu-output" -+#define INPUT_IPV4_SET_NAME "criu-input-ipv4-blacklist-%d" -+#define INPUT_IPV6_SET_NAME "criu-input-ipv6-blacklist-%d" -+#define OUTPUT_IPV4_SET_NAME "criu-output-ipv4-blacklist-%d" -+#define OUTPUT_IPV6_SET_NAME "criu-output-ipv6-blacklist-%d" -+ -+/* set key type, see nftables/include/datatypes.h -+ * The rule of the datatype calculation: -+ * Each type occupies 6 bits, type: -+ * - ipaddr: 7, 4 bytes -+ * - ip6addr: 8, 16 types -+ * - inet_service: 13, 2 bytes (pading to 4 bytes) -+ * -+ * 0x1cd1cd: 0b 000111 001101 000111 001101 -+ * 0x20d20d: 0b 001000 001101 001000 001101 -+ */ -+#define INET_SERVICE_LEN 2 -+#define IPADDR_LEN 4 -+#define IP6ADDR_LEN 16 -+#define div_round_up(n, d) (((n) + (d) - 1) / (d)) -+ -+#define IPv4_KEY_TYPE 0x1cd1cd -+#define IPv4_KEY_LEN div_round_up(IPADDR_LEN + INET_SERVICE_LEN, 4) * 4 * 2 -+#define IPv6_KEY_TYPE 0x20d20d -+#define IPv6_KEY_LEN div_round_up(IP6ADDR_LEN + INET_SERVICE_LEN, 4) * 4 * 2 -+ - struct mnl_params { - struct mnl_socket *nl; - char *buf; -@@ -25,4 +118,49 @@ int mnl_common(mnl_func_t mnl_cb, void *arg1, void *arg2); - int mnl_batch_send_and_recv(struct mnl_params *mnl_params, batch_func_t cb, void *args, int *result); - int mnl_buf_send_and_recv(struct mnl_params *mnl_params, buf_func_t cb, void *args, int *result); - -+struct nft_chain_params { -+ char *name; -+ uint32_t hooknum; -+ char *type; -+ uint32_t prio; -+ uint32_t policy; -+}; -+ -+struct nft_set_params { -+ char name[128]; -+ uint32_t id; -+ uint32_t datatype; -+ uint32_t key_len; -+}; -+ -+struct nft_rule_params { -+ char *chain_name; -+ char set_name[128]; -+ uint32_t mark; -+ uint16_t mark_op; -+ uint32_t nfproto; -+ uint8_t l4proto; -+ unsigned int stmt; -+ bool ipv6; -+}; -+ -+struct nft_set_elem_params { -+ char set_name[128]; -+ char data[40]; -+ size_t data_len; -+}; -+ -+struct nf_conn_params { -+ uint8_t family; -+ uint32_t *src_addr; -+ uint16_t src_port; -+ uint32_t *dst_addr; -+ uint16_t dst_port; -+ bool lock; -+ pid_t tree_id; -+}; -+ -+struct inet_sk_desc; -+int nft_connection_switch(struct inet_sk_desc *sk, bool lock, pid_t tree_id); -+ - #endif /* __CR_NFTABLES_H__ */ -diff --git a/criu/nftables.c b/criu/nftables.c -new file mode 100644 -index 0000000..57774e6 ---- /dev/null -+++ b/criu/nftables.c -@@ -0,0 +1,823 @@ -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "sk-inet.h" -+#include "nftables.h" -+ -+#include "../soccr/soccr.h" -+ -+#include "log.h" -+ -+static struct nftnl_table *setup_table(uint8_t family, const char *table) -+{ -+ struct nftnl_table *t; -+ -+ t = nftnl_table_alloc(); -+ if (t == NULL) -+ return NULL; -+ -+ nftnl_table_set_u32(t, NFTNL_TABLE_FAMILY, family); -+ if (nftnl_table_set_str(t, NFTNL_TABLE_NAME, table) < 0) -+ goto err; -+ -+ return t; -+err: -+ nftnl_table_free(t); -+ return NULL; -+} -+ -+static struct nftnl_chain *setup_chain(const char *table, -+ struct nft_chain_params *params, -+ bool create) -+{ -+ struct nftnl_chain *c; -+ -+ c = nftnl_chain_alloc(); -+ if (c == NULL) -+ return NULL; -+ -+ if (nftnl_chain_set_str(c, NFTNL_CHAIN_TABLE, table) < 0) -+ goto err; -+ if (nftnl_chain_set_str(c, NFTNL_CHAIN_NAME, params->name) < 0) -+ goto err; -+ if (create) { -+ nftnl_chain_set_u32(c, NFTNL_CHAIN_HOOKNUM, params->hooknum); -+ if (nftnl_chain_set_str(c, NFTNL_CHAIN_TYPE, params->type) < 0) -+ goto err; -+ nftnl_chain_set_u32(c, NFTNL_CHAIN_PRIO, params->prio); -+ nftnl_chain_set_u32(c, NFTNL_CHAIN_POLICY, params->policy); -+ } -+ -+ return c; -+err: -+ nftnl_chain_free(c); -+ return NULL; -+} -+ -+static struct nftnl_set *setup_set(uint8_t family, const char *table, -+ struct nft_set_params *params, -+ bool create) -+{ -+ struct nftnl_set *s; -+ -+ s = nftnl_set_alloc(); -+ if (s == NULL) -+ return NULL; -+ -+ if (nftnl_set_set_str(s, NFTNL_SET_TABLE, table) < 0) -+ goto err; -+ if (nftnl_set_set_str(s, NFTNL_SET_NAME, params->name) < 0) -+ goto err; -+ if (create) { -+ nftnl_set_set_u32(s, NFTNL_SET_FAMILY, family); -+ nftnl_set_set_u32(s, NFTNL_SET_ID, params->id); -+ -+ nftnl_set_set_u32(s, NFTNL_SET_KEY_TYPE, params->datatype); -+ nftnl_set_set_u32(s, NFTNL_SET_KEY_LEN, params->key_len); -+ } -+ -+ return s; -+err: -+ nftnl_set_free(s); -+ return NULL; -+} -+ -+static int add_mark(struct nftnl_rule *r, uint32_t meta_key, enum nft_registers dreg) -+{ -+ struct nftnl_expr *e; -+ -+ e = nftnl_expr_alloc("meta"); -+ if (e == NULL) -+ return -1; -+ -+ nftnl_expr_set_u32(e, NFTNL_EXPR_META_KEY, meta_key); -+ nftnl_expr_set_u32(e, NFTNL_EXPR_META_DREG, dreg); -+ -+ nftnl_rule_add_expr(r, e); -+ -+ return 0; -+} -+ -+static int add_proto(struct nftnl_rule *r, enum nft_registers dreg) -+{ -+ struct nftnl_expr *e; -+ -+ e = nftnl_expr_alloc("meta"); -+ if (e == NULL) -+ return -1; -+ -+ nftnl_expr_set_u32(e, NFTNL_EXPR_META_KEY, NFT_META_L4PROTO); -+ nftnl_expr_set_u32(e, NFTNL_EXPR_META_DREG, dreg); -+ -+ nftnl_rule_add_expr(r, e); -+ -+ return 0; -+} -+ -+static int add_payload(struct nftnl_rule *r, uint32_t base, uint32_t dreg, -+ uint32_t offset, uint32_t len) -+{ -+ struct nftnl_expr *e; -+ -+ e = nftnl_expr_alloc("payload"); -+ if (e == NULL) -+ return -1; -+ -+ nftnl_expr_set_u32(e, NFTNL_EXPR_PAYLOAD_BASE, base); -+ nftnl_expr_set_u32(e, NFTNL_EXPR_PAYLOAD_DREG, dreg); -+ nftnl_expr_set_u32(e, NFTNL_EXPR_PAYLOAD_OFFSET, offset); -+ nftnl_expr_set_u32(e, NFTNL_EXPR_PAYLOAD_LEN, len); -+ -+ nftnl_rule_add_expr(r, e); -+ -+ return 0; -+} -+ -+static int add_cmp(struct nftnl_rule *r, enum nft_registers sreg, uint32_t op, -+ const void *data, uint32_t data_len) -+{ -+ struct nftnl_expr *e; -+ -+ e = nftnl_expr_alloc("cmp"); -+ if (e == NULL) -+ return -1; -+ -+ nftnl_expr_set_u32(e, NFTNL_EXPR_CMP_SREG, sreg); -+ nftnl_expr_set_u32(e, NFTNL_EXPR_CMP_OP, op); -+ nftnl_expr_set(e, NFTNL_EXPR_CMP_DATA, data, data_len); -+ -+ nftnl_rule_add_expr(r, e); -+ -+ return 0; -+} -+ -+static int add_lookup(struct nftnl_rule *r, enum nft_registers sreg, -+ const char *set) -+{ -+ struct nftnl_expr *e; -+ -+ e = nftnl_expr_alloc("lookup"); -+ if (e == NULL) -+ return -1; -+ -+ if (nftnl_expr_set_str(e, NFTNL_EXPR_LOOKUP_SET, set) < 0) -+ goto err; -+ nftnl_expr_set_u32(e, NFTNL_EXPR_LOOKUP_SREG, sreg); -+ -+ nftnl_rule_add_expr(r, e); -+ -+ return 0; -+err: -+ nftnl_expr_free(e); -+ return -1; -+} -+ -+static int add_counter(struct nftnl_rule *r) -+{ -+ struct nftnl_expr *e; -+ -+ e = nftnl_expr_alloc("counter"); -+ if (e == NULL) -+ return -1; -+ -+ nftnl_rule_add_expr(r, e); -+ -+ return 0; -+} -+ -+static int add_verdict(struct nftnl_rule *r, const char *chain, int verdict) -+{ -+ struct nftnl_expr *e; -+ -+ e = nftnl_expr_alloc("immediate"); -+ if (e == NULL) -+ return -1; -+ -+ nftnl_expr_set_u32(e, NFTNL_EXPR_IMM_DREG, NFT_REG_VERDICT); -+ nftnl_expr_set_u32(e, NFTNL_EXPR_IMM_VERDICT, verdict); -+ -+ nftnl_rule_add_expr(r, e); -+ -+ return 0; -+} -+ -+static int __setup_rule(struct nftnl_rule *r, struct nft_rule_params *params) -+{ -+ /* meta nfproto == */ -+ if (add_mark(r, NFT_META_PROTOCOL, NFT_REG32_00) < 0) -+ return -1; -+ if (add_cmp(r, NFT_REG32_00, NFT_CMP_EQ, ¶ms->nfproto, sizeof(uint32_t))< 0) -+ return -1; -+ -+ /* meta l4proto == */ -+ if (add_proto(r, NFT_REG32_00) < 0) -+ return -1; -+ if (add_cmp(r, NFT_REG32_00, NFT_CMP_EQ, ¶ms->l4proto, sizeof(uint8_t)) < 0) -+ return -1; -+ -+ /* ip saddr . sport . daddr . dport @ */ -+ if (params->ipv6 == false) { -+ if (add_payload(r, NFT_PAYLOAD_NETWORK_HEADER, NFT_REG32_00, -+ offsetof(struct iphdr, saddr), IPADDR_LEN) < 0) -+ return -1; -+ if (add_payload(r, NFT_PAYLOAD_TRANSPORT_HEADER, NFT_REG32_01, -+ offsetof(struct tcphdr, source), INET_SERVICE_LEN) < 0) -+ return -1; -+ if (add_payload(r, NFT_PAYLOAD_NETWORK_HEADER, NFT_REG32_02, -+ offsetof(struct iphdr, daddr), IPADDR_LEN) < 0) -+ return -1; -+ if (add_payload(r, NFT_PAYLOAD_TRANSPORT_HEADER, NFT_REG32_03, -+ offsetof(struct tcphdr, dest), INET_SERVICE_LEN) < 0) -+ return -1; -+ -+ if (add_lookup(r, NFT_REG32_00, params->set_name) < 0) -+ return -1; -+ } else { -+ if (add_payload(r, NFT_PAYLOAD_NETWORK_HEADER, NFT_REG32_00, -+ offsetof(struct ipv6hdr, saddr), IP6ADDR_LEN) < 0) -+ return -1; -+ if (add_payload(r, NFT_PAYLOAD_TRANSPORT_HEADER, NFT_REG32_04, -+ offsetof(struct tcphdr, source), INET_SERVICE_LEN) < 0) -+ return -1; -+ if (add_payload(r, NFT_PAYLOAD_NETWORK_HEADER, NFT_REG32_05, -+ offsetof(struct ipv6hdr, daddr), IP6ADDR_LEN) < 0) -+ return -1; -+ if (add_payload(r, NFT_PAYLOAD_TRANSPORT_HEADER, NFT_REG32_09, -+ offsetof(struct tcphdr, dest), INET_SERVICE_LEN) < 0) -+ return -1; -+ -+ if (add_lookup(r, NFT_REG32_00, params->set_name) < 0) -+ return -1; -+ } -+ -+ /* counter */ -+ if (add_counter(r) < 0) -+ return -1; -+ -+ return 0; -+} -+ -+static struct nftnl_rule *setup_rule(uint8_t family, const char *table, -+ struct nft_rule_params *params, -+ bool create, bool ns) -+{ -+ struct nftnl_rule *r = NULL; -+ -+ r = nftnl_rule_alloc(); -+ if (r == NULL) -+ return NULL; -+ -+ if (nftnl_rule_set_str(r, NFTNL_RULE_TABLE, table) < 0) -+ goto err; -+ nftnl_rule_set_u32(r, NFTNL_RULE_FAMILY, family); -+ if (nftnl_rule_set_str(r, NFTNL_RULE_CHAIN, params->chain_name) < 0) -+ goto err; -+ -+ if (params->mark != 0) { -+ /* meta mark != */ -+ if (add_mark(r, NFT_META_MARK, NFT_REG32_00) < 0) -+ goto err; -+ if (add_cmp(r, NFT_REG32_00, params->mark_op, ¶ms->mark, sizeof(uint32_t)) < 0) -+ goto err; -+ } -+ -+ if (!ns && __setup_rule(r, params) < 0) -+ goto err; -+ -+ /* drop */ -+ if (add_verdict(r, params->chain_name, params->stmt) < 0) -+ goto err; -+ -+ return r; -+ -+err: -+ nftnl_rule_free(r); -+ return NULL; -+} -+ -+static struct nlmsghdr *nft_table_detect(struct mnl_params *mnl_params, void *args) -+{ -+ struct nftnl_table *table; -+ -+ table = setup_table(NFPROTO_INET, TABLE_NAME); -+ if (table == NULL) -+ return NULL; -+ -+ return construct_table_buf(mnl_params->buf, NFT_MSG_GETTABLE, NFPROTO_INET, -+ NLM_F_ACK, mnl_params->seq++, table); -+} -+ -+static int nft_table_create(struct mnl_params *mnl_params, void *args) -+{ -+ struct nftnl_table *table; -+ -+ table = setup_table(NFPROTO_INET, TABLE_NAME); -+ if (table == NULL) -+ return -1; -+ -+ construct_table_batch(mnl_params->batch, NFT_MSG_NEWTABLE, NFPROTO_INET, -+ NLM_F_CREATE|NLM_F_EXCL|NLM_F_ACK, -+ mnl_params->seq++, table); -+ -+ return 0; -+} -+ -+static int nft_table_prepare(struct mnl_params *mnl_params) -+{ -+ int result = 0; -+ -+ if (mnl_buf_send_and_recv(mnl_params, nft_table_detect, NULL, &result) == 0) -+ return 0; -+ -+ pr_debug("%s: detect table result %d\n", __func__, result); -+ -+ if (result == ENOENT && -+ (mnl_batch_send_and_recv(mnl_params, nft_table_create, NULL, &result) < 0 -+ && (result != 0 && result != EEXIST))) { -+ pr_err("%s: create nftables table failed!\n", __func__); -+ return -1; -+ } else if (result != 0) { -+ pr_err("%s: detect table result %d\n", __func__, -result); -+ return -1; -+ } -+ -+ return 0; -+} -+ -+static struct nlmsghdr *nft_chain_detect(struct mnl_params *mnl_params, void *args) -+{ -+ struct nftnl_chain *chain; -+ -+ chain = setup_chain(TABLE_NAME, args, false); -+ if (chain == NULL) -+ return NULL; -+ -+ return construct_chain_buf(mnl_params->buf, NFT_MSG_GETCHAIN, NFPROTO_INET, -+ NLM_F_ACK, mnl_params->seq++, chain); -+} -+ -+static int nft_chain_create(struct mnl_params *mnl_params, void *args) -+{ -+ struct nftnl_chain *chain; -+ -+ chain = setup_chain(TABLE_NAME, args, true); -+ if (chain == NULL) -+ return -1; -+ -+ construct_chain_batch(mnl_params->batch, NFT_MSG_NEWCHAIN, NFPROTO_INET, -+ NLM_F_CREATE|NLM_F_EXCL|NLM_F_ACK, mnl_params->seq++, chain); -+ -+ return 0; -+} -+ -+static int nft_chain_prepare_internal(struct mnl_params *mnl_params, -+ struct nft_chain_params *params) -+{ -+ int result = 0; -+ -+ if (mnl_buf_send_and_recv(mnl_params, nft_chain_detect, params, &result) == 0) -+ return 0; -+ -+ pr_debug("%s: detect chain result %d\n", __func__, result); -+ -+ if (result == ENOENT && -+ (mnl_batch_send_and_recv(mnl_params, nft_chain_create, params, &result) < 0 -+ && (result != 0 && result != EEXIST))) { -+ pr_err("%s: nftables create chain %s failed!\n", -+ __func__, params->name); -+ return -1; -+ } else if (result != 0) { -+ pr_err("%s: detect chain result %d\n", __func__, -result); -+ return -1; -+ } -+ -+ return result; -+} -+ -+static int nft_chain_prepare(struct mnl_params *mnl_params) -+{ -+ struct nft_chain_params params = { -+ .type = "filter", -+ .prio = NF_IP_PRI_FILTER, -+ .policy = NF_ACCEPT, -+ }; -+ -+ /* prepare ipv4 input chain in filter table */ -+ params.name = INPUT_CHAIN_NAME; -+ params.hooknum = NF_INET_LOCAL_IN; -+ -+ if (nft_chain_prepare_internal(mnl_params, ¶ms) < 0) -+ return -1; -+ -+ /* prepare ipv4 output chain in filter table */ -+ params.name = OUTPUT_CHAIN_NAME; -+ params.hooknum = NF_INET_LOCAL_OUT; -+ -+ if (nft_chain_prepare_internal(mnl_params, ¶ms) < 0) -+ return -1; -+ -+ return 0; -+} -+ -+static int nft_set_internal(uint8_t family, struct mnl_params *mnl_params, -+ struct nft_set_params *params, bool create) -+{ -+ struct nftnl_set *set; -+ -+ set = setup_set(family, TABLE_NAME, params, create); -+ if (set == NULL) -+ return -1; -+ -+ if (create) { -+ construct_set_batch(mnl_params->batch, NFT_MSG_NEWSET, family, -+ NLM_F_CREATE|NLM_F_EXCL|NLM_F_ACK, mnl_params->seq++, set); -+ } else { -+ construct_set_batch(mnl_params->batch, NFT_MSG_DELSET, family, -+ 0, mnl_params->seq++, set); -+ } -+ -+ return 0; -+} -+ -+static int nft_set_raw(struct mnl_params *mnl_params, -+ struct mnl_cb_params *args, bool input) -+{ -+ const uint32_t set_id_base = input ? 0x12315 : 0x17173; -+ const uint8_t family = NFPROTO_INET; -+ struct nft_set_params params = { 0 }; -+ char *set_name; -+ int idx = 0; -+ -+ if (!args->ipv6) { -+ params.datatype = IPv4_KEY_TYPE; -+ params.key_len = IPv4_KEY_LEN; -+ idx = 4; -+ } else { -+ params.datatype = IPv6_KEY_TYPE; -+ params.key_len = IPv6_KEY_LEN; -+ idx = 6; -+ } -+ -+ if (args->ipv6 && input) -+ set_name = INPUT_IPV6_SET_NAME; -+ else if (args->ipv6 && !input) -+ set_name = OUTPUT_IPV6_SET_NAME; -+ else if (!args->ipv6 && input) -+ set_name = INPUT_IPV4_SET_NAME; -+ else -+ set_name = OUTPUT_IPV4_SET_NAME; -+ -+ snprintf(params.name, sizeof(params.name)-1, set_name, args->tree_id); -+ params.id = set_id_base + args->tree_id + idx; -+ -+ if (nft_set_internal(family, mnl_params, ¶ms, args->create) < 0) { -+ pr_err("%s: create nftables %s %s set failed!\n", __func__, -+ args->ipv6 ? "ipv6" : "ipv4", -+ input ? "input" : "output"); -+ return -1; -+ } -+ -+ return 0; -+} -+ -+static int nft_set(struct mnl_params *mnl_params, void *args) -+{ -+ struct mnl_cb_params *params = args; -+ -+ params->ipv6 = false; -+ if (nft_set_raw(mnl_params, params, true) < 0) -+ return -1; -+ -+ if (nft_set_raw(mnl_params, params, false) < 0) -+ return -1; -+ -+ params->ipv6 = true; -+ if (nft_set_raw(mnl_params, params, true) < 0) -+ return -1; -+ -+ if (nft_set_raw(mnl_params, params, false) < 0) -+ return -1; -+ -+ return 0; -+} -+ -+static int nft_set_common(struct mnl_params *mnl_params, pid_t tree_id, bool create) -+{ -+ struct mnl_cb_params params = { -+ .tree_id = tree_id, -+ .create = create, -+ }; -+ int result = 0; -+ -+ if (create && -+ (mnl_batch_send_and_recv(mnl_params, nft_set, ¶ms, &result) < 0 -+ && (result != 0 && result != EEXIST))) { -+ pr_err("%s: create set failed!\n", __func__); -+ return -1; -+ } else if (!create && -+ mnl_batch_send_and_recv(mnl_params, nft_set, ¶ms, NULL) < 0) { -+ pr_err("%s: delete set failed!\n", __func__); -+ return -1; -+ } -+ -+ return 0; -+} -+ -+static int nft_rule_internal(uint8_t family, struct mnl_params *mnl_params, -+ struct nft_rule_params *params, bool create) -+{ -+ struct nftnl_rule *rule; -+ -+ rule = setup_rule(family, TABLE_NAME, params, create, false); -+ if (rule == NULL) -+ return -1; -+ -+ if (create) { -+ construct_rule_batch(mnl_params->batch, NFT_MSG_NEWRULE, family, -+ NLM_F_CREATE|NLM_F_EXCL|NLM_F_ACK, -+ mnl_params->seq++, rule); -+ } else { -+ construct_rule_batch(mnl_params->batch, NFT_MSG_DELRULE, family, -+ 0, mnl_params->seq++, rule); -+ } -+ -+ return 0; -+} -+ -+static int nft_rule_raw(struct mnl_params *mnl_params, struct mnl_cb_params *args, -+ struct nft_rule_params *params) -+{ -+ char *set_name; -+ -+ params->nfproto = params->ipv6 ? htons(ETH_P_IPV6) : htons(ETH_P_IP); -+ -+ set_name = params->ipv6 ? INPUT_IPV6_SET_NAME : INPUT_IPV4_SET_NAME; -+ params->chain_name = INPUT_CHAIN_NAME; -+ snprintf(params->set_name, sizeof(params->set_name)-1, set_name, args->tree_id); -+ if (nft_rule_internal(NFPROTO_INET, mnl_params, params, args->create) < 0) { -+ pr_err("%s: create nft %s input rule failed!\n", -+ __func__, params->ipv6 ? "ipv6" : "ipv4"); -+ return -1; -+ } -+ -+ set_name = params->ipv6 ? OUTPUT_IPV6_SET_NAME : OUTPUT_IPV4_SET_NAME; -+ params->chain_name = OUTPUT_CHAIN_NAME; -+ snprintf(params->set_name, sizeof(params->set_name)-1, set_name, args->tree_id); -+ if (nft_rule_internal(NFPROTO_INET, mnl_params, params, args->create) < 0) { -+ pr_err("%s: create nftables %s output rule failed!\n", -+ __func__, params->ipv6 ? "ipv6" : "ipv4"); -+ return -1; -+ } -+ -+ return 0; -+} -+ -+static int nft_rule(struct mnl_params *mnl_params, void *args) -+{ -+ struct nft_rule_params params = { -+ .l4proto = IPPROTO_TCP, -+ .mark = SOCCR_MARK, -+ .mark_op = NFT_CMP_NEQ, -+ .stmt = NF_DROP, -+ }; -+ -+ params.ipv6 = false; -+ if (nft_rule_raw(mnl_params, args, ¶ms) < 0) -+ return -1; -+ -+ params.ipv6 = true; -+ if (nft_rule_raw(mnl_params, args, ¶ms) < 0) -+ return -1; -+ -+ return 0; -+} -+ -+static int nft_rule_common(struct mnl_params *mnl_params, pid_t tree_id, bool create) -+{ -+ struct mnl_cb_params params = { -+ .tree_id = tree_id, -+ .create = create, -+ }; -+ int result = 0; -+ -+ if (create && -+ (mnl_batch_send_and_recv(mnl_params, nft_rule, ¶ms, &result) < 0 -+ && (result != 0 && result != EEXIST))) { -+ pr_err("%s: create rule failed!\n", __func__); -+ return -1; -+ } else if (!create && -+ mnl_batch_send_and_recv(mnl_params, nft_rule, ¶ms, NULL) < 0) { -+ pr_err("%s: delete rule failed!\n", __func__); -+ return -1; -+ } -+ -+ return 0; -+} -+ -+static int network_prepare_internal(struct mnl_params *params, batch_func_t _, void *args) -+{ -+ pid_t tree_id = *(pid_t *)args; -+ -+ if (nft_table_prepare(params) < 0) -+ return -1; -+ -+ if (nft_chain_prepare(params) < 0) -+ return -1; -+ -+ if (nft_set_common(params, tree_id, true) < 0) -+ return -1; -+ -+ if (nft_rule_common(params, tree_id, true) < 0) -+ return -1; -+ -+ return 0; -+} -+ -+int network_prepare(pid_t tree_id) -+{ -+ pr_info("Prepare network\n"); -+ -+ return mnl_common(network_prepare_internal, NULL, &tree_id); -+} -+ -+static int network_unprepare_internal(struct mnl_params *params, -+ batch_func_t _, void *args) -+{ -+ pid_t tree_id = *(pid_t *)args; -+ -+ if (nft_rule_common(params, tree_id, false) < 0) -+ return -1; -+ -+ if (nft_set_common(params, tree_id, false) < 0) -+ return -1; -+ -+ return 0; -+} -+ -+void network_unprepare(pid_t tree_id) -+{ -+ pr_info("Unprepare network\n"); -+ -+ mnl_common(network_unprepare_internal, NULL, &tree_id); -+} -+ -+static int add_set_elem_internal(struct nftnl_set *s, void *data, size_t len) -+{ -+ struct nftnl_set_elem *e; -+ -+ e = nftnl_set_elem_alloc(); -+ if (e == NULL) -+ return -1; -+ -+ nftnl_set_elem_set(e, NFTNL_SET_ELEM_KEY, data, len); -+ -+ nftnl_set_elem_add(s, e); -+ -+ return 0; -+} -+ -+static struct nftnl_set *add_set_elem(const char *table, const char *set, -+ void *data, size_t len) -+{ -+ struct nftnl_set *s; -+ -+ s = nftnl_set_alloc(); -+ if (s == NULL) -+ return NULL; -+ -+ if (nftnl_set_set_str(s, NFTNL_SET_TABLE, table) < 0) -+ goto err; -+ if (nftnl_set_set_str(s, NFTNL_SET_NAME, set) < 0) -+ goto err; -+ -+ if (add_set_elem_internal(s, data, len) < 0) -+ goto err; -+ -+ return s; -+ -+err: -+ nftnl_set_free(s); -+ return NULL; -+} -+ -+static int nft_set_elem(uint8_t family, struct mnl_params *mnl_param, -+ struct nft_set_elem_params *elem_param, -+ bool lock) -+{ -+ struct nftnl_set *set; -+ -+ set = add_set_elem(TABLE_NAME, elem_param->set_name, -+ elem_param->data, elem_param->data_len); -+ if (set == NULL) -+ return -1; -+ -+ if (lock) { -+ construct_set_elems_batch(mnl_param->batch, NFT_MSG_NEWSETELEM, -+ family, NLM_F_CREATE|NLM_F_EXCL, -+ mnl_param->seq++, set); -+ } else { -+ construct_set_elems_batch(mnl_param->batch, NFT_MSG_DELSETELEM, -+ family, 0, mnl_param->seq++, set); -+ } -+ -+ return 0; -+} -+ -+static void construct_set_elem_key(void *data, struct nf_conn_params *param, bool output) -+{ -+ size_t offset = 0; -+ size_t addr_len = param->family == AF_INET ? IPADDR_LEN : IP6ADDR_LEN; -+ -+ memcpy(data+offset, output ? param->src_addr : param->dst_addr, addr_len); -+ offset = addr_len; -+ *(uint32_t *)(data + offset) = htons(output ? param->src_port : param->dst_port); -+ offset += sizeof(uint32_t); -+ memcpy(data+offset, output ? param->dst_addr : param->src_addr, addr_len); -+ offset += addr_len; -+ *(uint32_t *)(data + offset) = htons(output ? param->dst_port : param->src_port); -+} -+ -+static int nf_connection_switch_raw(struct mnl_params *mnl_params, void *args) -+{ -+ struct nf_conn_params *param = args; -+ char *input_set_name, *output_set_name; -+ struct nft_set_elem_params elem; -+ -+ switch (param->family) { -+ case AF_INET: -+ input_set_name = INPUT_IPV4_SET_NAME; -+ output_set_name = OUTPUT_IPV4_SET_NAME; -+ elem.data_len = IPv4_KEY_LEN; -+ break; -+ case AF_INET6: -+ input_set_name = INPUT_IPV6_SET_NAME; -+ output_set_name = OUTPUT_IPV6_SET_NAME; -+ elem.data_len = IPv6_KEY_LEN; -+ break; -+ default: -+ pr_err("Unknown socket family %d\n", param->family); -+ return -1; -+ } -+ -+ construct_set_elem_key(elem.data, param, false); -+ snprintf(elem.set_name, sizeof(elem.set_name)-1, input_set_name, param->tree_id); -+ if (nft_set_elem(NFPROTO_INET, mnl_params, &elem, param->lock) < 0) -+ return -1; -+ -+ construct_set_elem_key(elem.data, param, true); -+ snprintf(elem.set_name, sizeof(elem.set_name)-1, output_set_name, param->tree_id); -+ if (nft_set_elem(NFPROTO_INET, mnl_params, &elem, param->lock) < 0) -+ return -1; -+ -+ return 0; -+} -+ -+/* IPv4-Mapped IPv6 Addresses */ -+static int ipv6_addr_mapped(uint32_t *addr) -+{ -+ return (addr[2] == htonl(0x0000ffff)); -+} -+ -+int nft_connection_switch(struct inet_sk_desc *sk, bool lock, pid_t tree_id) -+{ -+ char sip[INET_ADDR_LEN], dip[INET_ADDR_LEN]; -+ struct nf_conn_params param = { -+ .family = sk->sd.family, -+ .src_addr = sk->src_addr, -+ .src_port = sk->src_port, -+ .dst_addr = sk->dst_addr, -+ .dst_port = sk->dst_port, -+ .lock = lock, -+ .tree_id = tree_id, -+ }; -+ -+ if (param.family == AF_INET6 && ipv6_addr_mapped(param.dst_addr)) { -+ param.family = AF_INET; -+ param.src_addr = ¶m.src_addr[3]; -+ param.dst_addr = ¶m.dst_addr[3]; -+ } -+ -+ if (!inet_ntop(param.family, (void *)param.src_addr, sip, INET_ADDR_LEN) || -+ !inet_ntop(param.family, (void *)param.dst_addr, dip, INET_ADDR_LEN)) { -+ pr_perror("nf: Can't translate ip addr"); -+ return -1; -+ } -+ -+ pr_info("%s %s:%d - %s:%d connection\n", lock ? "Locked" : "Unlocked", -+ sip, (int)param.src_port, dip, (int)param.dst_port); -+ -+ return mnl_sendmsg(nf_connection_switch_raw, ¶m); -+} --- -2.34.1 - diff --git a/0060-net-switch-to-nftables-API.patch b/0060-net-switch-to-nftables-API.patch deleted file mode 100644 index 7cc7007144f205ed99c05a7b7888a27eb8f8ed4a..0000000000000000000000000000000000000000 --- a/0060-net-switch-to-nftables-API.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 073ed2ef448fb073aa3c6f0552e120e3e98a8906 Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Wed, 13 Apr 2022 14:30:54 +0800 -Subject: [PATCH 60/72] net: switch to nftables API - -This is fake patch - -Signed-off-by: fu.lin ---- - criu/config.c | 2 ++ - criu/crtools.c | 1 + - criu/include/cr_options.h | 2 ++ - 3 files changed, 5 insertions(+) - -diff --git a/criu/config.c b/criu/config.c -index c0358e5..7c4e230 100644 ---- a/criu/config.c -+++ b/criu/config.c -@@ -711,6 +711,8 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, - { "reserve-ports", required_argument, 0, 'P' }, - BOOL_OPT("parallel", &opts.parallel), - { "exec-pin-start", required_argument, 0, 2002 }, -+ BOOL_OPT("use-nft", &opts.use_nft), -+ BOOL_OPT("async-clear-nft", &opts.async_clear_nft), - {}, - }; - -diff --git a/criu/crtools.c b/criu/crtools.c -index 40e2d51..c555213 100644 ---- a/criu/crtools.c -+++ b/criu/crtools.c -@@ -474,6 +474,7 @@ usage: - " --reserve-ports Reserve src ports in kernel\n" - " --parallel Collect smaps parallel to accellrate dumping speed\n" - " --exec-pin-start Exec file map's pin start index\n" -+ " --use Use nft API instead of iptables cmd in network locking\n" - "\n" - "Check options:\n" - " Without options, \"criu check\" checks availability of absolutely required\n" -diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h -index a64e977..6dadaba 100644 ---- a/criu/include/cr_options.h -+++ b/criu/include/cr_options.h -@@ -203,6 +203,8 @@ struct cr_options { - int reserve_ports; - int parallel; - int exec_pin_start; -+ int use_nft; -+ int async_clear_nft; - }; - - extern struct cr_options opts; --- -2.34.1 - diff --git a/0061-zdtm-unlink-kdat-before-testing.patch b/0061-zdtm-unlink-kdat-before-testing.patch deleted file mode 100644 index a09395fdb0525c05e0fd1c5cba431f716b7adfa2..0000000000000000000000000000000000000000 --- a/0061-zdtm-unlink-kdat-before-testing.patch +++ /dev/null @@ -1,46 +0,0 @@ -From 926affe76a99871f9a95f3381190bd3fb601e6ec Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Fri, 21 Jan 2022 14:46:21 +0800 -Subject: [PATCH 61/72] zdtm: unlink kdat before testing - -Signed-off-by: fu.lin ---- - test/zdtm.py | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/test/zdtm.py b/test/zdtm.py -index 0feece0..1b2c7da 100755 ---- a/test/zdtm.py -+++ b/test/zdtm.py -@@ -24,6 +24,7 @@ import sys - import tempfile - import time - import socket -+import pathlib - from builtins import (input, int, open, range, str, zip) - - import pycriu as crpc -@@ -2662,6 +2663,9 @@ rp.add_argument("--pre-dump-mode", - help="Use splice or read mode of pre-dumping", - choices=['splice', 'read'], - default='splice') -+rp.add_argument("--kdat", -+ help="Path to criu.kdat, default '/run/criu.kdat'", -+ default="/run/criu.kdat") - - lp = sp.add_parser("list", help="List tests") - lp.set_defaults(action=list_tests) -@@ -2692,6 +2696,10 @@ if opts['debug']: - - if opts['action'] == 'run': - criu.available() -+ # remove kdat file before testing -+ kdat = pathlib.Path(opts['kdat']) -+ if kdat.exists(): -+ kdat.unlink() - for tst in test_classes.values(): - tst.available() - --- -2.34.1 - diff --git a/0062-zdtm-add-host-ns-sysvshm-ipc-case.patch b/0062-zdtm-add-host-ns-sysvshm-ipc-case.patch deleted file mode 100644 index 5a98f4ea34fe0d4ecbcf70146708552f0fca95ff..0000000000000000000000000000000000000000 --- a/0062-zdtm-add-host-ns-sysvshm-ipc-case.patch +++ /dev/null @@ -1,302 +0,0 @@ -From 3d945368250958f5ebf3b4053e07c816adafba33 Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Fri, 21 Jan 2022 17:20:05 +0800 -Subject: [PATCH 62/72] zdtm: add host ns sysvshm ipc case - ---- - test/zdtm/Makefile | 2 +- - test/zdtm/customization/Makefile | 53 ++++++++ - test/zdtm/customization/ipc.c | 202 +++++++++++++++++++++++++++++++ - test/zdtm/customization/ipc.desc | 1 + - 4 files changed, 257 insertions(+), 1 deletion(-) - create mode 100644 test/zdtm/customization/Makefile - create mode 100644 test/zdtm/customization/ipc.c - create mode 100644 test/zdtm/customization/ipc.desc - -diff --git a/test/zdtm/Makefile b/test/zdtm/Makefile -index 24a33f2..8f9857b 100644 ---- a/test/zdtm/Makefile -+++ b/test/zdtm/Makefile -@@ -1,4 +1,4 @@ --SUBDIRS := lib static transition -+SUBDIRS := lib static transition customization - - all: $(SUBDIRS) - .PHONY: all $(SUBDIRS) -diff --git a/test/zdtm/customization/Makefile b/test/zdtm/customization/Makefile -new file mode 100644 -index 0000000..563b7b1 ---- /dev/null -+++ b/test/zdtm/customization/Makefile -@@ -0,0 +1,53 @@ -+LIBDIR := ../lib -+LIB := $(LIBDIR)/libzdtmtst.a -+LDLIBS += $(LIB) -+CPPFLAGS += -I$(LIBDIR) -+ -+TST = \ -+ ipc -+ -+SRC = $(TST:%=%.c) -+OBJ = $(SRC:%.c=%.o) -+DEP = $(SRC:%.c=%.d) -+PID = $(TST:%=%.pid) -+OUT = $(TST:%=%.out) -+ -+include ../Makefile.inc -+ -+all: $(TST) -+install: all -+.PHONY: all install -+ -+$(TST:%=%.pid): %.pid: % -+ $(/dev/null` 2>/dev/null || break; \ -+ sleep 1; \ -+ done -+ -+$(TST): | $(LIB) -+ -+%: %.sh -+ cp $< $@ -+ chmod +x $@ -+ -+$(LIB): force -+ $(Q) $(MAKE) -C $(LIBDIR) -+ -+.PHONY: force start check_start stop wait_stop -diff --git a/test/zdtm/customization/ipc.c b/test/zdtm/customization/ipc.c -new file mode 100644 -index 0000000..2b3c2b1 ---- /dev/null -+++ b/test/zdtm/customization/ipc.c -@@ -0,0 +1,202 @@ -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "zdtmtst.h" -+ -+const char *test_doc="Tests ipc sems and shmems migrate fine"; -+const char *test_author="Pavel Emelianov "; -+ -+static struct sembuf unlock = { -+ .sem_op = 1, -+ .sem_num = 0, -+ .sem_flg = 0, -+}; -+ -+static struct sembuf lock = { -+ .sem_op = -1, -+ .sem_num = 0, -+ .sem_flg = 0, -+}; -+ -+#define DEF_MEM_SIZE (40960) -+unsigned int shmem_size = DEF_MEM_SIZE; -+TEST_OPTION(shmem_size, uint, "Size of shared memory segment", 0); -+ -+#define INIT_CRC (~0) -+ -+#define POISON 0xac -+static inline void poison_area(int *mem) -+{ -+ memset(mem, POISON, shmem_size); -+} -+ -+static int child(key_t key) -+{ -+ int sem, shm, ret, res = 0; -+ uint8_t *mem; -+ uint32_t crc; -+ -+ sem = semget(key, 1, 0777); -+ if (sem == -1) -+ return -1; -+ shm = shmget(key, shmem_size, 0777); -+ if (shm == -1) -+ return -2; -+ mem = shmat(shm, NULL, 0); -+ if (mem == (uint8_t *)-1) -+ return -3; -+ -+ while (test_go()) { -+ ret = semop(sem, &lock, 1); -+ if (ret) { -+ if (errno == EINTR) -+ continue; -+ fail("Error in semop lock"); -+ res = errno; -+ break; -+ } -+ crc = INIT_CRC; -+ datagen(mem, shmem_size, &crc); -+ while ((ret = semop(sem, &unlock, 1)) && (errno == EINTR)); -+ if (ret) { -+ fail("Error in semop unlock"); -+ res = errno; -+ break; -+ } -+ } -+ shmdt(mem); -+ return res; -+} -+ -+int main(int argc, char **argv) -+{ -+ key_t key; -+ int sem, shm, pid1, pid2; -+ int fail_count = 0; -+ uint8_t *mem; -+ uint32_t crc; -+ int ret; -+ -+ test_init(argc, argv); -+ -+ /* using the large number to fill string length */ -+ key = ftok(argv[0], 1822155650); -+ if (key == -1) { -+ pr_perror("Can't make key"); -+ goto out; -+ } -+ -+ sem = semget(key, 1, 0777 | IPC_CREAT | IPC_EXCL); -+ if (sem == -1) { -+ pr_perror("Can't get sem"); -+ goto out; -+ } -+ -+ if (semctl(sem, 0, SETVAL, 1) == -1) { -+ pr_perror("Can't init sem"); -+ fail_count++; -+ goto out_sem; -+ } -+ -+ shm = shmget(key, shmem_size, 0777 | IPC_CREAT | IPC_EXCL); -+ if (shm == -1) { -+ pr_perror("Can't get shm"); -+ fail_count++; -+ goto out_sem; -+ } -+ -+ mem = shmat(shm, NULL, 0); -+ if (mem == (void *)-1) { -+ pr_perror("Can't attach shm"); -+ fail_count++; -+ goto out_shm; -+ } -+ -+ poison_area((int *)mem); -+ -+ pid1 = test_fork(); -+ if (pid1 == -1) { -+ pr_perror("Can't fork 1st time"); -+ goto out_shdt; -+ } else if (pid1 == 0) -+ exit(child(key)); -+ -+ pid2 = test_fork(); -+ if (pid2 == -1) { -+ pr_perror("Can't fork 2nd time"); -+ fail_count++; -+ goto out_child; -+ } else if (pid2 == 0) -+ exit(child(key)); -+ -+ test_daemon(); -+ while (test_go()) { -+ ret = semop(sem, &lock, 1); -+ if (ret) { -+ if (errno == EINTR) -+ continue; -+ fail_count++; -+ fail("Error in semop lock"); -+ break; -+ } -+ if (mem[0] != POISON) { -+ crc = INIT_CRC; -+ if (datachk(mem, shmem_size, &crc)) { -+ fail_count++; -+ fail("Semaphore protection is broken or " -+ "shmem pages are messed"); -+ semop(sem, &unlock, 1); -+ break; -+ } -+ poison_area((int *)mem); -+ } -+ while ((ret = semop(sem, &unlock, 1)) && (errno == EINTR)); -+ if (ret) { -+ fail_count++; -+ fail("Error in semop unlock"); -+ break; -+ } -+ } -+ test_waitsig(); -+ -+ kill(pid2, SIGTERM); -+ waitpid(pid2, &ret, 0); -+ if (!WIFEXITED(ret)) { -+ fail_count++; -+ pr_perror("Child 2 was killed"); -+ } else if (WEXITSTATUS(ret)) { -+ fail_count++; -+ pr_perror("Child 2 couldn't inititalise"); -+ } -+out_child: -+ kill(pid1, SIGTERM); -+ waitpid(pid1, &ret, 0); -+ if (!WIFEXITED(ret)) { -+ fail_count++; -+ pr_perror("Child 1 was killed"); -+ } else if (WEXITSTATUS(ret)) { -+ fail_count++; -+ pr_perror("Child 1 couldn't inititalise"); -+ } -+out_shdt: -+ shmdt(mem); -+out_shm: -+ shmctl(shm, IPC_RMID, NULL); -+out_sem: -+ semctl(sem, 1, IPC_RMID); -+ if (fail_count == 0) -+ pass(); -+out: -+ return 0; -+} -diff --git a/test/zdtm/customization/ipc.desc b/test/zdtm/customization/ipc.desc -new file mode 100644 -index 0000000..63df42a ---- /dev/null -+++ b/test/zdtm/customization/ipc.desc -@@ -0,0 +1 @@ -+{'flavor': 'h'} --- -2.34.1 - diff --git a/0063-zdtm-add-pinmem-testcase.patch b/0063-zdtm-add-pinmem-testcase.patch deleted file mode 100644 index be9a474ace79cf51477fea3c83348fecc182c744..0000000000000000000000000000000000000000 --- a/0063-zdtm-add-pinmem-testcase.patch +++ /dev/null @@ -1,2091 +0,0 @@ -From 4f9fed183bcfda1285d7e99136ff02e3778012ba Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Tue, 25 Jan 2022 19:00:33 +0800 -Subject: [PATCH 63/72] zdtm: add pinmem testcase - -Signed-off-by: fu.lin ---- - test/zdtm.py | 68 ++- - test/zdtm/customization/Makefile | 23 +- - test/zdtm/customization/get_smaps_bits.c | 127 +++++ - test/zdtm/customization/get_smaps_bits.h | 6 + - test/zdtm/customization/ipc.desc | 2 +- - test/zdtm/customization/maps00.c | 271 +++++++++++ - test/zdtm/customization/maps00.desc | 1 + - test/zdtm/customization/maps007.c | 178 +++++++ - test/zdtm/customization/maps007.desc | 1 + - test/zdtm/customization/maps008.c | 514 ++++++++++++++++++++ - test/zdtm/customization/maps008.desc | 1 + - test/zdtm/customization/maps01.c | 183 +++++++ - test/zdtm/customization/maps01.desc | 1 + - test/zdtm/customization/maps02.c | 111 +++++ - test/zdtm/customization/maps02.desc | 1 + - test/zdtm/customization/maps04.c | 57 +++ - test/zdtm/customization/maps04.desc | 1 + - test/zdtm/customization/maps05.c | 91 ++++ - test/zdtm/customization/maps05.desc | 1 + - test/zdtm/customization/maps06.c | 70 +++ - test/zdtm/customization/maps06.desc | 1 + - test/zdtm/customization/maps_file_prot.c | 53 ++ - test/zdtm/customization/maps_file_prot.desc | 1 + - test/zdtm_ct.c | 13 +- - 24 files changed, 1766 insertions(+), 10 deletions(-) - create mode 100644 test/zdtm/customization/get_smaps_bits.c - create mode 100644 test/zdtm/customization/get_smaps_bits.h - create mode 100644 test/zdtm/customization/maps00.c - create mode 100644 test/zdtm/customization/maps00.desc - create mode 100644 test/zdtm/customization/maps007.c - create mode 100644 test/zdtm/customization/maps007.desc - create mode 100644 test/zdtm/customization/maps008.c - create mode 100644 test/zdtm/customization/maps008.desc - create mode 100644 test/zdtm/customization/maps01.c - create mode 100644 test/zdtm/customization/maps01.desc - create mode 100644 test/zdtm/customization/maps02.c - create mode 100644 test/zdtm/customization/maps02.desc - create mode 100644 test/zdtm/customization/maps04.c - create mode 100644 test/zdtm/customization/maps04.desc - create mode 100644 test/zdtm/customization/maps05.c - create mode 100644 test/zdtm/customization/maps05.desc - create mode 100644 test/zdtm/customization/maps06.c - create mode 100644 test/zdtm/customization/maps06.desc - create mode 100644 test/zdtm/customization/maps_file_prot.c - create mode 100644 test/zdtm/customization/maps_file_prot.desc - -diff --git a/test/zdtm.py b/test/zdtm.py -index 1b2c7da..d3b146f 100755 ---- a/test/zdtm.py -+++ b/test/zdtm.py -@@ -367,6 +367,9 @@ def test_flag(tdesc, flag): - return flag in tdesc.get('flags', '').split() - - -+def test_value(tdesc, opt, val): -+ return val in tdesc.get(opt, '').split() -+ - # - # Exception thrown when something inside the test goes wrong, - # e.g. test doesn't start, criu returns with non zero code or -@@ -1445,6 +1448,24 @@ class criu: - "check", ["--no-default-config", "-v0", "--feature", feature], - opts['criu_bin']) == 0 - -+ @staticmethod -+ def check_cmdline(cmdline): -+ with open("/proc/cmdline") as f: -+ bootparams = f.readline().strip().split() -+ -+ for arg in cmdline.split(): -+ words = [word.strip("'\" ") for word in arg.split('=')] -+ matched = False -+ for param in bootparams: -+ prefix = param.startswith(words[0]) -+ if (len(words) == 1 and prefix) \ -+ or (len(words) == 2 and prefix and param[len(words[0])+1:] == words[1]): -+ matched = True -+ break -+ if not matched: -+ return True -+ return False -+ - @staticmethod - def available(): - if not os.access(opts['criu_bin'], os.X_OK): -@@ -1516,6 +1537,11 @@ def cr(cr_api, test, opts): - - iters = iter_parm(opts['iters'], 1) - for i in iters[0]: -+ if "--pin-memory" in test.getdopts(): -+ print("Clear pin memory space") -+ cmd = [opts["criu_bin"], "clear-pin-memory"] -+ subprocess.run(cmd, shell=False, check=True) -+ - pres = iter_parm(opts['pre'], 0) - for p in pres[0]: - if opts['snaps']: -@@ -1965,6 +1991,21 @@ class Launcher: - testline = u"ok %d - %s # SKIP %s" % (self.__runtest, name, reason) - print(testline, file=self.__file_report) - -+ def modprobe_pin_memory(self, load): -+ if not load: -+ return -+ else: -+ found = False -+ with open("/proc/modules") as f: -+ for line in f.readlines(): -+ if "pin_memory" == line.split()[0]: -+ found = True -+ if not found: -+ subprocess.check_call(["modprobe", "pin_memory"]) -+ -+ cmd = [opts["criu_bin"], "init-pagemap-read"] -+ subprocess.check_call(cmd, shell=False) -+ - def run_test(self, name, desc, flavor): - - if len(self.__subs) >= self.__max: -@@ -1972,7 +2013,8 @@ class Launcher: - - with open("/proc/sys/kernel/tainted") as taintfd: - taint = taintfd.read() -- if self.__taint != taint: -+ # 0x1000 means the out of tree module has been loaded -+ if self.__taint != taint and (int(self.__taint) | 0x1000) != int(taint): - raise Exception("The kernel is tainted: %r (%r)" % - (taint, self.__taint)) - -@@ -1997,8 +2039,15 @@ class Launcher: - logf = None - log = None - -+ no_pid_ns = test_value(desc, 'opts', '--use-fork-pid') -+ zdtm_no_pid_ns = "1" if no_pid_ns else "0" -+ # load `pin_memory.ko`,`--pin-memory` option must be used with -+ # `--use-fork-pid`, so don't care `--pin-memory` option -+ self.modprobe_pin_memory(no_pid_ns) -+ - sub = subprocess.Popen(["./zdtm_ct", "zdtm.py"], -- env=dict(os.environ, CR_CT_TEST_INFO=arg), -+ env=dict(os.environ, CR_CT_TEST_INFO=arg, -+ ZDTM_NO_PID_NS=zdtm_no_pid_ns), - stdout=log, - stderr=subprocess.STDOUT, - close_fds=True) -@@ -2009,7 +2058,8 @@ class Launcher: - "start": time.time() - } - -- if test_flag(desc, 'excl'): -+ # pin memory function don't support concurrency -+ if test_flag(desc, 'excl') or test_value(desc, "opts", "--pin-memory"): - self.wait() - - def __wait_one(self, flags): -@@ -2356,6 +2406,12 @@ def run_tests(opts): - launcher.skip(t, "remote lazy pages are not supported") - continue - -+ cmdline = tdesc.get('cmdline', '') -+ if cmdline and criu.check_cmdline(cmdline): -+ launcher.skip( -+ t, f"cmdline '{cmdline}' isn't support, or don't set") -+ continue -+ - test_flavs = tdesc.get('flavor', 'h ns uns').split() - opts_flavs = (opts['flavor'] or 'h,ns,uns').split(',') - if opts_flavs != ['best']: -@@ -2385,6 +2441,7 @@ def run_tests(opts): - if fail: - sys.exit(1) - -+ - sti_fmt = "%-40s%-10s%s" - - -@@ -2664,8 +2721,8 @@ rp.add_argument("--pre-dump-mode", - choices=['splice', 'read'], - default='splice') - rp.add_argument("--kdat", -- help="Path to criu.kdat, default '/run/criu.kdat'", -- default="/run/criu.kdat") -+ help="Path to criu.kdat, default '/run/criu.kdat'", -+ default="/run/criu.kdat") - - lp = sp.add_parser("list", help="List tests") - lp.set_defaults(action=list_tests) -@@ -2700,6 +2757,7 @@ if opts['action'] == 'run': - kdat = pathlib.Path(opts['kdat']) - if kdat.exists(): - kdat.unlink() -+ - for tst in test_classes.values(): - tst.available() - -diff --git a/test/zdtm/customization/Makefile b/test/zdtm/customization/Makefile -index 563b7b1..82348f2 100644 ---- a/test/zdtm/customization/Makefile -+++ b/test/zdtm/customization/Makefile -@@ -3,9 +3,21 @@ LIB := $(LIBDIR)/libzdtmtst.a - LDLIBS += $(LIB) - CPPFLAGS += -I$(LIBDIR) - --TST = \ -- ipc -+TST_NOFILE = \ -+ ipc \ -+ maps01 \ -+ maps02 \ -+ maps04 \ -+ maps05 \ -+ maps007 \ -+ maps008 - -+TST_FILE = \ -+ maps00 \ -+ maps06 \ -+ maps_file_prot -+ -+TST = $(TST_NOFILE) $(TST_FILE) - SRC = $(TST:%=%.c) - OBJ = $(SRC:%.c=%.o) - DEP = $(SRC:%.c=%.d) -@@ -18,9 +30,12 @@ all: $(TST) - install: all - .PHONY: all install - --$(TST:%=%.pid): %.pid: % -+$(TST_NOFILE:%=%.pid): %.pid: % - $( -+#include -+#include "zdtmtst.h" -+ -+#ifndef MAP_HUGETLB -+# define MAP_HUGETLB 0x40000 -+#endif -+ -+#ifndef MADV_HUGEPAGE -+# define MADV_HUGEPAGE 14 -+#endif -+ -+#ifndef MADV_NOHUGEPAGE -+# define MADV_NOHUGEPAGE 15 -+#endif -+ -+#ifndef MADV_DONTDUMP -+# define MADV_DONTDUMP 16 -+#endif -+ -+static void parse_vmflags(char *buf, unsigned long *flags, unsigned long *madv) -+{ -+ char *tok; -+ -+ if (!buf[0]) -+ return; -+ -+ tok = strtok(buf, " \n"); -+ if (!tok) -+ return; -+ -+#define _vmflag_match(_t, _s) (_t[0] == _s[0] && _t[1] == _s[1]) -+ -+ do { -+ /* mmap() block */ -+ if (_vmflag_match(tok, "gd")) -+ *flags |= MAP_GROWSDOWN; -+ else if (_vmflag_match(tok, "lo")) -+ *flags |= MAP_LOCKED; -+ else if (_vmflag_match(tok, "nr")) -+ *flags |= MAP_NORESERVE; -+ else if (_vmflag_match(tok, "ht")) -+ *flags |= MAP_HUGETLB; -+ -+ /* madvise() block */ -+ if (_vmflag_match(tok, "sr")) -+ *madv |= (1ul << MADV_SEQUENTIAL); -+ else if (_vmflag_match(tok, "rr")) -+ *madv |= (1ul << MADV_RANDOM); -+ else if (_vmflag_match(tok, "dc")) -+ *madv |= (1ul << MADV_DONTFORK); -+ else if (_vmflag_match(tok, "dd")) -+ *madv |= (1ul << MADV_DONTDUMP); -+ else if (_vmflag_match(tok, "mg")) -+ *madv |= (1ul << MADV_MERGEABLE); -+ else if (_vmflag_match(tok, "hg")) -+ *madv |= (1ul << MADV_HUGEPAGE); -+ else if (_vmflag_match(tok, "nh")) -+ *madv |= (1ul << MADV_NOHUGEPAGE); -+ -+ /* -+ * Anything else is just ignored. -+ */ -+ } while ((tok = strtok(NULL, " \n"))); -+ -+#undef _vmflag_match -+} -+ -+#define is_hex_digit(c) \ -+ (((c) >= '0' && (c) <= '9') || \ -+ ((c) >= 'a' && (c) <= 'f') || \ -+ ((c) >= 'A' && (c) <= 'F')) -+ -+static int is_vma_range_fmt(char *line, unsigned long *start, unsigned long *end) -+{ -+ char *p = line; -+ while (*line && is_hex_digit(*line)) -+ line++; -+ -+ if (*line++ != '-') -+ return 0; -+ -+ while (*line && is_hex_digit(*line)) -+ line++; -+ -+ if (*line++ != ' ') -+ return 0; -+ -+ sscanf(p, "%lx-%lx", start, end); -+ return 1; -+} -+ -+int get_smaps_bits(unsigned long where, unsigned long *flags, unsigned long *madv) -+{ -+ unsigned long start = 0, end = 0; -+ FILE *smaps = NULL; -+ char buf[1024]; -+ int found = 0; -+ -+ if (!where) -+ return 0; -+ -+ smaps = fopen("/proc/self/smaps", "r"); -+ if (!smaps) { -+ pr_perror("Can't open smaps"); -+ return -1; -+ } -+ -+ while (fgets(buf, sizeof(buf), smaps)) { -+ is_vma_range_fmt(buf, &start, &end); -+ -+ if (!strncmp(buf, "VmFlags: ", 9) && start == where) { -+ found = 1; -+ parse_vmflags(buf, flags, madv); -+ break; -+ } -+ } -+ -+ fclose(smaps); -+ -+ if (!found) { -+ pr_perror("VmFlags not found for %lx", where); -+ return -1; -+ } -+ -+ return 0; -+} -diff --git a/test/zdtm/customization/get_smaps_bits.h b/test/zdtm/customization/get_smaps_bits.h -new file mode 100644 -index 0000000..ce1070d ---- /dev/null -+++ b/test/zdtm/customization/get_smaps_bits.h -@@ -0,0 +1,6 @@ -+#ifndef ZDTM_GET_SMAPS_BITS_H_ -+#define ZDTM_GET_SMAPS_BITS_H_ -+ -+extern int get_smaps_bits(unsigned long where, unsigned long *flags, unsigned long *madv); -+ -+#endif /* ZDTM_GET_SMAPS_BITS_H_ */ -diff --git a/test/zdtm/customization/ipc.desc b/test/zdtm/customization/ipc.desc -index 63df42a..4c127a0 100644 ---- a/test/zdtm/customization/ipc.desc -+++ b/test/zdtm/customization/ipc.desc -@@ -1 +1 @@ --{'flavor': 'h'} -+{'arch': 'aarch64', 'flavor': 'h'} -diff --git a/test/zdtm/customization/maps00.c b/test/zdtm/customization/maps00.c -new file mode 100644 -index 0000000..83533f8 ---- /dev/null -+++ b/test/zdtm/customization/maps00.c -@@ -0,0 +1,271 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "zdtmtst.h" -+ -+const char *test_doc = "Create all sorts of maps and compare /proc/pid/maps\n" -+ "before and after migration\n"; -+const char *test_author = "Pavel Emelianov "; -+ -+char *filename; -+TEST_OPTION(filename, string, "file name", 1); -+ -+const static int map_prots[] = { -+ PROT_NONE, -+ PROT_READ, -+ PROT_READ | PROT_WRITE, -+ PROT_READ | PROT_WRITE | PROT_EXEC, -+}; -+#define NUM_MPROTS sizeof(map_prots) / sizeof(int) -+#define RW_PROT(x) ((x) & (PROT_READ | PROT_WRITE)) -+#define X_PROT(x) ((x) & PROT_EXEC) -+ -+int check_prot(int src_prot, int dst_prot) -+{ -+ if (RW_PROT(src_prot) != RW_PROT(dst_prot)) -+ return 0; -+ /* If exec bit will be enabled may depend on NX capability of CPUs of -+ * source and destination nodes. In any case, migrated mapping should -+ * not have less permissions than newly created one -+ ** -+ * A is a subset of B iff (A & B) == A -+ */ -+ return (X_PROT(dst_prot) & X_PROT(src_prot)) == X_PROT(dst_prot); -+} -+ -+const static int map_flags[] = { -+ MAP_PRIVATE, -+ MAP_SHARED, -+ MAP_PRIVATE | MAP_ANONYMOUS, -+ MAP_SHARED | MAP_ANONYMOUS -+}; -+#define NUM_MFLAGS sizeof(map_flags) / sizeof(int) -+#define NUM_MAPS NUM_MPROTS * NUM_MFLAGS -+#define ONE_MAP_SIZE 0x2000 -+ -+struct map -+{ -+ int prot; -+ int prot_real; -+ int flag; -+ char filename[256]; -+ int fd; -+ void *ptr; -+}; -+ -+static void init_map(struct map *map, int prot_no, int flag_no) -+{ -+ map->fd = -1; -+ map->prot = map_prots[prot_no]; -+ map->flag = map_flags[flag_no]; -+} -+ -+static int make_map(struct map *map) -+{ -+ uint32_t crc; -+ uint8_t buf[ONE_MAP_SIZE]; -+ static int i = 0; -+ -+ if (!(map->flag & MAP_ANONYMOUS)) { -+ /* need file */ -+ if (snprintf(map->filename, sizeof(map->filename), -+ "%s-%02d", filename, i++) >= sizeof(map->filename)) { -+ pr_perror("filename %s is too long", filename); -+ return -1; -+ } -+ -+ map->fd = open(map->filename, O_RDWR | O_CREAT, 0600); -+ if (map->fd < 0) { -+ pr_perror("can't open %s", map->filename); -+ return -1; -+ } -+ -+ crc = ~0; -+ datagen(buf, sizeof(buf), &crc); -+ if (write(map->fd, buf, sizeof(buf)) != sizeof(buf)) { -+ pr_perror("failed to write %s", map->filename); -+ return -1; -+ } -+ } -+ -+ map->ptr = mmap(NULL, ONE_MAP_SIZE, map->prot, map->flag, map->fd, 0); -+ if (map->ptr == MAP_FAILED) { -+ pr_perror("can't create mapping"); -+ return -1; -+ } -+ -+ if ((map->flag & MAP_ANONYMOUS) && (map->prot & PROT_WRITE)) { -+ /* can't fill it with data otherwise */ -+ crc = ~0; -+ datagen(map->ptr, ONE_MAP_SIZE, &crc); -+ } -+ -+ test_msg("map: ptr %p flag %8x prot %8x\n", -+ map->ptr, map->flag, map->prot); -+ -+ return 0; -+} -+ -+static sigjmp_buf segv_ret; /* we need sig*jmp stuff, otherwise SIGSEGV will reset our handler */ -+static void segfault(int signo) -+{ -+ siglongjmp(segv_ret, 1); -+} -+ -+/* -+ * after test func should be placed check map, because size of test_func -+ * is calculated as (check_map-test_func) -+ */ -+int test_func(void) -+{ -+ return 1; -+} -+static int check_map(struct map *map) -+{ -+ int prot = PROT_WRITE | PROT_READ | PROT_EXEC; -+ -+ if (signal(SIGSEGV, segfault) == SIG_ERR) -+ { -+ fail("setting SIGSEGV handler failed: %m\n"); -+ return -1; -+ } -+ if (!sigsetjmp(segv_ret, 1)) -+ { -+ uint32_t crc = ~0; -+ if (datachk(map->ptr, ONE_MAP_SIZE, &crc)) /* perform read access */ -+ if (!(map->flag & MAP_ANONYMOUS) || -+ (map->prot & PROT_WRITE)) { /* anon maps could only be filled when r/w */ -+ fail("CRC mismatch: ptr %p flag %8x prot %8x\n", -+ map->ptr, map->flag, map->prot); -+ return -1; -+ } -+ /* prot |= PROT_READ// need barrier before this line, -+ because compiler change order commands. -+ I finded one method: look at next lines*/ -+ } else -+ prot &= PROT_WRITE | !PROT_READ | PROT_EXEC; -+ -+ if (signal(SIGSEGV, segfault) == SIG_ERR) -+ { -+ fail("setting SIGSEGV handler failed: %m\n"); -+ return -1; -+ } -+ -+ if (!sigsetjmp(segv_ret, 1)) -+ { -+ * (int *) (map->ptr) = 1234; /* perform write access */ -+ } else -+ prot &= !PROT_WRITE | PROT_READ | PROT_EXEC; -+ -+ if (signal(SIGSEGV, segfault) == SIG_ERR) -+ { -+ fail("restoring SIGSEGV handler failed: %m\n"); -+ return -1; -+ } -+ -+ if (!sigsetjmp(segv_ret, 1)) -+ { -+ if (map->prot & PROT_WRITE) { -+ memcpy(map->ptr,test_func, ONE_MAP_SIZE); -+ __builtin___clear_cache(map->ptr, map->ptr+ONE_MAP_SIZE); -+ } else { -+ if (!(map->flag & MAP_ANONYMOUS)) { -+ uint8_t funlen = (uint8_t *)check_map - (uint8_t *)test_func; -+ lseek(map->fd,0,SEEK_SET); -+ if (write(map->fd,test_func,funlen)filename); -+ return -1; -+ } -+ } -+ } -+ if (!(map->flag & MAP_ANONYMOUS) || (map->prot & PROT_WRITE)) { -+ /* Function body has been copied into the mapping */ -+ ((int (*)(void))map->ptr)(); /* perform exec access */ -+ } else { -+ /* No way to copy function body into mapping, -+ * clear exec bit from effective protection -+ */ -+ prot &= PROT_WRITE | PROT_READ | !PROT_EXEC; -+ } -+ } else -+ prot &= PROT_WRITE | PROT_READ | !PROT_EXEC; -+ -+ if (signal(SIGSEGV, SIG_DFL) == SIG_ERR) -+ { -+ fail("restoring SIGSEGV handler failed: %m\n"); -+ return -1; -+ } -+ -+ return prot; -+} -+ -+static void destroy_map(struct map *map) -+{ -+ munmap(map->ptr, ONE_MAP_SIZE); -+ -+ if (map->fd >= 0) -+ { -+ close(map->fd); -+ unlink(map->filename); -+ } -+} -+ -+ -+#define MAPS_LEN 0x10000 -+ -+int main(int argc, char ** argv) -+{ -+ struct map maps[NUM_MAPS] = {}, maps_compare[NUM_MAPS] = {}; -+ int i, j, k; -+ test_init(argc, argv); -+ -+ k = 0; -+ for (i = 0; i < NUM_MPROTS; i++) -+ for (j = 0; j < NUM_MFLAGS; j++) -+ init_map(maps + k++, i, j); -+ -+ for (i = 0; i < NUM_MAPS; i++) -+ if (make_map(maps + i)) -+ goto err; -+ -+ test_daemon(); -+ test_waitsig(); -+ -+ for (i = 0; i < NUM_MAPS; i++) -+ if ((maps[i].prot_real=check_map(maps + i))<0) -+ goto err; -+ k=0; -+ for (i = 0; i < NUM_MPROTS; i++) -+ for (j = 0; j < NUM_MFLAGS; j++) -+ init_map(maps_compare + k++, i, j); -+ for (i = 0; i < NUM_MAPS; i++) -+ if (make_map(maps_compare+ i)) -+ goto err; -+ for (i = 0; i < NUM_MAPS; i++) -+ if ((maps_compare[i].prot_real=check_map(maps_compare + i))<0) -+ goto err; -+ for (i = 0; i< NUM_MAPS; i++) -+ if (!check_prot(maps[i].prot_real, maps_compare[i].prot_real)){ -+ fail("protection on %i (flag=%d prot=%d) maps has changed (prot=%d(expected %d))", -+ i, maps[i].flag, maps[i].prot, maps[i].prot_real, maps_compare[i].prot_real); -+ goto err; -+ } -+ -+ pass(); -+ -+ for (i = 0; i < NUM_MAPS; i++) { -+ destroy_map(maps + i); -+ destroy_map(maps_compare + i); -+ } -+ return 0; -+ -+err: -+ return 1; -+} -diff --git a/test/zdtm/customization/maps00.desc b/test/zdtm/customization/maps00.desc -new file mode 100644 -index 0000000..dad462e ---- /dev/null -+++ b/test/zdtm/customization/maps00.desc -@@ -0,0 +1 @@ -+{'arch': 'aarch64', 'flavor': 'h', 'opts': '--pin-memory --use-fork-pid', 'flags': 'suid', 'cmdline': 'pinmemory max_pin_pid_num'} -diff --git a/test/zdtm/customization/maps007.c b/test/zdtm/customization/maps007.c -new file mode 100644 -index 0000000..ee5e7c7 ---- /dev/null -+++ b/test/zdtm/customization/maps007.c -@@ -0,0 +1,178 @@ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "zdtmtst.h" -+#include "lock.h" -+ -+#define MAP_SIZE (1UL << 20) -+#define MEM_SIZE (1UL << 29) -+ -+const char *test_doc = "create random mappings and touch memory"; -+ -+int sys_process_vm_readv(pid_t pid, void *addr, void *buf, int size) -+{ -+ struct iovec lvec = {.iov_base = buf, .iov_len = size }; -+ struct iovec rvec = {.iov_base = addr, .iov_len = size }; -+ /* workaround bug in glibc with sixth argument of syscall */ -+ char nop[PAGE_SIZE]; -+ -+ memset(nop, 0, sizeof(nop)); -+ -+ return syscall(__NR_process_vm_readv, pid, &lvec, 1, &rvec, 1, 0); -+} -+ -+/* The child follows the parents two steps behind. */ -+#define MAX_DELTA 1000 -+int main(int argc, char **argv) -+{ -+ void *start, *end, *p; -+ pid_t child; -+ struct { -+ futex_t delta; -+ futex_t stop; -+ } *shm; -+ uint32_t v; -+ unsigned long long count = 0; -+ int i; -+ -+ test_init(argc, argv); -+ -+ /* shared memory for synchronization */ -+ shm = mmap(NULL, PAGE_SIZE, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_SHARED, -1, 0); -+ if (shm == MAP_FAILED) -+ return -1; -+ -+ /* allocate workspace */ -+ start = mmap(NULL, MEM_SIZE, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); -+ if (start == MAP_FAILED) -+ return -1; -+ -+ test_msg("%p-%p\n", start, start + MEM_SIZE); -+ -+ end = start + MEM_SIZE; -+ -+ v = 0; -+ futex_set(&shm->delta, v); -+ futex_set(&shm->stop, 0); -+ -+ child = fork(); -+ if (child < 0) { -+ pr_perror("fork"); -+ return 1; -+ } -+ -+ while (1) { -+ void *ret; -+ unsigned long size; -+ int prot = PROT_NONE; -+ -+ if (child) { -+ if (!test_go()) -+ break; -+ futex_wait_while_gt(&shm->delta, 2 * MAX_DELTA); -+ futex_inc_and_wake(&shm->delta); -+ } else { -+ if (!futex_get(&shm->stop)) -+ /* shm->delta must be always bigger than MAX_DELTA */ -+ futex_wait_while_lt(&shm->delta, MAX_DELTA + 2); -+ else if (count % 100 == 0) -+ test_msg("count %llu delta %d\n", -+ count, futex_get(&shm->delta)); /* heartbeat */ -+ -+ if (futex_get(&shm->stop) && atomic_get(&shm->delta.raw) == MAX_DELTA) -+ break; -+ futex_dec_and_wake(&shm->delta); -+ } -+ -+ count++; -+ if (child && count == MAX_DELTA + 1) -+ test_daemon(); -+ -+ p = start + ((lrand48() * PAGE_SIZE) % MEM_SIZE); -+ size = lrand48() * PAGE_SIZE; -+ size %= (end - p); -+ size %= MAP_SIZE; -+ if (size == 0) -+ size = PAGE_SIZE; -+ -+ if (lrand48() % 2) -+ prot |= PROT_READ; -+ if (lrand48() % 2) -+ prot |= PROT_EXEC; -+ if (lrand48() % 2) -+ prot |= PROT_WRITE; -+ -+ ret = mmap(p, size, prot, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); -+ if (ret == MAP_FAILED) { -+ pr_perror("%p-%p", p, p + size); -+ goto err; -+ } -+ -+ if (!(prot & PROT_WRITE)) -+ continue; -+ -+ for (i = 0; i < lrand48() % 50; i++) { -+ char *t = p + (lrand48() * PAGE_SIZE) % (size); -+ t[0] = lrand48(); -+ } -+ } -+ test_msg("count %llu\n", count); -+ -+ if (child == 0) { -+ if (!test_go()) -+ pr_perror("unexpected state"); -+ futex_set_and_wake(&shm->stop, 2); -+ test_waitsig(); -+ return 0; -+ } else { -+ int readable = 0, status = -1; -+ -+ /* stop the child */ -+ futex_set(&shm->stop, 1); -+ futex_add_and_wake(&shm->delta, MAX_DELTA); -+ /* wait until the child will be in the same point */ -+ futex_wait_until(&shm->stop, 2); -+ -+ /* check that child and parent have the identical content of memory */ -+ for (p = start; p < end; p += PAGE_SIZE) { -+ char rbuf[PAGE_SIZE], lbuf[PAGE_SIZE]; -+ int rret, lret; -+ -+ lret = sys_process_vm_readv(getpid(), p, lbuf, PAGE_SIZE); -+ rret = sys_process_vm_readv(child, p, rbuf, PAGE_SIZE); -+ if (rret != lret) { -+ pr_perror("%p %d %d", p, lret, rret); -+ goto err; -+ } -+ if (lret < 0) -+ continue; -+ readable++; -+ if (memcmp(rbuf, lbuf, PAGE_SIZE)) { -+ pr_perror("%p", p); -+ goto err; -+ } -+ } -+ test_msg("readable %d\n", readable); -+ kill(child, SIGTERM); -+ wait(&status); -+ if (status != 0) { -+ pr_perror("Non-zero exit code: %d", status); -+ goto err; -+ } -+ pass(); -+ } -+ -+ return 0; -+err: -+ kill(child, SIGSEGV); -+ *((volatile int *) 0) = 0; -+ return 1; -+} -diff --git a/test/zdtm/customization/maps007.desc b/test/zdtm/customization/maps007.desc -new file mode 100644 -index 0000000..9ed7e46 ---- /dev/null -+++ b/test/zdtm/customization/maps007.desc -@@ -0,0 +1 @@ -+{'arch': 'aarch64', 'opts': '--pin-memory --use-fork-pid', 'flags': 'suid', 'flavor': 'h', 'cmdline': 'pinmemory max_pin_pid_num'} -diff --git a/test/zdtm/customization/maps008.c b/test/zdtm/customization/maps008.c -new file mode 100644 -index 0000000..7ed7c10 ---- /dev/null -+++ b/test/zdtm/customization/maps008.c -@@ -0,0 +1,514 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "zdtmtst.h" -+#include "lock.h" -+ -+const char *test_doc = "ps tree with anon shared vmas for dedup"; -+ -+/* -+ * 1. ps tree with non triavial anon shmem vmas is created first. -+ * 2. Each process gets its portion of shmem vmas. -+ * 3. Each process continuously datagens its portion until -+ * criu dump is finished. -+ * 4. Each process datachecks all its shmem portions after restore. -+ * 5. Contents of anon shmem vmas are checked for equality in -+ * different processes. -+ */ -+ -+typedef int (*proc_func_t)(task_waiter_t *setup_waiter); -+ -+static pid_t fork_and_setup(proc_func_t pfunc) -+{ -+ task_waiter_t setup_waiter; -+ pid_t pid; -+ -+ task_waiter_init(&setup_waiter); -+ pid = test_fork(); -+ if (pid < 0) { -+ pr_perror("fork failed"); -+ exit(1); -+ } -+ -+ if (pid == 0) -+ exit(pfunc(&setup_waiter)); -+ -+ task_waiter_wait4(&setup_waiter, pid); -+ task_waiter_fini(&setup_waiter); -+ return pid; -+} -+ -+static void cont_and_wait_child(pid_t pid) -+{ -+ int status; -+ -+ kill(pid, SIGTERM); -+ waitpid(pid, &status, 0); -+ if (WIFEXITED(status)) { -+ if (WEXITSTATUS(status)) -+ exit(WEXITSTATUS(status)); -+ } else -+ exit(1); -+} -+ -+static void *mmap_ashmem(size_t size) -+{ -+ void *mem = mmap(NULL, size, PROT_WRITE | PROT_READ, -+ MAP_SHARED | MAP_ANONYMOUS, -1, 0); -+ if (mem == MAP_FAILED) { -+ pr_perror("Can't map shmem %zx", size); -+ exit(1); -+ } -+ return mem; -+} -+ -+static void *mmap_proc_mem(pid_t pid, unsigned long addr, -+ unsigned long size) -+{ -+ int fd; -+ void *mem; -+ char path[PATH_MAX]; -+ -+ snprintf(path, PATH_MAX, "/proc/%d/map_files/%lx-%lx", -+ (int)pid, addr, addr + size); -+ fd = open(path, O_RDWR); -+ if (fd == -1) { -+ pr_perror("Can't open file %s", path); -+ exit(1); -+ } -+ -+ mem = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0); -+ close(fd); -+ if (mem == MAP_FAILED) { -+ pr_perror("Can't map file %s", path); -+ exit(1); -+ } -+ return mem; -+} -+ -+static void check_mem_eq(void *addr1, size_t size1, void *addr2, size_t size2) -+{ -+ unsigned long min_size = size1 < size2 ? size1 : size2; -+ -+ if (memcmp(addr1, addr2, min_size)) { -+ pr_err("Mem differs %lx %lx %lx", (unsigned long)addr1, -+ (unsigned long)addr2, min_size); -+ exit(1); -+ } -+} -+ -+static void xmunmap(void *map, size_t size) -+{ -+ if (munmap(map, size)) { -+ pr_err("xmunmap"); -+ exit(1); -+ } -+} -+ -+static void chk_proc_mem_eq(pid_t pid1, void *addr1, unsigned long size1, -+ pid_t pid2, void *addr2, unsigned long size2) -+{ -+ void *map1, *map2; -+ -+ map1 = mmap_proc_mem(pid1, (unsigned long)addr1, size1); -+ map2 = mmap_proc_mem(pid2, (unsigned long)addr2, size2); -+ check_mem_eq(map1, size1, map2, size2); -+ xmunmap(map1, size1); -+ xmunmap(map2, size2); -+} -+ -+/* -+ * ps tree: -+ * proc1_______________ -+ * | | | -+ * proc11___ proc12 proc13 -+ * | | | -+ * proc111 proc112 proc131 -+ */ -+#define PROC_CNT 7 -+ -+#define PROC1_PGIX 0 -+#define PROC11_PGIX 1 -+#define PROC12_PGIX 2 -+#define PROC13_PGIX 3 -+#define PROC111_PGIX 4 -+#define PROC112_PGIX 5 -+#define PROC131_PGIX 6 -+#define ZERO_PGIX 7 -+/* unused pgix: 8 */ -+#define MEM_PERIOD (9 * PAGE_SIZE) -+ -+struct pstree { -+ pid_t proc1; -+ pid_t proc11; -+ pid_t proc12; -+ pid_t proc13; -+ pid_t proc111; -+ pid_t proc112; -+ pid_t proc131; -+}; -+struct pstree *pstree; -+ -+struct test_sync { -+ futex_t datagen; -+ futex_t datagen_exit_cnt; -+}; -+struct test_sync *test_sync; -+ -+size_t mem1_size, mem2_size, mem3_size; -+uint8_t *mem1, *mem2, *mem3; -+ -+#define CRC_EPOCH_OFFSET (PAGE_SIZE - sizeof(uint32_t)) -+ -+static void read_each_pg(volatile uint8_t *mem, size_t size, size_t off) -+{ -+ if (!mem) -+ return; -+ -+ while (off < size) { -+ (mem + off)[0]; -+ off += MEM_PERIOD; -+ } -+} -+ -+void datagen_each_pg(uint8_t *mem, size_t size, size_t off, uint32_t crc_epoch) -+{ -+ if (!mem) -+ return; -+ -+ while (futex_get(&test_sync->datagen) && (off < size)) { -+ uint32_t crc = crc_epoch; -+ -+ datagen(mem + off, CRC_EPOCH_OFFSET, &crc); -+ *(uint32_t *)(mem + off + CRC_EPOCH_OFFSET) = crc_epoch; -+ off += MEM_PERIOD; -+ } -+} -+ -+void datachck_each_pg(uint8_t *mem, size_t size, size_t off) -+{ -+ if (!mem) -+ return; -+ -+ while (off < size) { -+ uint32_t crc = *(uint32_t *)(mem + off + CRC_EPOCH_OFFSET); -+ -+ if (datachk(mem + off, CRC_EPOCH_OFFSET, &crc)) -+ exit(1); -+ off += MEM_PERIOD; -+ } -+} -+ -+static void mems_read_each_pgix(size_t pgix) -+{ -+ const size_t off = pgix * PAGE_SIZE; -+ -+ read_each_pg(mem1, mem1_size, off); -+ read_each_pg(mem2, mem2_size, off); -+ read_each_pg(mem3, mem3_size, off); -+} -+ -+static void mems_datagen_each_pgix(size_t pgix, uint32_t *crc_epoch) -+{ -+ const size_t off = pgix * PAGE_SIZE; -+ -+ ++(*crc_epoch); -+ datagen_each_pg(mem1, mem1_size, off, *crc_epoch); -+ datagen_each_pg(mem2, mem2_size, off, *crc_epoch); -+ datagen_each_pg(mem3, mem3_size, off, *crc_epoch); -+} -+ -+static void mems_datachck_each_pgix(size_t pgix) -+{ -+ const size_t off = pgix * PAGE_SIZE; -+ -+ datachck_each_pg(mem1, mem1_size, off); -+ datachck_each_pg(mem2, mem2_size, off); -+ datachck_each_pg(mem3, mem3_size, off); -+} -+ -+static int proc131_func(task_waiter_t *setup_waiter) -+{ -+ uint32_t crc_epoch = 0; -+ -+ pstree->proc131 = getpid(); -+ mems_datagen_each_pgix(PROC131_PGIX, &crc_epoch); -+ task_waiter_complete_current(setup_waiter); -+ -+ while (futex_get(&test_sync->datagen)) -+ mems_datagen_each_pgix(PROC131_PGIX, &crc_epoch); -+ futex_inc_and_wake(&test_sync->datagen_exit_cnt); -+ test_waitsig(); -+ -+ mems_datachck_each_pgix(PROC131_PGIX); -+ return 0; -+} -+ -+static int proc13_func(task_waiter_t *setup_waiter) -+{ -+ size_t MEM1_HOLE_START = 2 * MEM_PERIOD; -+ size_t MEM1_HOLE_SIZE = 1 * MEM_PERIOD; -+ uint32_t crc_epoch = 0; -+ -+ pstree->proc13 = getpid(); -+ xmunmap(mem1 + MEM1_HOLE_START, MEM1_HOLE_SIZE); -+ xmunmap(mem2, mem2_size); -+ xmunmap(mem3, mem3_size); -+ mem2 = mem1 + MEM1_HOLE_START + MEM1_HOLE_SIZE; -+ mem2_size = mem1_size - (mem2 - mem1); -+ mem1_size = MEM1_HOLE_START; -+ mem3 = mmap_ashmem(mem3_size); -+ mems_datagen_each_pgix(PROC13_PGIX, &crc_epoch); -+ fork_and_setup(proc131_func); -+ task_waiter_complete_current(setup_waiter); -+ -+ while (futex_get(&test_sync->datagen)) -+ mems_datagen_each_pgix(PROC13_PGIX, &crc_epoch); -+ futex_inc_and_wake(&test_sync->datagen_exit_cnt); -+ test_waitsig(); -+ -+ mems_datachck_each_pgix(PROC13_PGIX); -+ -+ chk_proc_mem_eq(pstree->proc13, mem1, mem1_size, -+ pstree->proc131, mem1, mem1_size); -+ chk_proc_mem_eq(pstree->proc13, mem2, mem2_size, -+ pstree->proc131, mem2, mem2_size); -+ chk_proc_mem_eq(pstree->proc13, mem3, mem3_size, -+ pstree->proc131, mem3, mem3_size); -+ -+ cont_and_wait_child(pstree->proc131); -+ return 0; -+} -+ -+static int proc12_func(task_waiter_t *setup_waiter) -+{ -+ uint32_t crc_epoch = 0; -+ -+ pstree->proc12 = getpid(); -+ mems_datagen_each_pgix(PROC12_PGIX, &crc_epoch); -+ task_waiter_complete_current(setup_waiter); -+ -+ while (futex_get(&test_sync->datagen)) -+ mems_datagen_each_pgix(PROC12_PGIX, &crc_epoch); -+ futex_inc_and_wake(&test_sync->datagen_exit_cnt); -+ test_waitsig(); -+ -+ mems_datachck_each_pgix(PROC12_PGIX); -+ -+ return 0; -+} -+ -+static int proc111_func(task_waiter_t *setup_waiter) -+{ -+ uint32_t crc_epoch = 0; -+ -+ pstree->proc111 = getpid(); -+ mems_datagen_each_pgix(PROC111_PGIX, &crc_epoch); -+ task_waiter_complete_current(setup_waiter); -+ -+ while (futex_get(&test_sync->datagen)) -+ mems_datagen_each_pgix(PROC111_PGIX, &crc_epoch); -+ futex_inc_and_wake(&test_sync->datagen_exit_cnt); -+ test_waitsig(); -+ -+ mems_datachck_each_pgix(PROC111_PGIX); -+ return 0; -+} -+ -+static int proc112_func(task_waiter_t *setup_waiter) -+{ -+ uint32_t crc_epoch = 0; -+ -+ pstree->proc112 = getpid(); -+ mems_datagen_each_pgix(PROC112_PGIX, &crc_epoch); -+ task_waiter_complete_current(setup_waiter); -+ -+ while (futex_get(&test_sync->datagen)) -+ mems_datagen_each_pgix(PROC112_PGIX, &crc_epoch); -+ futex_inc_and_wake(&test_sync->datagen_exit_cnt); -+ test_waitsig(); -+ -+ mems_datachck_each_pgix(PROC112_PGIX); -+ return 0; -+} -+ -+static int proc11_func(task_waiter_t *setup_waiter) -+{ -+ const size_t MEM3_START_CUT = 1 * MEM_PERIOD; -+ const size_t MEM3_END_CUT = 2 * MEM_PERIOD; -+ void *mem3_old = mem3; -+ size_t mem3_size_old = mem3_size; -+ uint32_t crc_epoch = 0; -+ uint8_t *proc1_mem3; -+ -+ pstree->proc11 = getpid(); -+ xmunmap(mem3, MEM3_START_CUT); -+ mem3 += MEM3_START_CUT; -+ mem3_size -= MEM3_START_CUT; -+ fork_and_setup(proc111_func); -+ fork_and_setup(proc112_func); -+ xmunmap(mem3 + mem3_size - MEM3_END_CUT, MEM3_END_CUT); -+ mem3_size -= MEM3_END_CUT; -+ mems_datagen_each_pgix(PROC11_PGIX, &crc_epoch); -+ task_waiter_complete_current(setup_waiter); -+ -+ while (futex_get(&test_sync->datagen)) -+ mems_datagen_each_pgix(PROC11_PGIX, &crc_epoch); -+ futex_inc_and_wake(&test_sync->datagen_exit_cnt); -+ test_waitsig(); -+ -+ mems_datachck_each_pgix(PROC11_PGIX); -+ -+ chk_proc_mem_eq(pstree->proc11, mem1, mem1_size, -+ pstree->proc111, mem1, mem1_size); -+ chk_proc_mem_eq(pstree->proc11, mem1, mem1_size, -+ pstree->proc112, mem1, mem1_size); -+ -+ chk_proc_mem_eq(pstree->proc11, mem2, mem2_size, -+ pstree->proc111, mem2, mem2_size); -+ chk_proc_mem_eq(pstree->proc11, mem2, mem2_size, -+ pstree->proc112, mem2, mem2_size); -+ -+ chk_proc_mem_eq(pstree->proc11, mem3, mem3_size, -+ pstree->proc111, mem3, mem3_size + MEM3_END_CUT); -+ chk_proc_mem_eq(pstree->proc11, mem3, mem3_size, -+ pstree->proc112, mem3, mem3_size + MEM3_END_CUT); -+ -+ proc1_mem3 = mmap_proc_mem(pstree->proc1, -+ (unsigned long)mem3_old, mem3_size_old); -+ check_mem_eq(mem3, mem3_size, proc1_mem3 + MEM3_START_CUT, mem3_size); -+ xmunmap(proc1_mem3, mem3_size_old); -+ -+ cont_and_wait_child(pstree->proc111); -+ cont_and_wait_child(pstree->proc112); -+ return 0; -+} -+ -+#define MAX(a, b) ((a) > (b) ? (a) : (b)) -+#define MB(n) ((n) * (1UL << 20)) -+ -+static int proc1_func(void) -+{ -+ uint32_t crc_epoch = 0; -+ uint8_t *mem2_old = NULL; -+ -+ /* -+ * Min mem size: -+ * At least 5 mem periods for mem pages and vma holes. -+ * At least 1 MB mem size not to test on tiny working set. -+ */ -+ mem1_size = MEM_PERIOD * MAX(5, MB(1) / MEM_PERIOD + 1); -+ mem2_size = mem1_size * 2; -+ mem3_size = mem2_size * 3; -+ -+ futex_set(&test_sync->datagen, 1); -+ pstree->proc1 = getpid(); -+ mem1 = mmap_ashmem(mem1_size); -+ mem2 = mmap_ashmem(mem2_size); -+ mem3 = mmap_ashmem(mem3_size); -+ mems_datagen_each_pgix(PROC1_PGIX, &crc_epoch); -+ mems_read_each_pgix(ZERO_PGIX); -+ -+ fork_and_setup(proc11_func); -+ fork_and_setup(proc12_func); -+ fork_and_setup(proc13_func); -+ -+ xmunmap(mem1, mem1_size); -+ if (mremap(mem2, mem2_size, mem1_size, MREMAP_MAYMOVE | MREMAP_FIXED, -+ mem1) != mem1) { -+ pr_perror("proc1 mem2 remap"); -+ exit(1); -+ } -+ mem2_old = mem2; -+ mem2 = NULL; -+ -+ test_daemon(); -+ while (test_go()) -+ mems_datagen_each_pgix(PROC1_PGIX, &crc_epoch); -+ test_waitsig(); -+ futex_set(&test_sync->datagen_exit_cnt, 0); -+ futex_set(&test_sync->datagen, 0); -+ futex_wait_while(&test_sync->datagen_exit_cnt, PROC_CNT); -+ -+ mems_datachck_each_pgix(PROC1_PGIX); -+ -+ chk_proc_mem_eq(pstree->proc1, mem1, mem1_size, -+ pstree->proc11, mem2_old, mem2_size); -+ chk_proc_mem_eq(pstree->proc1, mem1, mem1_size, -+ pstree->proc12, mem2_old, mem2_size); -+ -+ chk_proc_mem_eq(pstree->proc1, mem3, mem3_size, -+ pstree->proc12, mem3, mem3_size); -+ -+ cont_and_wait_child(pstree->proc11); -+ cont_and_wait_child(pstree->proc12); -+ cont_and_wait_child(pstree->proc13); -+ -+ pass(); -+ return 0; -+} -+ -+static void kill_pstree_from_root(void) -+{ -+ if (getpid() != pstree->proc1) -+ return; -+ -+ kill(pstree->proc11, SIGKILL); -+ kill(pstree->proc12, SIGKILL); -+ kill(pstree->proc13, SIGKILL); -+ kill(pstree->proc111, SIGKILL); -+ kill(pstree->proc112, SIGKILL); -+ kill(pstree->proc131, SIGKILL); -+} -+ -+static void sigchld_hand(int signo, siginfo_t *info, void *ucontext) -+{ -+ if (info->si_code != CLD_EXITED) -+ return; -+ if (!info->si_status) -+ return; -+ -+ /* -+ * If we are not ps tree root then propagate child error to parent. -+ * If we are ps tree root then also call all -+ * atexit handlers set up by zdtm test framework and this test. -+ * exit() is not async signal safe but it's ok for testing purposes. -+ * exit() usage allows us to use very simple error handling -+ * and pstree killing logic. -+ */ -+ exit(info->si_status); -+} -+ -+int main(int argc, char **argv) -+{ -+ struct sigaction sa = { -+ .sa_sigaction = sigchld_hand, -+ .sa_flags = SA_RESTART | SA_SIGINFO | SA_NOCLDSTOP -+ }; -+ sigemptyset(&sa.sa_mask); -+ -+ test_init(argc, argv); -+ -+ pstree = (struct pstree *)mmap_ashmem(PAGE_SIZE); -+ test_sync = (struct test_sync *)mmap_ashmem(sizeof(*test_sync)); -+ -+ if (sigaction(SIGCHLD, &sa, NULL)) { -+ pr_perror("SIGCHLD handler setup"); -+ exit(1); -+ }; -+ -+ if (atexit(kill_pstree_from_root)) { -+ pr_err("Can't setup atexit cleanup func"); -+ exit(1); -+ } -+ return proc1_func(); -+} -diff --git a/test/zdtm/customization/maps008.desc b/test/zdtm/customization/maps008.desc -new file mode 100644 -index 0000000..154ef8c ---- /dev/null -+++ b/test/zdtm/customization/maps008.desc -@@ -0,0 +1 @@ -+{'arch': 'aarch64', 'opts': '--pin-memory --use-fork-pid', 'flavor': 'h', 'flags': 'suid', 'cmdline': 'pinmemory max_pin_pid_num'} -diff --git a/test/zdtm/customization/maps01.c b/test/zdtm/customization/maps01.c -new file mode 100644 -index 0000000..119d7a6 ---- /dev/null -+++ b/test/zdtm/customization/maps01.c -@@ -0,0 +1,183 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "zdtmtst.h" -+ -+#define MEM_SIZE (1LU << 30) -+#define MEM_OFFSET (1LU << 29) -+#define MEM_OFFSET2 (MEM_SIZE - PAGE_SIZE) -+#define MEM_OFFSET3 (20LU * PAGE_SIZE) -+ -+const char *test_doc = "Test shared memory"; -+const char *test_author = "Andrew Vagin > 20); -+ goto err; -+ } -+ -+ p = mmap(NULL, MEM_SIZE, PROT_WRITE | PROT_READ, -+ MAP_SHARED | MAP_ANONYMOUS, -1, 0); -+ -+ if (p == MAP_FAILED) { -+ pr_err("Failed to mmap %ld Mb shared anonymous R/W memory\n", -+ MEM_SIZE >> 20); -+ goto err; -+ } -+ -+ p2 = mmap(NULL, MEM_OFFSET, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); -+ if (p2 == MAP_FAILED) { -+ pr_err("Failed to mmap %lu Mb anonymous memory\n", -+ MEM_OFFSET >> 20); -+ goto err; -+ } -+ -+ pid = test_fork(); -+ if (pid < 0) { -+ pr_err("Fork failed with %d\n", pid); -+ goto err; -+ } else if (pid == 0) { -+ void *p3; -+ -+ p3 = mmap(NULL, MEM_OFFSET3, PROT_READ | PROT_WRITE, -+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); -+ if (p3 == MAP_FAILED) { -+ pr_err("Failed to mmap %lu Mb anonymous R/W memory\n", -+ MEM_OFFSET3 >> 20); -+ goto err; -+ } -+ -+ crc = ~0; -+ datagen(m + MEM_OFFSET, PAGE_SIZE, &crc); -+ crc = ~0; -+ datagen(m + MEM_OFFSET2, PAGE_SIZE, &crc); -+ crc = ~0; -+ datagen(p + MEM_OFFSET + MEM_OFFSET3, PAGE_SIZE, &crc); -+ crc = ~0; -+ datagen(p + MEM_OFFSET + 2 * MEM_OFFSET3, PAGE_SIZE, &crc); -+ crc = ~0; -+ datagen(p + MEM_OFFSET3, PAGE_SIZE, &crc); -+ crc = ~0; -+ datagen(p3, PAGE_SIZE, &crc); -+ -+ task_waiter_complete(&t, 1); -+ -+ test_waitsig(); -+ -+ crc = ~0; -+ status = datachk(m + MEM_OFFSET, PAGE_SIZE, &crc); -+ if (status) -+ return 1; -+ crc = ~0; -+ status = datachk(m + MEM_OFFSET2, PAGE_SIZE, &crc); -+ if (status) -+ return 1; -+ crc = ~0; -+ status = datachk(m + PAGE_SIZE, PAGE_SIZE, &crc); -+ if (status) -+ return 1; -+ crc = ~0; -+ status = datachk(p + MEM_OFFSET + 2 * MEM_OFFSET3, PAGE_SIZE, &crc); -+ if (status) -+ return 1; -+ crc = ~0; -+ status = datachk(p + MEM_OFFSET3, PAGE_SIZE, &crc); -+ if (status) -+ return 1; -+ crc = ~0; -+ status = datachk(p3, PAGE_SIZE, &crc); -+ if (status) -+ return 1; -+ return 0; -+ } -+ task_waiter_wait4(&t, 1); -+ -+ munmap(p, MEM_OFFSET); -+ p2 = mremap(p + MEM_OFFSET, MEM_OFFSET, MEM_OFFSET, MREMAP_FIXED | MREMAP_MAYMOVE, p2); -+ if (p2 == MAP_FAILED) -+ goto err; -+ -+ snprintf(path, PATH_MAX, "/proc/self/map_files/%lx-%lx", -+ (unsigned long) m, -+ (unsigned long) m + MEM_SIZE); -+ fd = open(path, O_RDWR); -+ if (fd == -1) { -+ pr_perror("Can't open file %s", path); -+ goto err; -+ } -+ -+ m2 = mmap(NULL, PAGE_SIZE, PROT_WRITE | PROT_READ, MAP_SHARED, fd, MEM_OFFSET3); -+ if (m2 == MAP_FAILED) { -+ pr_perror("Can't map file %s", path); -+ goto err; -+ } -+ close(fd); -+ -+ munmap(m, PAGE_SIZE); -+ munmap(m + PAGE_SIZE * 10, PAGE_SIZE); -+ munmap(m + MEM_OFFSET2, PAGE_SIZE); -+ -+ crc = ~0; -+ datagen(m + PAGE_SIZE, PAGE_SIZE, &crc); -+ -+ crc = ~0; -+ datagen(m2, PAGE_SIZE, &crc); -+ -+ test_daemon(); -+ test_waitsig(); -+ -+ kill(pid, SIGTERM); -+ wait(&status); -+ if (WIFEXITED(status)) { -+ if (WEXITSTATUS(status)) -+ goto err; -+ } else -+ goto err; -+ -+ crc = ~0; -+ if (datachk(m + MEM_OFFSET, PAGE_SIZE, &crc)) -+ goto err; -+ -+ crc = ~0; -+ if (datachk(m2, PAGE_SIZE, &crc)) -+ goto err; -+ -+ crc = ~0; -+ if (datachk(p2 + MEM_OFFSET3, PAGE_SIZE, &crc)) -+ goto err; -+ -+ pass(); -+ -+ return 0; -+err: -+ if (waitpid(-1, NULL, WNOHANG) == 0) { -+ kill(pid, SIGTERM); -+ wait(NULL); -+ } -+ return 1; -+} -diff --git a/test/zdtm/customization/maps01.desc b/test/zdtm/customization/maps01.desc -new file mode 100644 -index 0000000..dad462e ---- /dev/null -+++ b/test/zdtm/customization/maps01.desc -@@ -0,0 +1 @@ -+{'arch': 'aarch64', 'flavor': 'h', 'opts': '--pin-memory --use-fork-pid', 'flags': 'suid', 'cmdline': 'pinmemory max_pin_pid_num'} -diff --git a/test/zdtm/customization/maps02.c b/test/zdtm/customization/maps02.c -new file mode 100644 -index 0000000..eb7c09b ---- /dev/null -+++ b/test/zdtm/customization/maps02.c -@@ -0,0 +1,111 @@ -+#include -+#include "zdtmtst.h" -+#include "get_smaps_bits.h" -+ -+#ifndef MADV_DONTDUMP -+#define MADV_DONTDUMP 16 -+#endif -+ -+const char *test_doc = "Test shared memory with advises"; -+const char *test_author = "Cyrill Gorcunov "; -+ -+struct mmap_data { -+ void *start; -+ unsigned long orig_flags; -+ unsigned long orig_madv; -+ unsigned long new_flags; -+ unsigned long new_madv; -+}; -+ -+#define MEM_SIZE (8192) -+ -+static int alloc_anon_mmap(struct mmap_data *m, int flags, int adv) -+{ -+ m->start = mmap(NULL, MEM_SIZE, PROT_READ | PROT_WRITE, -+ flags, -1, 0); -+ if (m->start == MAP_FAILED) { -+ pr_perror("mmap failed"); -+ return -1; -+ } -+ -+ if (madvise(m->start, MEM_SIZE, adv)) { -+ if (errno == EINVAL) { -+ test_msg("madvise failed, no kernel support\n"); -+ munmap(m->start, MEM_SIZE); -+ *m = (struct mmap_data){ }; -+ } else { -+ pr_perror("madvise failed"); -+ return -1; -+ } -+ } -+ -+ return 0; -+} -+ -+int main(int argc, char **argv) -+{ -+ struct mmap_data m[5] = { }; -+ size_t i; -+ -+ test_init(argc, argv); -+ -+ test_msg("Alloc growsdown\n"); -+ if (alloc_anon_mmap(&m[0], MAP_PRIVATE | MAP_ANONYMOUS, MADV_DONTFORK)) -+ return -1; -+ -+ test_msg("Alloc locked/sequential\n"); -+ if (alloc_anon_mmap(&m[1], MAP_PRIVATE | MAP_ANONYMOUS | MAP_LOCKED, MADV_SEQUENTIAL)) -+ return -1; -+ -+ test_msg("Alloc noreserve/dontdump\n"); -+ if (alloc_anon_mmap(&m[2], MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, MADV_DONTDUMP)) -+ return -1; -+ -+ test_msg("Alloc hugetlb/hugepage\n"); -+ if (alloc_anon_mmap(&m[3], MAP_PRIVATE | MAP_ANONYMOUS, MADV_HUGEPAGE)) -+ return -1; -+ -+ test_msg("Alloc dontfork/random|mergeable\n"); -+ if (alloc_anon_mmap(&m[4], MAP_PRIVATE | MAP_ANONYMOUS, MADV_MERGEABLE)) -+ return -1; -+ -+ test_msg("Fetch existing flags/adv\n"); -+ for (i = 0; i < sizeof(m)/sizeof(m[0]); i++) { -+ if (get_smaps_bits((unsigned long)m[i].start, -+ &m[i].orig_flags, -+ &m[i].orig_madv)) -+ return -1; -+ } -+ -+ test_daemon(); -+ test_waitsig(); -+ -+ test_msg("Fetch restored flags/adv\n"); -+ for (i = 0; i < sizeof(m)/sizeof(m[0]); i++) { -+ if (get_smaps_bits((unsigned long)m[i].start, -+ &m[i].new_flags, -+ &m[i].new_madv)) -+ return -1; -+ -+ if (m[i].orig_flags != m[i].new_flags) { -+ pr_perror("Flags are changed %lx %lx -> %lx (%zu)", -+ (unsigned long)m[i].start, -+ m[i].orig_flags, m[i].new_flags, i); -+ fail(); -+ return -1; -+ } -+ -+ if (m[i].orig_madv != m[i].new_madv) { -+ pr_perror("Madvs are changed %lx %lx -> %lx (%zu)", -+ (unsigned long)m[i].start, -+ m[i].orig_madv, m[i].new_madv, i); -+ fail(); -+ return -1; -+ } -+ -+ } -+ -+ pass(); -+ -+ return 0; -+} -diff --git a/test/zdtm/customization/maps02.desc b/test/zdtm/customization/maps02.desc -new file mode 100644 -index 0000000..f14d661 ---- /dev/null -+++ b/test/zdtm/customization/maps02.desc -@@ -0,0 +1 @@ -+{'arch': 'aarch64', 'opts': '--pin-memory --use-fork-pid', 'flavor': 'h', 'cmdline': 'pinmemory max_pin_pid_num'} -diff --git a/test/zdtm/customization/maps04.c b/test/zdtm/customization/maps04.c -new file mode 100644 -index 0000000..780c566 ---- /dev/null -+++ b/test/zdtm/customization/maps04.c -@@ -0,0 +1,57 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "zdtmtst.h" -+ -+#define MEM_SIZE (1L << 29) -+ -+const char *test_doc = "Test big mappings"; -+const char *test_author = "Andrew Vagin -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "zdtmtst.h" -+ -+const char *test_doc = "Create a bunch of small VMAs and test they survive transferring\n"; -+const char *test_author = "Cyrill Gorcunov "; -+ -+#define NR_MAPS 4096 -+ -+#define NR_MAPS_1 (NR_MAPS + 0) -+#define NR_MAPS_2 (NR_MAPS + 1) -+ -+#define MAPS_SIZE_1 (140 << 10) -+#define MAPS_SIZE_2 (8192) -+ -+int main(int argc, char *argv[]) -+{ -+ void *map[NR_MAPS + 2] = { }, *addr; -+ size_t i, summary; -+ -+ test_init(argc, argv); -+ -+ summary = NR_MAPS * 2 * 4096 + MAPS_SIZE_1 + MAPS_SIZE_2 + (1 << 20); -+ -+ addr = mmap(NULL, summary, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); -+ if (addr == MAP_FAILED) { -+ pr_perror("Can't mmap"); -+ return 1; -+ } -+ munmap(addr, summary); -+ -+ for (i = 0; i < NR_MAPS; i++) { -+ map[i] = mmap(i > 0 ? map[i - 1] + 8192 : addr, 4096, PROT_READ | PROT_WRITE, -+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); -+ if (map[i] == MAP_FAILED) { -+ pr_perror("Can't mmap"); -+ return 1; -+ } else { -+ /* Dirtify it */ -+ int *v = (void *)map[i]; -+ *v = i; -+ } -+ } -+ -+ map[NR_MAPS_1] = mmap(map[NR_MAPS_1 - 1] + 8192, MAPS_SIZE_1, PROT_READ | PROT_WRITE | PROT_EXEC, -+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_GROWSDOWN, -1, 0); -+ if (map[NR_MAPS_1] == MAP_FAILED) { -+ pr_perror("Can't mmap"); -+ return 1; -+ } else { -+ /* Dirtify it */ -+ int *v = (void *)map[NR_MAPS_1]; -+ *v = i; -+ test_msg("map-1: %p %p\n", map[NR_MAPS_1], map[NR_MAPS_1] + MAPS_SIZE_1); -+ } -+ -+ map[NR_MAPS_2] = mmap(map[NR_MAPS_1] + MAPS_SIZE_1, MAPS_SIZE_2, PROT_READ | PROT_WRITE, -+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_GROWSDOWN, -1, 0); -+ if (map[NR_MAPS_2] == MAP_FAILED) { -+ pr_perror("Can't mmap"); -+ return 1; -+ } else { -+ /* Dirtify it */ -+ int *v = (void *)map[NR_MAPS_2]; -+ *v = i; -+ test_msg("map-2: %p %p\n", map[NR_MAPS_2], map[NR_MAPS_2] + MAPS_SIZE_2); -+ } -+ -+ test_daemon(); -+ test_waitsig(); -+ -+ for (i = 0; i < NR_MAPS; i++) { -+ int *v = (void *)map[i]; -+ -+ if (*v != i) { -+ fail("Data corrupted at page %lu", (unsigned long)i); -+ return 1; -+ } -+ } -+ -+ pass(); -+ return 0; -+} -diff --git a/test/zdtm/customization/maps05.desc b/test/zdtm/customization/maps05.desc -new file mode 100644 -index 0000000..f14d661 ---- /dev/null -+++ b/test/zdtm/customization/maps05.desc -@@ -0,0 +1 @@ -+{'arch': 'aarch64', 'opts': '--pin-memory --use-fork-pid', 'flavor': 'h', 'cmdline': 'pinmemory max_pin_pid_num'} -diff --git a/test/zdtm/customization/maps06.c b/test/zdtm/customization/maps06.c -new file mode 100644 -index 0000000..7480d6b ---- /dev/null -+++ b/test/zdtm/customization/maps06.c -@@ -0,0 +1,70 @@ -+#include "zdtmtst.h" -+#include -+#include -+#include -+#include -+ -+const char *test_doc = "Create a lot of file vma-s"; -+const char *test_author = "Andrei Vagin "; -+ -+char *filename; -+TEST_OPTION(filename, string, "file name", 1); -+ -+int main(int argc, char ** argv) -+{ -+ void *start; -+ int fd, i; -+ int ps = sysconf(_SC_PAGESIZE); -+ int test_size; -+ -+ test_init(argc, argv); -+ -+ fd = open(filename, O_RDWR | O_CREAT, 0666); -+ if (fd < 0) -+ return 1; -+ -+ ftruncate(fd, ps); -+ -+ if (ps == 0x1000) -+ test_size = 10240; -+ else -+ test_size = 512; -+ -+ start = mmap(0, ps * test_size * 4, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); -+ if (start == MAP_FAILED) -+ return 1; -+ -+ for (i = 0; i < test_size; i++) { -+ int *addr; -+ addr = mmap(start + i * 3 * ps, ps, -+ PROT_READ | PROT_WRITE, -+ MAP_PRIVATE | MAP_FILE | MAP_FIXED, fd, 0); -+ if (addr == MAP_FAILED) -+ return 1; -+ addr[0] = i * 2; -+ addr = mmap(start + (i * 3 + 1) * ps, ps, -+ PROT_READ | PROT_WRITE, -+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); -+ if (addr == MAP_FAILED) -+ return 1; -+ addr[0] = i; -+ } -+ -+ test_daemon(); -+ -+ test_waitsig(); -+ -+ for (i = 0; i < test_size; i++) { -+ int *addr; -+ addr = start + i * 3 * ps; -+ if (addr[0] != i * 2) -+ fail(); -+ addr = start + (i * 3 + 1) * ps; -+ if (addr[0] != i) -+ fail(); -+ } -+ -+ pass(); -+ -+ return 0; -+} -diff --git a/test/zdtm/customization/maps06.desc b/test/zdtm/customization/maps06.desc -new file mode 100644 -index 0000000..f14d661 ---- /dev/null -+++ b/test/zdtm/customization/maps06.desc -@@ -0,0 +1 @@ -+{'arch': 'aarch64', 'opts': '--pin-memory --use-fork-pid', 'flavor': 'h', 'cmdline': 'pinmemory max_pin_pid_num'} -diff --git a/test/zdtm/customization/maps_file_prot.c b/test/zdtm/customization/maps_file_prot.c -new file mode 100644 -index 0000000..3b28c1f ---- /dev/null -+++ b/test/zdtm/customization/maps_file_prot.c -@@ -0,0 +1,53 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "zdtmtst.h" -+ -+const char *test_doc = "Test mappings of same file with different prot"; -+const char *test_author = "Jamie Liu "; -+ -+char *filename; -+TEST_OPTION(filename, string, "file name", 1); -+ -+#define die(fmt, arg...) do { pr_perror(fmt, ## arg); return 1; } while (0) -+ -+int main(int argc, char ** argv) -+{ -+ void *ro_map, *rw_map; -+ int fd; -+ -+ test_init(argc, argv); -+ -+ fd = open(filename, O_RDWR | O_CREAT, 0644); -+ if (fd < 0) -+ die("open failed"); -+ if (ftruncate(fd, 2 * PAGE_SIZE)) -+ die("ftruncate failed"); -+ -+ ro_map = mmap(NULL, 2 * PAGE_SIZE, PROT_READ, MAP_SHARED, fd, 0); -+ if (ro_map == MAP_FAILED) -+ die("mmap failed"); -+ rw_map = ro_map + PAGE_SIZE; -+ if (mprotect(rw_map, PAGE_SIZE, PROT_READ | PROT_WRITE)) -+ die("mprotect failed"); -+ -+ close(fd); -+ -+ test_daemon(); -+ test_waitsig(); -+ -+ /* Check that rw_map is still writeable */ -+ *(volatile char *)rw_map = 1; -+ -+ if (mprotect(ro_map, PAGE_SIZE, PROT_READ | PROT_WRITE)) { -+ fail("mprotect after restore failed"); -+ return 1; -+ } -+ -+ pass(); -+ return 0; -+} -diff --git a/test/zdtm/customization/maps_file_prot.desc b/test/zdtm/customization/maps_file_prot.desc -new file mode 100644 -index 0000000..0ec4023 ---- /dev/null -+++ b/test/zdtm/customization/maps_file_prot.desc -@@ -0,0 +1 @@ -+{'arch': 'aarch64', 'opts': '--pin-memory --use-fork-pid', 'flavor': 'h'} -diff --git a/test/zdtm_ct.c b/test/zdtm_ct.c -index e8d45a9..3bbd3a3 100644 ---- a/test/zdtm_ct.c -+++ b/test/zdtm_ct.c -@@ -9,6 +9,7 @@ - #include - #include - #include -+#include - #include - - #ifndef CLONE_NEWTIME -@@ -95,13 +96,23 @@ int main(int argc, char **argv) - { - pid_t pid; - int status; -+ char *val = getenv("ZDTM_NO_PID_NS"); -+ int flags = CLONE_NEWNS | CLONE_NEWNET | CLONE_NEWIPC; -+ -+ /* -+ * Some customizing mechanism don't support pid namespace, -+ * so every customizing feature testcase will set -+ * 'ZDTM_NO_PID_NS' environment value. -+ */ -+ if (val == NULL || strcmp(val, "1") != 0) -+ flags |= CLONE_NEWPID; - - /* - * pidns is used to avoid conflicts - * mntns is used to mount /proc - * net is used to avoid conflicts of parasite sockets - */ -- if (unshare(CLONE_NEWNS | CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWIPC)) -+ if (unshare(flags)) - return 1; - pid = fork(); - if (pid == 0) { --- -2.34.1 - diff --git a/0064-zdtm-init-notifier-testcase.patch b/0064-zdtm-init-notifier-testcase.patch deleted file mode 100644 index 6cd6aca542678ed8fc8e677ba815b7e57a9f11e5..0000000000000000000000000000000000000000 --- a/0064-zdtm-init-notifier-testcase.patch +++ /dev/null @@ -1,620 +0,0 @@ -From 8c7cfce7c9f90af9314b96c6ec34c97fb6f9be8a Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Mon, 14 Feb 2022 19:11:15 +0800 -Subject: [PATCH 64/72] zdtm: init notifier testcase - -Signed-off-by: fu.lin ---- - test/zdtm.py | 70 ++++++++-- - test/zdtm/customization/Makefile | 3 +- - test/zdtm/customization/notifier00.c | 68 ++++++++++ - test/zdtm/customization/notifier00.desc | 1 + - test/zdtm/mod/.gitignore | 163 ++++++++++++++++++++++++ - test/zdtm/mod/Makefile | 28 ++++ - test/zdtm/mod/notifier.c | 145 +++++++++++++++++++++ - 7 files changed, 466 insertions(+), 12 deletions(-) - create mode 100644 test/zdtm/customization/notifier00.c - create mode 100644 test/zdtm/customization/notifier00.desc - create mode 100644 test/zdtm/mod/.gitignore - create mode 100644 test/zdtm/mod/Makefile - create mode 100644 test/zdtm/mod/notifier.c - -diff --git a/test/zdtm.py b/test/zdtm.py -index d3b146f..d64a683 100755 ---- a/test/zdtm.py -+++ b/test/zdtm.py -@@ -25,6 +25,7 @@ import tempfile - import time - import socket - import pathlib -+import platform - from builtins import (input, int, open, range, str, zip) - - import pycriu as crpc -@@ -1466,6 +1467,13 @@ class criu: - return True - return False - -+ @staticmethod -+ def check_sysfs(pathes): -+ for path in pathes.split(): -+ if not pathlib.Path(path).exists(): -+ return True -+ return False -+ - @staticmethod - def available(): - if not os.access(opts['criu_bin'], os.X_OK): -@@ -1991,21 +1999,49 @@ class Launcher: - testline = u"ok %d - %s # SKIP %s" % (self.__runtest, name, reason) - print(testline, file=self.__file_report) - -+ def check_module(self, mod): -+ found = False -+ with open("/proc/modules") as f: -+ for line in f.readlines(): -+ if "pin_memory" == line.split()[0]: -+ found = True -+ return found -+ - def modprobe_pin_memory(self, load): -+ mod = "pin_memory" - if not load: - return -- else: -- found = False -- with open("/proc/modules") as f: -- for line in f.readlines(): -- if "pin_memory" == line.split()[0]: -- found = True -- if not found: -- subprocess.check_call(["modprobe", "pin_memory"]) -+ elif not self.check_module(mod): -+ subprocess.check_call(["modprobe", mod]) - - cmd = [opts["criu_bin"], "init-pagemap-read"] - subprocess.check_call(cmd, shell=False) - -+ def build_and_load_mod(self, target, kdir): -+ if platform.machine() != "aarch64" or not target: -+ return -+ -+ if not os.access("zdtm/mod", os.R_OK): -+ print("should be executed in the test subdir") -+ sys.exit(0) -+ -+ dirpath = f"MOD={os.getcwd()}/zdtm/mod" -+ build_mod = ["make", "-C", "zdtm/mod", dirpath, target] -+ if kdir: -+ build_mod.append(f"KDIR={kdir}") -+ subprocess.check_call(build_mod) -+ -+ # ensure the module has been unloaded -+ if self.check_module(target.rstrip(".ko")): -+ subprocess.run(["rmmod", target], check=False) -+ -+ modpath = f"zdtm/mod/{target}" -+ subprocess.check_call(["insmod", modpath]) -+ -+ def unload_mod(self, mod): -+ if mod: -+ subprocess.check_call(["rmmod", mod]) -+ - def run_test(self, name, desc, flavor): - - if len(self.__subs) >= self.__max: -@@ -2014,9 +2050,9 @@ class Launcher: - with open("/proc/sys/kernel/tainted") as taintfd: - taint = taintfd.read() - # 0x1000 means the out of tree module has been loaded -- if self.__taint != taint and (int(self.__taint) | 0x1000) != int(taint): -+ if self.__taint != taint and (int(self.__taint) | 0x3000) != int(taint): - raise Exception("The kernel is tainted: %r (%r)" % -- (taint, self.__taint)) -+ (taint, str(int(self.__taint) | 0x3000))) - - if test_flag(desc, 'excl'): - self.wait_all() -@@ -2045,6 +2081,8 @@ class Launcher: - # `--use-fork-pid`, so don't care `--pin-memory` option - self.modprobe_pin_memory(no_pid_ns) - -+ self.build_and_load_mod(desc.get("mod", ""), opts["kdir"]) -+ - sub = subprocess.Popen(["./zdtm_ct", "zdtm.py"], - env=dict(os.environ, CR_CT_TEST_INFO=arg, - ZDTM_NO_PID_NS=zdtm_no_pid_ns), -@@ -2059,9 +2097,11 @@ class Launcher: - } - - # pin memory function don't support concurrency -- if test_flag(desc, 'excl') or test_value(desc, "opts", "--pin-memory"): -+ if test_flag(desc, 'excl') or test_value(desc, "opts", "--pin-memory") or desc.get("mod", ""): - self.wait() - -+ self.unload_mod(desc.get("mod", "")) -+ - def __wait_one(self, flags): - pid = -1 - status = -1 -@@ -2412,6 +2452,11 @@ def run_tests(opts): - t, f"cmdline '{cmdline}' isn't support, or don't set") - continue - -+ sysfs = tdesc.get('sysfs', '') -+ if sysfs and criu.check_sysfs(sysfs): -+ launcher.skip(t, f"sysfs file {sysfs} don't exist") -+ continue -+ - test_flavs = tdesc.get('flavor', 'h ns uns').split() - opts_flavs = (opts['flavor'] or 'h,ns,uns').split(',') - if opts_flavs != ['best']: -@@ -2434,6 +2479,7 @@ def run_tests(opts): - launcher.run_test(t, tdesc, run_flavs) - else: - launcher.skip(t, "no flavors") -+ - finally: - fail = launcher.finish() - if opts['join_ns']: -@@ -2723,6 +2769,8 @@ rp.add_argument("--pre-dump-mode", - rp.add_argument("--kdat", - help="Path to criu.kdat, default '/run/criu.kdat'", - default="/run/criu.kdat") -+rp.add_argument( -+ "--kdir", help="specific kernel devel path, the default value is `/lib/modules/$(uname -r)/build`") - - lp = sp.add_parser("list", help="List tests") - lp.set_defaults(action=list_tests) -diff --git a/test/zdtm/customization/Makefile b/test/zdtm/customization/Makefile -index 82348f2..93922c7 100644 ---- a/test/zdtm/customization/Makefile -+++ b/test/zdtm/customization/Makefile -@@ -10,7 +10,8 @@ TST_NOFILE = \ - maps04 \ - maps05 \ - maps007 \ -- maps008 -+ maps008 \ -+ notifier00 - - TST_FILE = \ - maps00 \ -diff --git a/test/zdtm/customization/notifier00.c b/test/zdtm/customization/notifier00.c -new file mode 100644 -index 0000000..5fc3d54 ---- /dev/null -+++ b/test/zdtm/customization/notifier00.c -@@ -0,0 +1,68 @@ -+#include -+/* Historical reasons: in order to compatible with R10 */ -+#define CONFIG_EULEROS_MODRESTORE_NOTIFY -+#include -+ -+#include "zdtmtst.h" -+ -+const char *test_doc = "Tests the basic function of the notifiers"; -+static char *nvwa_notifiers[] = { -+ "PRE_FREEZE", -+ "FREEZE_TO_KILL", -+ "PRE_UPDATE_KERNEL", -+ "POST_UPDATE_KERNEL", -+ "UNFREEZE_TO_RUN", -+ "POST_RUN" -+}; -+ -+_Static_assert(sizeof(nvwa_notifiers)/sizeof(nvwa_notifiers[0]) == KUP_HOOK_MAX, "nvwa_notifiers number is wrong!"); -+ -+int main(int argc, char *argv[]) -+{ -+ int orig_values[KUP_HOOK_MAX] = {0}; -+ bool failure = false; -+ FILE *fp; -+ -+ test_init(argc, argv); -+ -+ fp = fopen("/sys/kernel/criu_notifier", "r"); -+ if (fp == NULL) { -+ pr_perror("fopen"); -+ return 1; -+ } -+ -+ for (int i = 0; i < KUP_HOOK_MAX; i++) -+ fscanf(fp, "%d ", orig_values+i); -+ -+ test_daemon(); -+ test_waitsig(); -+ -+ if (fseek(fp, 0, SEEK_SET) != 0) { -+ pr_perror("fseek"); -+ return 2; -+ } -+ -+ for (int i = 0; i < KUP_HOOK_MAX; i++) { -+ int val = 0; -+ int should = orig_values[i]+1; -+ -+ fscanf(fp, "%d ", &val); -+ -+ /* those are not called in criu */ -+ if (i == PRE_UPDATE_KERNEL || i == POST_UPDATE_KERNEL) -+ continue; -+ -+ if (val != should) { -+ pr_err("%s notifier is abnormal, it should be %d, but %d.\n", -+ nvwa_notifiers[i], should, val); -+ failure = true; -+ } -+ } -+ -+ if (failure) -+ fail("notifier is abnormal."); -+ else -+ pass(); -+ -+ return 0; -+} -diff --git a/test/zdtm/customization/notifier00.desc b/test/zdtm/customization/notifier00.desc -new file mode 100644 -index 0000000..1c6b512 ---- /dev/null -+++ b/test/zdtm/customization/notifier00.desc -@@ -0,0 +1 @@ -+{'arch': 'aarch64', 'opts': '--with-notifier', 'flavor': 'h', 'flags': 'suid', 'sysfs': '/sys/kernel/modrestore/nvwa_notifier', 'mod': 'notifier.ko'} -diff --git a/test/zdtm/mod/.gitignore b/test/zdtm/mod/.gitignore -new file mode 100644 -index 0000000..7afd412 ---- /dev/null -+++ b/test/zdtm/mod/.gitignore -@@ -0,0 +1,163 @@ -+# SPDX-License-Identifier: GPL-2.0-only -+# -+# NOTE! Don't add files that are generated in specific -+# subdirectories here. Add them in the ".gitignore" file -+# in that subdirectory instead. -+# -+# NOTE! Please use 'git ls-files -i --exclude-standard' -+# command after changing this file, to see if there are -+# any tracked files which get ignored after the change. -+# -+# Normal rules (sorted alphabetically) -+# -+.* -+*.a -+*.asn1.[ch] -+*.bin -+*.bz2 -+*.c.[012]*.* -+*.dt.yaml -+*.dtb -+*.dtbo -+*.dtb.S -+*.dwo -+*.elf -+*.gcno -+*.gz -+*.i -+*.ko -+*.lex.c -+*.ll -+*.lst -+*.lz4 -+*.lzma -+*.lzo -+*.mod -+*.mod.c -+*.o -+*.o.* -+*.patch -+*.s -+*.so -+*.so.dbg -+*.su -+*.symtypes -+*.symversions -+*.tab.[ch] -+*.tar -+*.xz -+*.zst -+Module.symvers -+modules.order -+ -+# -+# Top-level generic files -+# -+/linux -+/modules-only.symvers -+/vmlinux -+/vmlinux.32 -+/vmlinux.map -+/vmlinux.symvers -+/vmlinux-gdb.py -+/vmlinuz -+/System.map -+/Module.markers -+/modules.builtin -+/modules.builtin.modinfo -+/modules.nsdeps -+ -+# -+# RPM spec file (make rpm-pkg) -+# -+/*.spec -+ -+# -+# Debian directory (make deb-pkg) -+# -+/debian/ -+ -+# -+# Snap directory (make snap-pkg) -+# -+/snap/ -+ -+# -+# tar directory (make tar*-pkg) -+# -+/tar-install/ -+ -+# -+# We don't want to ignore the following even if they are dot-files -+# -+!.clang-format -+!.cocciconfig -+!.get_maintainer.ignore -+!.gitattributes -+!.gitignore -+!.mailmap -+ -+# -+# Generated include files -+# -+/include/config/ -+/include/generated/ -+/include/ksym/ -+/arch/*/include/generated/ -+ -+# stgit generated dirs -+patches-* -+ -+# quilt's files -+patches -+series -+ -+# ctags files -+tags -+TAGS -+ -+# cscope files -+cscope.* -+ncscope.* -+ -+# gnu global files -+GPATH -+GRTAGS -+GSYMS -+GTAGS -+ -+# id-utils files -+ID -+ -+*.orig -+*~ -+\#*# -+ -+# -+# Leavings from module signing -+# -+extra_certificates -+signing_key.pem -+signing_key.priv -+signing_key.x509 -+x509.genkey -+ -+# Kconfig presets -+/all.config -+/alldef.config -+/allmod.config -+/allno.config -+/allrandom.config -+/allyes.config -+ -+# Kconfig savedefconfig output -+/defconfig -+ -+# Kdevelop4 -+*.kdev4 -+ -+# Clang's compilation database file -+/compile_commands.json -+ -+# Documentation toolchain -+sphinx_*/ -diff --git a/test/zdtm/mod/Makefile b/test/zdtm/mod/Makefile -new file mode 100644 -index 0000000..10c9c9a ---- /dev/null -+++ b/test/zdtm/mod/Makefile -@@ -0,0 +1,28 @@ -+# notice: -+# `ARCH` var is used in both criu and kernel, but they have the different value -+# for the same architecture(e.g. arm64). Therefore, this Makefile can't be -+# included in the criu Makefile. -+obj-m += notifier.o -+ -+# specific the kernel devel path -+# example (use `/home/me/kernel` as `KDIR`): -+# $ export KDIR="/home/me/kernel" -+ifeq ($(KDIR),) -+ KDIR := /lib/modules/$(shell uname -r)/build -+endif -+ -+# specific the mod src path -+ifeq ($(MOD),) -+ MOD := $(PWD) -+endif -+ -+all: -+ $(MAKE) -C $(KDIR) M=$(MOD) modules -+ -+clean: -+ $(MAKE) -C $(KDIR) M=$(MOD) clean -+ -+.PHONY: all clean -+ -+notifier.ko: -+ $(MAKE) -C $(KDIR) M=$(MOD) notifier.ko -diff --git a/test/zdtm/mod/notifier.c b/test/zdtm/mod/notifier.c -new file mode 100644 -index 0000000..70a5b33 ---- /dev/null -+++ b/test/zdtm/mod/notifier.c -@@ -0,0 +1,145 @@ -+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -+ -+#include -+#include -+#include -+ -+static int values[KUP_HOOK_MAX]; -+static char *nvwa_actions[] = { -+ "PREPARE", -+ "ROLLBACK", -+}; -+static char *nvwa_notifiers[] = { -+ "PRE_FREEZE", -+ "FREEZE_TO_KILL", -+ "PRE_UPDATE_KERNEL", -+ "POST_UPDATE_KERNEL", -+ "UNFREEZE_TO_RUN", -+ "POST_RUN" -+}; -+ -+static int nvwa_notifier_func(struct notifier_block *nb, unsigned long val, void *data) -+{ -+ struct nvwa_action *action = data; -+ -+ switch (action->cmd) { -+ case PREPARE: -+ values[val] += 1; -+ break; -+ case ROLLBACK: -+ values[val] -= 1; -+ break; -+ default: -+ pr_err("invalid cmd: %d", action->cmd); -+ return NOTIFY_BAD; -+ } -+ -+ pr_info("nvwa notifier action %s", nvwa_actions[action->cmd]); -+ -+ return NOTIFY_DONE; -+} -+ -+#define DEFINE_NVWA_NB(name) \ -+ static struct notifier_block nvwa_##name##_nb = { \ -+ .notifier_call = nvwa_notifier_func, \ -+ } -+ -+DEFINE_NVWA_NB(pre_freeze); -+DEFINE_NVWA_NB(freeze_to_kill); -+DEFINE_NVWA_NB(pre_update_kernel); -+DEFINE_NVWA_NB(post_update_kernel); -+DEFINE_NVWA_NB(unfreeze_to_run); -+DEFINE_NVWA_NB(post_run); -+ -+static struct notifier_block *nvwa_nbs[] = { -+ &nvwa_pre_freeze_nb, -+ &nvwa_freeze_to_kill_nb, -+ &nvwa_pre_update_kernel_nb, -+ &nvwa_post_update_kernel_nb, -+ &nvwa_unfreeze_to_run_nb, -+ &nvwa_post_run_nb, -+}; -+ -+static int register_nvwa_notifiers(void) -+{ -+ int i; -+ -+ BUILD_BUG_ON_MSG(ARRAY_SIZE(nvwa_nbs) != KUP_HOOK_MAX, -+ "wrong nvwa notifier block size!"); -+ -+ for (i = 0; i < ARRAY_SIZE(nvwa_nbs); i++) { -+ if (register_nvwa_notifier(i, nvwa_nbs[i]) != 0) { -+ pr_err("register nvwa %s notifier failed!", nvwa_notifiers[i]); -+ goto error; -+ } -+ } -+ -+ return 0; -+ -+error: -+ -+ for (i -= 1; i >= 0; i -= 1) -+ unregister_nvwa_notifier(i, nvwa_nbs[i]); -+ -+ return -1; -+} -+ -+static void unregister_nvwa_notifiers(void) -+{ -+ int i; -+ -+ for (i = 0; i < ARRAY_SIZE(nvwa_nbs); i++) -+ unregister_nvwa_notifier(i, nvwa_nbs[i]); -+} -+ -+static ssize_t criu_notifier_store(struct kobject *kobj, -+ struct kobj_attribute *attr, -+ const char *buf, size_t count) -+{ -+ int i; -+ -+ for (i = 0; i < ARRAY_SIZE(values); i++) -+ values[i] = 0; -+ -+ return count; -+} -+ -+static ssize_t criu_notifier_show(struct kobject *kobj, -+ struct kobj_attribute *attr, -+ char *buf) -+{ -+ int i; -+ ssize_t count = 0; -+ -+ for (i = 0; i < ARRAY_SIZE(values); i++) -+ count += sprintf(buf+count, "%d ", values[i]); -+ -+ buf[count-1] = '\n'; -+ -+ return count; -+} -+ -+static struct kobj_attribute notifier_file = __ATTR_RW(criu_notifier); -+ -+static int __init notifier_init(void) -+{ -+ if (register_nvwa_notifiers() != 0) -+ return -1; -+ -+ if (sysfs_create_file(kernel_kobj, ¬ifier_file.attr) != 0) { -+ unregister_nvwa_notifiers(); -+ return -1; -+ } -+ -+ return 0; -+} -+ -+static void __exit notifier_exit(void) -+{ -+ sysfs_remove_file(kernel_kobj, ¬ifier_file.attr); -+ unregister_nvwa_notifiers(); -+} -+ -+module_init(notifier_init); -+module_exit(notifier_exit); -+MODULE_LICENSE("GPL"); --- -2.34.1 - diff --git a/0065-zdtm-print-errno-info-when-accessing-.out-failure.patch b/0065-zdtm-print-errno-info-when-accessing-.out-failure.patch deleted file mode 100644 index da1bcf5756ed3ea61331e2fc185a9ea3d33619b8..0000000000000000000000000000000000000000 --- a/0065-zdtm-print-errno-info-when-accessing-.out-failure.patch +++ /dev/null @@ -1,35 +0,0 @@ -From d17aedda384cfe6940b9948f4db36643495e0375 Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Tue, 15 Feb 2022 11:31:27 +0800 -Subject: [PATCH 65/72] zdtm: print errno info when accessing *.out failure - -The line `Output file *.out appears to exist, aborting` is confusing. -The one common reason is permission denied because of the test desc -is lack of suid flag. The zdtm.py will set `ZDTM_UID` and `ZDTM_GID`, -the function `test_init()` (in `zdtm/lib/test.c`) will change tester -itself to that uid and gid if no suid flag. - -Here print the errno when access *.out failed. - -Signed-off-by: fu.lin ---- - test/zdtm/lib/test.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/test/zdtm/lib/test.c b/test/zdtm/lib/test.c -index 81da81e..471980d 100644 ---- a/test/zdtm/lib/test.c -+++ b/test/zdtm/lib/test.c -@@ -74,7 +74,8 @@ static void test_fini(void) - static void setup_outfile(void) - { - if (!access(outfile, F_OK) || errno != ENOENT) { -- fprintf(stderr, "Output file %s appears to exist, aborting\n", outfile); -+ fprintf(stderr, "Output file %s appears to exist, aborting: %s\n", -+ outfile, strerror(errno)); - exit(1); - } - --- -2.34.1 - diff --git a/0066-zdtm-print-more-info-for-fs.c.patch b/0066-zdtm-print-more-info-for-fs.c.patch deleted file mode 100644 index 523645696b71e41267d61084b0730a610b40e3d3..0000000000000000000000000000000000000000 --- a/0066-zdtm-print-more-info-for-fs.c.patch +++ /dev/null @@ -1,43 +0,0 @@ -From af97bc76b1dc1e6ca2b924d7e5666dd04a1847b2 Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Wed, 16 Feb 2022 10:39:06 +0800 -Subject: [PATCH 66/72] zdtm: print more info for fs.c - ---- - test/zdtm/lib/fs.c | 11 ++++++++--- - 1 file changed, 8 insertions(+), 3 deletions(-) - -diff --git a/test/zdtm/lib/fs.c b/test/zdtm/lib/fs.c -index 7b8be5f..a716b40 100644 ---- a/test/zdtm/lib/fs.c -+++ b/test/zdtm/lib/fs.c -@@ -4,6 +4,7 @@ - #include - #include - #include -+#include - - #include "zdtmtst.h" - #include "fs.h" -@@ -103,11 +104,15 @@ int get_cwd_check_perm(char **result) - } - - if (access(cwd, X_OK)) { -- pr_err("access check for bit X for current dir path '%s' " -- "failed for uid:%d,gid:%d, error: %d(%s). " -+ struct stat sb; -+ -+ stat(cwd, &sb); -+ pr_err("access check for bit X for current dir path '%s'(uid:%d,gid:%d,mode:%o) " -+ "failed for uid:%d,gid:%d,euid:%d, error: %d(%s). " - "Bit 'x' should be set in all path components of " - "this directory\n", -- cwd, getuid(), getgid(), errno, strerror(errno)); -+ cwd, sb.st_uid, sb.st_gid, sb.st_mode, getuid(), getgid(), -+ geteuid(), errno, strerror(errno)); - return -1; - } - --- -2.34.1 - diff --git a/0067-zdtm-add-chardev-testcase.patch b/0067-zdtm-add-chardev-testcase.patch deleted file mode 100644 index 024f78e70503de82ebc04d1bf0fb5c5cf83455f3..0000000000000000000000000000000000000000 --- a/0067-zdtm-add-chardev-testcase.patch +++ /dev/null @@ -1,288 +0,0 @@ -From c44c68028f22751ef12fac02567008a16e992fea Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Thu, 17 Feb 2022 14:30:03 +0800 -Subject: [PATCH 67/72] zdtm: add chardev testcase - -- char dev `ioctl({IOCTL_CMD_NEEDREPAIR, IOCTL_CMD_REPAIR})` - checkpoint/restore test -- anonymous inode checkpoint/restore test ---- - test/zdtm/customization/Makefile | 3 +- - test/zdtm/customization/chardev00.c | 65 +++++++++++ - test/zdtm/customization/chardev00.desc | 1 + - test/zdtm/mod/Makefile | 5 +- - test/zdtm/mod/anon_inode.c | 148 +++++++++++++++++++++++++ - 5 files changed, 220 insertions(+), 2 deletions(-) - create mode 100644 test/zdtm/customization/chardev00.c - create mode 100644 test/zdtm/customization/chardev00.desc - create mode 100644 test/zdtm/mod/anon_inode.c - -diff --git a/test/zdtm/customization/Makefile b/test/zdtm/customization/Makefile -index 93922c7..7d08db3 100644 ---- a/test/zdtm/customization/Makefile -+++ b/test/zdtm/customization/Makefile -@@ -11,7 +11,8 @@ TST_NOFILE = \ - maps05 \ - maps007 \ - maps008 \ -- notifier00 -+ notifier00 \ -+ chardev00 - - TST_FILE = \ - maps00 \ -diff --git a/test/zdtm/customization/chardev00.c b/test/zdtm/customization/chardev00.c -new file mode 100644 -index 0000000..c708699 ---- /dev/null -+++ b/test/zdtm/customization/chardev00.c -@@ -0,0 +1,65 @@ -+#include -+#include -+#include -+#include -+#include -+#include "zdtmtst.h" -+ -+#define CHARDEV_PATH "/dev/anon_test" -+ -+const char *test_doc="Tests char dev and anonmous inode map checkpoint/restore"; -+ -+static int check_maps(unsigned long addr) -+{ -+ FILE *fp = fopen("/proc/self/maps", "r"); -+ char *line = NULL; -+ size_t n = 0; -+ unsigned long start = 0; -+ -+ if (fp == NULL) { -+ pr_perror("open self maps failed"); -+ return -1; -+ } -+ -+ while (getline(&line, &n, fp) != -1) { -+ test_msg("%s", line); -+ sscanf(line, "%lx-", &start); -+ if (start == addr) -+ return 0; -+ } -+ -+ return -1; -+} -+ -+int main(int argc, char *argv[]) -+{ -+ int fd, retval = 0; -+ unsigned long addr; -+ -+ test_init(argc, argv); -+ -+ fd = open(CHARDEV_PATH, O_RDWR); -+ if (fd < 0) { -+ pr_perror("open '%s' failed", CHARDEV_PATH); -+ return -1; -+ } -+ -+ retval = ioctl(fd, 0, &addr); -+ if (retval < 0) { -+ pr_perror("create anonymous map failed"); -+ retval = -1; -+ goto out; -+ } -+ test_msg("create anonymous vma start 0x%lx\n", addr); -+ -+ test_daemon(); -+ test_waitsig(); -+ -+ retval = check_maps(addr); -+ if (retval == 0) -+ pass(); -+ else -+ fail("anonymous inode map don't restore"); -+out: -+ return retval; -+} -diff --git a/test/zdtm/customization/chardev00.desc b/test/zdtm/customization/chardev00.desc -new file mode 100644 -index 0000000..9c51ba8 ---- /dev/null -+++ b/test/zdtm/customization/chardev00.desc -@@ -0,0 +1 @@ -+{'arch': 'aarch64', 'opts': '--dump-char-dev', 'flavor': 'h', 'flags': 'suid excl', 'sysfs': '/sys/kernel/modrestore/anon_state_restore /sys/kernel/repairing_device', 'mod': 'anon_inode.ko'} -diff --git a/test/zdtm/mod/Makefile b/test/zdtm/mod/Makefile -index 10c9c9a..0bc89f7 100644 ---- a/test/zdtm/mod/Makefile -+++ b/test/zdtm/mod/Makefile -@@ -2,7 +2,7 @@ - # `ARCH` var is used in both criu and kernel, but they have the different value - # for the same architecture(e.g. arm64). Therefore, this Makefile can't be - # included in the criu Makefile. --obj-m += notifier.o -+obj-m += notifier.o anon_inode.o - - # specific the kernel devel path - # example (use `/home/me/kernel` as `KDIR`): -@@ -26,3 +26,6 @@ clean: - - notifier.ko: - $(MAKE) -C $(KDIR) M=$(MOD) notifier.ko -+ -+anon_inode.ko: -+ $(MAKE) -C $(KDIR) M=$(MOD) anon_inode.ko -diff --git a/test/zdtm/mod/anon_inode.c b/test/zdtm/mod/anon_inode.c -new file mode 100644 -index 0000000..d9c7d2a ---- /dev/null -+++ b/test/zdtm/mod/anon_inode.c -@@ -0,0 +1,148 @@ -+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static int anon_mmap(struct file *file, struct vm_area_struct *vma) -+{ -+ pr_info("call %s\n", __func__); -+ return 0; -+} -+ -+static const struct file_operations none_fops = { -+ .owner = THIS_MODULE, -+ .mmap = anon_mmap, -+}; -+ -+static unsigned long create_mmap(void) -+{ -+ struct file *filp; -+ unsigned long start; -+ -+ pr_info("call %s\n", __func__); -+ filp = anon_inode_getfile("test", &none_fops, NULL, O_RDWR); -+ if (IS_ERR(filp)) { -+ pr_warn("anon_inode_getfile('test') failed: %d\n", (int)PTR_ERR(filp)); -+ return PTR_ERR(filp); -+ } -+ -+ start = vm_mmap(filp, 0, 1<<20, PROT_READ | PROT_WRITE, MAP_SHARED, 0); -+ if (IS_ERR_VALUE(start)) { -+ pr_warn("vm_mmap failed with: %d\n", (int)PTR_ERR((void *)start)); -+ } -+ -+ fput(filp); -+ -+ return start; -+} -+ -+static int anon_inode_notifier(struct notifier_block *nb, -+ unsigned long action, void *data) -+{ -+ struct vma_anon_entry *vma_entry = data; -+ struct file *filp; -+ unsigned long start; -+ -+ filp = anon_inode_getfile("test", &none_fops, NULL, O_RDWR); -+ if (IS_ERR(filp)) { -+ pr_warn("anon_inode_getfile('test') failed: %d\n", (int)PTR_ERR(filp)); -+ return 0; -+ } -+ -+ start = vm_mmap(filp, vma_entry->start, vma_entry -> end-vma_entry->start, -+ PROT_READ | PROT_WRITE, MAP_SHARED, 0); -+ if (start != vma_entry->start) -+ pr_warn("vm_mmap() failed: %#lx\n", start); -+ -+ fput(filp); -+ return 0; -+} -+ -+static long anon_ioctl(struct file *file, unsigned int cmd, unsigned long argp) -+{ -+ unsigned long start; -+ -+ switch (cmd) { -+ case 0: -+ start = create_mmap(); -+ if (IS_ERR_VALUE(start)) -+ return -EINVAL; -+ if (put_user(start, (unsigned long __user *)argp)) -+ return -EFAULT; -+ break; -+ case IOCTL_CMD_NEEDREPAIR: -+ pr_info("call IOCTL_CMD_NEEDREPAIR"); -+ /* do nothing, just a request slot */ -+ return 17173; -+ case IOCTL_CMD_REPAIR: -+ pr_info("call IOCTL_CMD_REPAIR"); -+ /* do nothing, just a request slot */ -+ break; -+ default: -+ pr_warn("wrong cmd\n"); -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ -+static const struct file_operations anon_fops = { -+ .owner = THIS_MODULE, -+ .unlocked_ioctl = anon_ioctl, -+ .compat_ioctl = anon_ioctl, -+}; -+ -+static struct miscdevice anon_dev = { -+ .minor = MISC_DYNAMIC_MINOR, -+ .name = "anon_test", -+ .fops = &anon_fops, -+}; -+ -+static struct notifier_block anon_inode_nb = { -+ .notifier_call = anon_inode_notifier, -+}; -+ -+static int __init anon_init(void) -+{ -+ int retval; -+ -+ retval = mures_add_devname(anon_dev.name); -+ if (retval != 0) -+ goto out; -+ -+ retval = register_anon_notifier(&anon_inode_nb); -+ if (retval != 0) -+ goto del_devname; -+ -+ retval = misc_register(&anon_dev); -+ if (retval != 0) -+ goto del_notifier; -+ -+ return 0; -+ -+del_notifier: -+ unregister_anon_notifier(&anon_inode_nb); -+del_devname: -+ mures_del_devname(anon_dev.name); -+out: -+ return retval; -+} -+ -+static void __exit anon_exit(void) -+{ -+ mures_del_devname(anon_dev.name); -+ unregister_anon_notifier(&anon_inode_nb); -+ misc_deregister(&anon_dev); -+ return; -+} -+ -+module_init(anon_init); -+module_exit(anon_exit); -+MODULE_LICENSE("GPL"); --- -2.34.1 - diff --git a/0068-zdtm-add-infiniband-testcase.patch b/0068-zdtm-add-infiniband-testcase.patch deleted file mode 100644 index 975c6bbac66510a3a2f11b1515706fe0a5dc1ec6..0000000000000000000000000000000000000000 --- a/0068-zdtm-add-infiniband-testcase.patch +++ /dev/null @@ -1,256 +0,0 @@ -From f7e452ffc5318b2aac8aabde5dd8b7bee910c6f7 Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Thu, 17 Feb 2022 14:59:37 +0800 -Subject: [PATCH 68/72] zdtm: add infiniband testcase - ---- - test/zdtm/customization/Makefile | 4 +- - .../customization/infiniband_with_unix_sk.c | 55 ++++++++ - .../infiniband_with_unix_sk.desc | 1 + - test/zdtm/mod/Makefile | 5 +- - test/zdtm/mod/infiniband_kern.c | 121 ++++++++++++++++++ - 5 files changed, 184 insertions(+), 2 deletions(-) - create mode 100644 test/zdtm/customization/infiniband_with_unix_sk.c - create mode 100644 test/zdtm/customization/infiniband_with_unix_sk.desc - create mode 100644 test/zdtm/mod/infiniband_kern.c - -diff --git a/test/zdtm/customization/Makefile b/test/zdtm/customization/Makefile -index 7d08db3..728646b 100644 ---- a/test/zdtm/customization/Makefile -+++ b/test/zdtm/customization/Makefile -@@ -12,7 +12,8 @@ TST_NOFILE = \ - maps007 \ - maps008 \ - notifier00 \ -- chardev00 -+ chardev00 \ -+ infiniband_with_unix_sk - - TST_FILE = \ - maps00 \ -@@ -61,6 +62,7 @@ wait_stop: - $(TST): | $(LIB) - - maps02: get_smaps_bits.o -+infiniband_with_unix_sk: LDFLAGS += -lpthread - - %: %.sh - cp $< $@ -diff --git a/test/zdtm/customization/infiniband_with_unix_sk.c b/test/zdtm/customization/infiniband_with_unix_sk.c -new file mode 100644 -index 0000000..4a9e108 ---- /dev/null -+++ b/test/zdtm/customization/infiniband_with_unix_sk.c -@@ -0,0 +1,55 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "zdtmtst.h" -+ -+#define DEV_PATH "/dev/infiniband_test" -+ -+const char *test_doc = "test infiniband fd checkpoint/restore, and the conflict condition with the half-closing anonymous unix socket"; -+ -+static int fd; -+static int sv[2]; -+ -+static void *wait(void *arg) { -+ while (true) { -+ test_msg("sleep...\n"); -+ sleep(1); -+ } -+ -+ return NULL; -+} -+ -+int main(int argc, char *argv[]) { -+ pthread_t thread; -+ -+ test_init(argc, argv); -+ -+ if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sv) < 0) { -+ pr_perror("socketpair"); -+ return -1; -+ } -+ printf("sv[0]: %d sv[1]: %d\n", sv[0], sv[1]); -+ -+ if ((fd = open(DEV_PATH, O_RDWR)) < 0) { -+ pr_perror("open"); -+ return -1; -+ } -+ if (close(sv[1]) < 0) { -+ pr_perror("close"); -+ return -1; -+ } -+ -+ pthread_create(&thread, NULL, wait, NULL); -+ -+ test_daemon(); -+ test_waitsig(); -+ -+ pass(); -+ -+ return 0; -+} -diff --git a/test/zdtm/customization/infiniband_with_unix_sk.desc b/test/zdtm/customization/infiniband_with_unix_sk.desc -new file mode 100644 -index 0000000..43a93e6 ---- /dev/null -+++ b/test/zdtm/customization/infiniband_with_unix_sk.desc -@@ -0,0 +1 @@ -+{'arch': 'aarch64', 'opts': '--dump-char-dev', 'flavor': 'h', 'flags': 'suid excl', 'sysfs': '/sys/kernel/repairing_device', 'mod': 'infiniband_kern.ko'} -diff --git a/test/zdtm/mod/Makefile b/test/zdtm/mod/Makefile -index 0bc89f7..58f9a27 100644 ---- a/test/zdtm/mod/Makefile -+++ b/test/zdtm/mod/Makefile -@@ -2,7 +2,7 @@ - # `ARCH` var is used in both criu and kernel, but they have the different value - # for the same architecture(e.g. arm64). Therefore, this Makefile can't be - # included in the criu Makefile. --obj-m += notifier.o anon_inode.o -+obj-m += notifier.o anon_inode.o infiniband_kern.o - - # specific the kernel devel path - # example (use `/home/me/kernel` as `KDIR`): -@@ -29,3 +29,6 @@ notifier.ko: - - anon_inode.ko: - $(MAKE) -C $(KDIR) M=$(MOD) anon_inode.ko -+ -+infiniband_kern.ko: -+ $(MAKE) -C $(KDIR) M=$(MOD) infiniband_kern.ko -diff --git a/test/zdtm/mod/infiniband_kern.c b/test/zdtm/mod/infiniband_kern.c -new file mode 100644 -index 0000000..a61df3a ---- /dev/null -+++ b/test/zdtm/mod/infiniband_kern.c -@@ -0,0 +1,121 @@ -+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+static const struct file_operations none_fops = { -+ .owner = THIS_MODULE, -+}; -+ -+static const struct file_operations anonfd_fops = { -+ .owner = THIS_MODULE, -+}; -+ -+static int infiniband_open(struct inode *inode, struct file *filp) -+{ -+ long fd; -+ -+ if (!!(filp->f_flags & O_REPAIR)) { -+ pr_info("reuse\n"); -+ return 0; -+ } -+ -+ fd = anon_inode_getfd("[infinibandevent]", &anonfd_fops, NULL, 0); -+ if (fd < 0) -+ return fd; -+ else -+ filp->private_data = (void *)fd; -+ -+ return 0; -+} -+ -+static int infiniband_repair(struct file *filp, int from) -+{ -+ struct file *fp; -+ long fd; -+ int retval = 0; -+ -+ fp = anon_inode_getfile("[infinibandevent]", &anonfd_fops, NULL, 0); -+ if (IS_ERR(fp)) -+ return PTR_ERR(fp); -+ -+ fd = mures_f_dupfd(from, fp, 0); -+ if (fd != from) { -+ pr_err("different fd, old: %d, dup: %ld\n", from, fd); -+ retval = -EEXIST; -+ } -+ fput(fp); -+ filp->private_data = (long *)fd; -+ -+ return retval; -+} -+ -+static long infiniband_ioctl(struct file *filp, unsigned int cmd, unsigned long argp) -+{ -+ long retval = 0; -+ -+ switch (cmd) { -+ case IOCTL_CMD_NEEDREPAIR: -+ retval = (long )filp->private_data; -+ break; -+ case IOCTL_CMD_REPAIR: -+ retval = infiniband_repair(filp, argp); -+ break; -+ default: -+ pr_warn("wrong cmd\n"); -+ return -EINVAL; -+ } -+ return retval; -+} -+ -+static const struct file_operations infiniband_fops = { -+ .owner = THIS_MODULE, -+ .open = infiniband_open, -+ .unlocked_ioctl = infiniband_ioctl, -+ .compat_ioctl = infiniband_ioctl, -+}; -+ -+static struct miscdevice infiniband_dev = { -+ .minor = MISC_DYNAMIC_MINOR, -+ .name = "infiniband_test", -+ .fops = &infiniband_fops, -+}; -+ -+static int __init infiniband_init(void) -+{ -+ int retval; -+ -+ retval = mures_add_devname(infiniband_dev.name); -+ if (retval != 0) -+ goto out; -+ -+ retval = misc_register(&infiniband_dev); -+ if (retval != 0) -+ goto del_devname; -+ -+ return 0; -+ -+del_devname: -+ mures_del_devname(infiniband_dev.name); -+out: -+ return retval; -+} -+ -+static void __exit infiniband_exit(void) -+{ -+ mures_del_devname(infiniband_dev.name); -+ misc_deregister(&infiniband_dev); -+ return; -+} -+ -+module_init(infiniband_init); -+module_exit(infiniband_exit); -+MODULE_LICENSE("GPL"); --- -2.34.1 - diff --git a/0069-zdtm-add-share-port-testcase.patch b/0069-zdtm-add-share-port-testcase.patch deleted file mode 100644 index a7440b3a5d44bcee570c42ebcb25da48ea340521..0000000000000000000000000000000000000000 --- a/0069-zdtm-add-share-port-testcase.patch +++ /dev/null @@ -1,145 +0,0 @@ -From b766a8d6b04e9c358cd221b68405a205156c1fe2 Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Thu, 17 Feb 2022 17:19:46 +0800 -Subject: [PATCH 69/72] zdtm: add share port testcase - ---- - test/zdtm/customization/Makefile | 3 +- - test/zdtm/customization/tcp00.c | 101 +++++++++++++++++++++++++++++ - test/zdtm/customization/tcp00.desc | 1 + - 3 files changed, 104 insertions(+), 1 deletion(-) - create mode 100644 test/zdtm/customization/tcp00.c - create mode 100644 test/zdtm/customization/tcp00.desc - -diff --git a/test/zdtm/customization/Makefile b/test/zdtm/customization/Makefile -index 728646b..1111908 100644 ---- a/test/zdtm/customization/Makefile -+++ b/test/zdtm/customization/Makefile -@@ -13,7 +13,8 @@ TST_NOFILE = \ - maps008 \ - notifier00 \ - chardev00 \ -- infiniband_with_unix_sk -+ infiniband_with_unix_sk \ -+ tcp00 - - TST_FILE = \ - maps00 \ -diff --git a/test/zdtm/customization/tcp00.c b/test/zdtm/customization/tcp00.c -new file mode 100644 -index 0000000..d1ead82 ---- /dev/null -+++ b/test/zdtm/customization/tcp00.c -@@ -0,0 +1,101 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "zdtmtst.h" -+ -+#define PORT 17173 -+ -+const char *test_doc = "Test TCP SO_REUSEADDR checkpoint/restore using `share_{src,dst}_ports`"; -+ -+static int sock_bind_and_listen(void) -+{ -+ int serv_sk; -+ int optval = 1; -+ struct sockaddr_in serv = { -+ .sin_family = AF_INET, -+ .sin_addr.s_addr = htonl(INADDR_ANY), -+ .sin_port = htons(PORT), -+ }; -+ -+ serv_sk = socket(AF_INET, SOCK_STREAM, 0); -+ if (serv_sk < 0) { -+ pr_perror("server socket failed"); -+ exit(1); -+ } -+ -+ if (setsockopt(serv_sk, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)) < 0) { -+ pr_perror("setsockopt"); -+ exit(1); -+ } -+ -+ if (bind(serv_sk, (struct sockaddr *)&serv, sizeof(serv)) < 0) { -+ pr_perror("bind"); -+ exit(1); -+ } -+ -+ if (listen(serv_sk, 5) != 0) { -+ pr_perror("listen"); -+ exit(1); -+ } -+ -+ return serv_sk; -+} -+ -+static void client_connect(void) -+{ -+ int sk; -+ struct sockaddr_in sockaddr = { -+ .sin_family = AF_INET, -+ }; -+ -+ sk = socket(AF_INET, SOCK_STREAM, 0); -+ if (sk < 0) { -+ pr_perror("client socket failed"); -+ exit(1); -+ } -+ -+ sockaddr.sin_addr.s_addr = inet_addr("127.0.0.1"); -+ sockaddr.sin_port = htons(PORT); -+ -+ if (connect(sk, (struct sockaddr *)&sockaddr, sizeof(sockaddr)) < 0) { -+ pr_perror("connect failed"); -+ exit(1); -+ } -+ -+ close(sk); -+} -+ -+int main(int argc, char *argv[]) -+{ -+ int serv_sk; -+ int optval = 0; -+ socklen_t len = sizeof(optval); -+ -+ test_init(argc, argv); -+ -+ serv_sk = sock_bind_and_listen(); -+ -+ test_msg("listen 0.0.0.0: %d\n", PORT); -+ /* create CLOSE-WAIT status socket */ -+ client_connect(); -+ -+ test_daemon(); -+ test_waitsig(); -+ -+ if (getsockopt(serv_sk, SOL_SOCKET, SO_REUSEADDR, &optval, &len) != 0) { -+ pr_perror("getsockopt failed"); -+ return -1; -+ } -+ -+ if (optval != 1) { -+ pr_err("SO_REUSEADDR flag is %d, should 1", optval); -+ } else -+ pass(); -+ -+ return 0; -+} -\ No newline at end of file -diff --git a/test/zdtm/customization/tcp00.desc b/test/zdtm/customization/tcp00.desc -new file mode 100644 -index 0000000..bc3b4a8 ---- /dev/null -+++ b/test/zdtm/customization/tcp00.desc -@@ -0,0 +1 @@ -+{'arch': 'aarch64', 'opts': '--use-fork-pid --share-src-ports=17173 --share-dst-ports=17173 --skip-in-flight', 'flavor': 'h', 'sysfs': '/sys/kernel/repair_share_socket'} --- -2.34.1 - diff --git a/0070-zdtm-tmp-test-script.patch b/0070-zdtm-tmp-test-script.patch deleted file mode 100644 index 51a1a835125c891dc89148ffc8c5817dae990a2b..0000000000000000000000000000000000000000 --- a/0070-zdtm-tmp-test-script.patch +++ /dev/null @@ -1,59 +0,0 @@ -From a4f00a225ebfed401aed49956eefad391071d0ce Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Thu, 17 Feb 2022 11:02:08 +0800 -Subject: [PATCH 70/72] zdtm: tmp test script - ---- - test/jenkins/criu-lib.sh | 2 +- - test/jenkins/criu-test.sh | 26 ++++++++++++++++++++++++++ - 2 files changed, 27 insertions(+), 1 deletion(-) - create mode 100644 test/jenkins/criu-test.sh - -diff --git a/test/jenkins/criu-lib.sh b/test/jenkins/criu-lib.sh -index 72d41b5..89dc936 100644 ---- a/test/jenkins/criu-lib.sh -+++ b/test/jenkins/criu-lib.sh -@@ -15,7 +15,7 @@ function prep() - - ulimit -c unlimited && - export CFLAGS=-g -- git clean -dfx && -+# git clean -dfx && - make -j 4 && - make -j 4 -C test/zdtm/ && - make -C test zdtm_ct && -diff --git a/test/jenkins/criu-test.sh b/test/jenkins/criu-test.sh -new file mode 100644 -index 0000000..3035f21 ---- /dev/null -+++ b/test/jenkins/criu-test.sh -@@ -0,0 +1,26 @@ -+#!/bin/bash -+ -+set -e -+source `dirname $0`/criu-lib.sh -+prep -+ -+rm -rf /var/run/criu.kdat -+ -+make zdtm -+ -+if [ -z $(grep 58467 /etc/group) ]; then -+ groupadd -g 58467 zdtm -+fi -+if [ -z $(grep 58467 /etc/passwd) ]; then -+ useradd -u 18943 -g 58467 zdtm -+fi -+ -+#./test/zdtm.py run --all --keep-going --report report -f h --ignore-taint --parallel 1 --load-pinmem-dev || fail -+ -+#./test/zdtm.py run -t zdtm/static/del_standalone_un --keep-going -f h --ignore-taint --parallel 1 --load-pinmem-dev --keep-img always -+ -+./test/zdtm.py run -t zdtm/customization/chardev00 -t zdtm/customization/notifier00 --keep-going -f h --ignore-taint --parallel 1 --load-pinmem-dev --keep-img always -+ -+#./test/zdtm.py run -t zdtm/static/socket-tcp-nfconntrack --join-ns --keep-going --ignore-taint --parallel 1 --load-pinmem-dev --keep-img always -+ -+./test/zdtm.py run -t zdtm/customization/tcp00 --keep-going -f h --ignore-taint --parallel 1 --load-pinmem-dev --keep-img always --- -2.34.1 - diff --git a/0071-mod-add-criu-indepent-test.patch b/0071-mod-add-criu-indepent-test.patch deleted file mode 100644 index f44537bc313e9be2601f2d4506ee89b77fc09dec..0000000000000000000000000000000000000000 --- a/0071-mod-add-criu-indepent-test.patch +++ /dev/null @@ -1,512 +0,0 @@ -From 03d188c492efe079a520319ca48e40843367ddcf Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Fri, 18 Feb 2022 16:22:00 +0800 -Subject: [PATCH 71/72] mod: add criu-indepent test - -Signed-off-by: fu.lin ---- - test/modules/Makefile | 21 ++++++ - test/modules/idr.c | 79 +++++++++++++++++++++ - test/modules/jump_table.c | 107 ++++++++++++++++++++++++++++ - test/modules/var_kern.c | 72 +++++++++++++++++++ - test/modules/var_user.py | 40 +++++++++++ - test/modules/workqueue_kern.c | 130 ++++++++++++++++++++++++++++++++++ - 6 files changed, 449 insertions(+) - create mode 100644 test/modules/Makefile - create mode 100644 test/modules/idr.c - create mode 100644 test/modules/jump_table.c - create mode 100644 test/modules/var_kern.c - create mode 100644 test/modules/var_user.py - create mode 100644 test/modules/workqueue_kern.c - -diff --git a/test/modules/Makefile b/test/modules/Makefile -new file mode 100644 -index 0000000..9458aa7 ---- /dev/null -+++ b/test/modules/Makefile -@@ -0,0 +1,21 @@ -+obj-m := var_kern.o workqueue_kern.o jump_table.o idr.o -+ -+KDIR := /lib/modules/`uname -r`/build -+ -+all: -+ make -C $(KDIR) M=$(PWD) modules -+ -+clean: -+ make -C $(KDIR) M=$(PWD) clean -+ -+var_kern.ko: -+ make -C $(KDIR) M=$(PWD) var_kern.ko -+ -+workqueue_kern.ko: -+ make -C $(KDIR) M=$(PWD) workqueue_kern.ko -+ -+jump_table.ko: -+ make -C $(KDIR) M=$(PWD) jump_table.ko -+ -+idr.ko: -+ make -C $(KDIR) M=$(PWD) idr.ko -diff --git a/test/modules/idr.c b/test/modules/idr.c -new file mode 100644 -index 0000000..67f248e ---- /dev/null -+++ b/test/modules/idr.c -@@ -0,0 +1,79 @@ -+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -+ -+#include -+#include -+#include -+#include -+ -+DEFINE_IDR(idr_head); -+const int placeholder = 0; -+static int idr_uid = 0; -+ -+static int idr_test_show_internal(int id, void *p, void *data) -+{ -+ pr_info("id: %d p %pK\n", id, p); -+ sprintf(data+strlen(data), "%d\n", id); -+ return 0; -+} -+ -+static ssize_t idr_test_show(struct kobject *kobj, -+ struct kobj_attribute *attr, -+ char *buf) -+{ -+ idr_for_each(&idr_head, idr_test_show_internal, buf); -+ return strlen(buf); -+} -+ -+static ssize_t idr_test_store(struct kobject *kobj, -+ struct kobj_attribute *attr, -+ const char *buf, size_t count) -+{ -+ const unsigned long max = 65536; -+ unsigned id = 0; -+ int retval; -+ -+ if (sscanf(buf, "%u", &id) != 1) { -+ pr_err("sscanf empty\n"); -+ return -EINVAL; -+ } -+ -+ retval = idr_alloc_u32(&idr_head, (void *)&placeholder, &id, max, GFP_KERNEL); -+ pr_info("alloc idr id %u, errno %d\n", id, retval); -+ return retval < 0 ? retval : count; -+} -+ -+static struct kobj_attribute idr_test = __ATTR_RW(idr_test); -+ -+static int __init mod_init(void) -+{ -+ return sysfs_create_file(kernel_kobj, &idr_test.attr); -+} -+ -+static void __exit mod_exit(void) -+{ -+ sysfs_remove_file(kernel_kobj, &idr_test.attr); -+ idr_destroy(&idr_head); -+ return; -+} -+ -+static int __init mod_resume(void) -+{ -+ int retval = mures_restore_idr(idr_uid, &idr_head); -+ -+ if (retval == 0) -+ retval = sysfs_create_file(kernel_kobj, &idr_test.attr); -+ return retval; -+} -+ -+static int __exit mod_suspend(void) -+{ -+ sysfs_remove_file(kernel_kobj, &idr_test.attr); -+ return mures_save_idr(idr_uid, &idr_head); -+} -+ -+module_init(mod_init); -+module_exit(mod_exit); -+module_resume(mod_resume); -+module_suspend(mod_suspend); -+ -+MODULE_LICENSE("GPL"); -\ No newline at end of file -diff --git a/test/modules/jump_table.c b/test/modules/jump_table.c -new file mode 100644 -index 0000000..8648c2a ---- /dev/null -+++ b/test/modules/jump_table.c -@@ -0,0 +1,107 @@ -+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -+ -+#include -+#include -+#include -+#include -+#include -+ -+struct func_node { -+ struct hlist_node hash; -+ unsigned long key; -+ unsigned long value; -+}; -+ -+static int status __attribute__((section(".resume_0"))); -+ -+/* -+ * The `mures_vcall()` can't used in irq context because of the implementation. -+ * Therefore, we must generate cache. -+ */ -+DEFINE_HASHTABLE(__ro_after_init cache, 2); -+ -+static int foo(void) -+{ -+ status += 1; -+ return status; -+} -+ -+static void *find_func(unsigned long addr); -+ -+static ssize_t jp_test_show(struct kobject *kobj, -+ struct kobj_attribute *attr, -+ char *buf) -+{ -+ int (*func)(void) = find_func((unsigned long)foo); -+ ssize_t count = 0; -+ -+ if (func == NULL) { -+ count = sprintf(buf, "Not Found\n"); -+ } else { -+ count = sprintf(buf, "%d", func()); -+ } -+ -+ return count; -+} -+ -+static struct kobj_attribute jp_test = __ATTR_RO(jp_test); -+ -+struct func_node nodes[] __ro_after_init = { -+ { .key = (unsigned long)foo, }, -+}; -+ -+static void *find_func(unsigned long addr) -+{ -+ struct func_node *obj; -+ int i; -+ -+ pr_info("finding addr: %lx\n", addr); -+ hash_for_each(cache, i, obj, hash) {\ -+ pr_info("found key: %lx, val: %lx\n", obj->key, obj->value); -+ if (obj->key == addr) -+ return (void *)obj->value; -+ } -+ -+ return NULL; -+} -+ -+static void __init build_cache(void) -+{ -+ int i; -+ -+ for (i = 0; i < ARRAY_SIZE(nodes); i++) { -+ nodes[i].value = mures_vcall(nodes[i].key); -+ hash_add(cache, &nodes[i].hash, nodes[i].key); -+ } -+} -+ -+static int __init mod_init(void) -+{ -+ build_cache(); -+ return sysfs_create_file(kernel_kobj, &jp_test.attr); -+} -+ -+static void __exit mod_exit(void) -+{ -+ sysfs_remove_file(kernel_kobj, &jp_test.attr); -+ return; -+} -+ -+static int __init mod_resume(void) -+{ -+ build_cache(); -+ return sysfs_create_file(kernel_kobj, &jp_test.attr); -+} -+ -+static int __exit mod_suspend(void) -+{ -+ sysfs_remove_file(kernel_kobj, &jp_test.attr); -+ return 0; -+} -+ -+module_init(mod_init); -+module_exit(mod_exit); -+module_resume(mod_resume); -+module_suspend(mod_suspend); -+ -+MODULE_LICENSE("GPL"); -\ No newline at end of file -diff --git a/test/modules/var_kern.c b/test/modules/var_kern.c -new file mode 100644 -index 0000000..4321e3b ---- /dev/null -+++ b/test/modules/var_kern.c -@@ -0,0 +1,72 @@ -+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -+ -+#include -+#include -+#include -+ -+/* test variable persistence */ -+ -+static int mod_int __attribute__((section(".resume_0"))); -+static char *mod_str1 __attribute__((section(".resume_1"))) = "init"; -+static char *mod_str2 __attribute__((section(".resume_2"))) = "upgrade"; -+static char *mod_str __attribute__((section(".resume_3"))); -+ -+static ssize_t var_test_show(struct kobject *kobj, -+ struct kobj_attribute *attr, -+ char *buf) -+{ -+ ssize_t count = 0; -+ -+ count += sprintf(buf, "%d", mod_int); -+ count += sprintf(buf+count, " %s", mod_str); -+ -+ return count; -+} -+ -+static struct kobj_attribute sysfs_var = __ATTR_RO(var_test); -+ -+static __init int mod1_resume(void) -+{ -+ mod_int += 1; -+ mod_str = mod_str2; -+ -+ pr_info("This is %s, index %d\n", __func__, mod_int); -+ -+ return sysfs_create_file(kernel_kobj, &sysfs_var.attr); -+} -+ -+static __exit int mod1_suspend(void) -+{ -+ mod_int += 1; -+ -+ pr_info("This is %s, index %d\n", __func__, mod_int); -+ sysfs_remove_file(kernel_kobj, &sysfs_var.attr); -+ -+ return 0; -+} -+ -+static __init int mod1_init(void) -+{ -+ mod_int = 0; -+ mod_str = mod_str1; -+ -+ pr_info("This is %s, index %d\n", __func__, mod_int); -+ -+ return sysfs_create_file(kernel_kobj, &sysfs_var.attr); -+} -+ -+static __exit void mod1_exit(void) -+{ -+ mod_int += 1; -+ -+ pr_info("This is %s, index %d\n", __func__, mod_int); -+ sysfs_remove_file(kernel_kobj, &sysfs_var.attr); -+ -+ return; -+} -+ -+module_resume(mod1_resume); -+module_suspend(mod1_suspend); -+module_init(mod1_init); -+module_exit(mod1_exit); -+MODULE_LICENSE("GPL"); -diff --git a/test/modules/var_user.py b/test/modules/var_user.py -new file mode 100644 -index 0000000..98c5193 ---- /dev/null -+++ b/test/modules/var_user.py -@@ -0,0 +1,40 @@ -+import unittest -+import subprocess -+ -+ -+class TestVarMethods(unittest.TestCase): -+ mod_name = "var_kern" -+ -+ def unload_mod(self): -+ with open("/proc/modules") as f: -+ for line in f.readlines(): -+ words = line.split() -+ if words[0] == self.mod_name: -+ subprocess.check_call(["rmmod", self.mod_name]) -+ break -+ -+ def setUp(self): -+ subprocess.check_call(["make", "var_kern.ko"]) -+ self.unload_mod() -+ -+ def tearDown(self): -+ mod = f"{self.mod_name}.ko" -+ self.unload_mod() -+ -+ def test_var(self): -+ mod = f"{self.mod_name}.ko" -+ subprocess.check_call(["insmod", mod]) -+ with open("/sys/kernel/var_test") as f: -+ line = f.readline() -+ self.assertEqual(line, "0 init") -+ subprocess.check_call(["rmmod", "-r", mod]) -+ subprocess.check_call(["rmmod", mod]) -+ subprocess.check_call(["insmod", "-r", mod]) -+ with open("/sys/kernel/var_test") as f: -+ line = f.readline() -+ self.assertEqual(line, "2 upgrade") -+ subprocess.check_call(["rmmod", mod]) -+ -+ -+if __name__ == '__main__': -+ unittest.main() -diff --git a/test/modules/workqueue_kern.c b/test/modules/workqueue_kern.c -new file mode 100644 -index 0000000..cecfb8c ---- /dev/null -+++ b/test/modules/workqueue_kern.c -@@ -0,0 +1,130 @@ -+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+struct mod_status { -+ struct workqueue_struct *wq; -+}; -+ -+static struct workqueue_struct *wq; -+static int wq_status __attribute__((section(".resume_0"))); -+ -+static void worker_func(struct work_struct *work) -+{ -+ wq_status += 1; -+ pr_info("worker run...\n"); -+ mdelay(100); -+ pr_info("worker end.\n"); -+ kfree(work); -+} -+ -+static ssize_t wq_test_show(struct kobject *kobj, -+ struct kobj_attribute *attr, -+ char *buf) -+{ -+ flush_workqueue(wq); -+ return sprintf(buf, "%pK %d", wq, wq_status); -+} -+ -+static struct kobj_attribute wq_test = __ATTR_RO(wq_test); -+ -+static int __init mod_init(void) -+{ -+ int retval; -+ -+ retval = sysfs_create_file(kernel_kobj, &wq_test.attr); -+ if (retval != 0) { -+ pr_err("sysfs_create_file failed.\n"); -+ return retval; -+ } -+ -+ wq = alloc_workqueue("workqueue_kern_test", WQ_UNBOUND, 0); -+ if (wq == NULL) { -+ pr_err("unable to allocate workqueue\n"); -+ sysfs_remove_file(kernel_kobj, &wq_test.attr); -+ retval = -ENOMEM; -+ goto out; -+ } -+ -+ retval = 0; -+out: -+ return retval; -+} -+ -+static void __exit mod_exit(void) -+{ -+ destroy_workqueue(wq); -+ sysfs_remove_file(kernel_kobj, &wq_test.attr); -+} -+ -+static int __init mod_resume(void) -+{ -+ struct mod_status *data; -+ int retval; -+ -+ data = get_module_state_space(KBUILD_MODNAME, NULL); -+ if (!data) { -+ pr_info("get_module_state_space failure\n"); -+ return -ENOMEM; -+ } -+ wq = data->wq; -+ -+ retval = sysfs_create_file(kernel_kobj, &wq_test.attr); -+ if (retval != 0) { -+ pr_err("sysfs_create_file failed.\n"); -+ return retval; -+ } -+ -+ return resume_workqueue(wq); -+} -+ -+static int __exit queue_worker(void) -+{ -+ struct delayed_work *worker = kzalloc(sizeof(struct work_struct), GFP_KERNEL); -+ -+ if (worker == NULL) { -+ pr_err("alloc worker space failed\n"); -+ return -ENOMEM; -+ } -+ -+ INIT_DELAYED_WORK(worker, worker_func); -+ queue_delayed_work(wq, worker, 100); -+ return 0; -+} -+ -+static int __exit mod_suspend(void) -+{ -+ struct mod_status *data; -+ int retval; -+ -+ data = alloc_module_state_space(KBUILD_MODNAME, sizeof(*data)); -+ if (data == NULL) { -+ pr_err("alloc_module_state_space failed\n"); -+ return -ENOMEM; -+ } -+ -+ data->wq = wq; -+ if (queue_worker() != 0) -+ return -ENOMEM; -+ -+ retval = suspend_workqueue(wq); -+ if (retval != 0) { -+ pr_err("suspend workqueue failed\n"); -+ return retval; -+ } -+ -+ sysfs_remove_file(kernel_kobj, &wq_test.attr); -+ return 0; -+} -+ -+module_init(mod_init); -+module_exit(mod_exit); -+module_resume(mod_resume); -+module_suspend(mod_suspend); -+ -+MODULE_LICENSE("GPL"); -\ No newline at end of file --- -2.34.1 - diff --git a/0072-kabichk-add-KABI-check-code.patch b/0072-kabichk-add-KABI-check-code.patch deleted file mode 100644 index e468742271fa4a07cf0e1030f7996b3db00e0507..0000000000000000000000000000000000000000 --- a/0072-kabichk-add-KABI-check-code.patch +++ /dev/null @@ -1,611 +0,0 @@ -From 57f1017a9c971d8c3a5ef82d04e6c4bc584e9f00 Mon Sep 17 00:00:00 2001 -From: "fu.lin" -Date: Fri, 8 Apr 2022 16:14:40 +0800 -Subject: [PATCH 72/72] kabichk: add KABI check code - -Theory: - * The export symbol CRCs source: - - /boot/symvers-$(uname -r).gz for Image and in tree modules: the - ima mechanism could ensure the file credibility and non-tamper. - - ELF section `.symtab` for out of tree modules: the export symbols - has `__crc_` prefix, and `st_shndx` is `SHN_ABS` - * compare CRC value between the known and the module - -Design Details: - - collect export symbols from - * collect in tree symbols from `/boot/symvers-.gz` - * collect out of tree module symbols from the module self - - compare external symbols stored in `__versions` section for each module - -Usage: - python3 -m upgchk.kabichk \ - [[-r ],...] \ - [[-m ],...] \ - -c -Example: - python3 -m upgchk.kabichk -c /lib/modules/$(uname -r)/kernel/fs/mbcache.ko - python3 -m upgchk.kabichk -m notify.ko -c osp_proc.ko - -Note: - The pyelftools library can't be import, therefore using elfutils - wrapper to replace the library. - -Signed-off-by: fu.lin ---- - upgchk/Makefile | 23 ++++ - upgchk/lib/modsym.c | 268 ++++++++++++++++++++++++++++++++++++++ - upgchk/lib/modsym.h | 39 ++++++ - upgchk/setup.py | 20 +++ - upgchk/upgchk/__init__.py | 11 ++ - upgchk/upgchk/kabichk.py | 163 +++++++++++++++++++++++ - 6 files changed, 524 insertions(+) - create mode 100644 upgchk/Makefile - create mode 100644 upgchk/lib/modsym.c - create mode 100644 upgchk/lib/modsym.h - create mode 100644 upgchk/setup.py - create mode 100644 upgchk/upgchk/__init__.py - create mode 100644 upgchk/upgchk/kabichk.py - -diff --git a/upgchk/Makefile b/upgchk/Makefile -new file mode 100644 -index 0000000..df6b60e ---- /dev/null -+++ b/upgchk/Makefile -@@ -0,0 +1,23 @@ -+.PHONY: build install clean -+ -+PYTHON=/usr/bin/python3 -+TEST= -+PARAMETERS= -+ -+build: -+ ${PYTHON} setup.py build -+ -+dist: -+ ${PYTHON} setup.py sdist -+ -+install: -+ ${PYTHON} setup.py install -+ -+clean: -+ ${PYTHON} setup.py clean -+ rm -rf \ -+ build \ -+ dist \ -+ upgchk/__pycache__ \ -+ upgchk/*.so \ -+ upgchk.egg-info -diff --git a/upgchk/lib/modsym.c b/upgchk/lib/modsym.c -new file mode 100644 -index 0000000..eb75f68 ---- /dev/null -+++ b/upgchk/lib/modsym.c -@@ -0,0 +1,268 @@ -+#define PY_SSIZE_T_CLEAN -+#include -+ -+#include -+#include -+#include -+#include -+ -+#include "modsym.h" -+ -+static Elf_Data *get_elf_sec_data(Elf *elf, const char *sec_name) -+{ -+ Elf_Scn *scn = NULL; -+ size_t strndx; -+ GElf_Shdr mem; -+ GElf_Shdr *shdr; -+ const char *name; -+ -+ /* To get the section names. */ -+ if (elf_getshdrstrndx(elf, &strndx) != 0) -+ return NULL; -+ -+ while ((scn = elf_nextscn(elf, scn)) != NULL) { -+ shdr = gelf_getshdr(scn, &mem); -+ name = elf_strptr (elf, strndx, shdr->sh_name); -+ -+ if (strcmp(name, sec_name) == 0) -+ return elf_getdata(scn, NULL); -+ } -+ -+ return NULL; -+} -+ -+static void modvers_dealloc(PyObject *obj) -+{ -+ ModVersState *mvgstate = (ModVersState *)obj; -+ -+ elf_end(mvgstate->elf); -+ return; -+} -+ -+static PyObject *modvers_iternext(PyObject *obj) -+{ -+ ModVersState *mvgstate = (ModVersState *)obj; -+ struct modversion_info *info = mvgstate->d->d_buf; -+ PyObject *elem = NULL; -+ -+ if (mvgstate->seq_index >= 0) { -+ size_t i = mvgstate->enum_index; -+ /* seq_index < 0 means that the generator is exhausted. -+ * Returning NULL in this case is enough. The next() builtin -+ * will raise the StopIteration error for us. -+ */ -+ elem = Py_BuildValue("(sk)", info[i].name, info[i].crc); -+ mvgstate->seq_index -= 1; -+ mvgstate->enum_index += 1; -+ } else { -+ /* The reference to the sequence is cleared in the first -+ * generator call after its exhaustion (after the call that -+ * returned the last element). -+ * Py_CLEAR will be harmless for subsequent calls since it's -+ * idempotent on NULL. -+ */ -+ mvgstate->seq_index = -1; -+ } -+ -+ return elem; -+} -+ -+static PyObject *modvers_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) -+{ -+ ModVersState *mvgstate = NULL; -+ PyObject *file; -+ int fd; -+ Py_ssize_t len; -+ -+ if (!PyArg_ParseTuple(args, "O", &file)) -+ return NULL; -+ -+ fd = PyObject_AsFileDescriptor(file); -+ if (fd < 0) -+ return NULL; -+ -+ mvgstate = (ModVersState *)type->tp_alloc(type, 0); -+ if (mvgstate == NULL) -+ return NULL; -+ -+ elf_version(EV_CURRENT); -+ mvgstate->elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); -+ if (mvgstate->elf == NULL) { -+ PyErr_Format(PyExc_TypeError, "File not usable: %s\n", elf_errmsg(-1)); -+ goto free; -+ } -+ -+ mvgstate->d = get_elf_sec_data(mvgstate->elf, VERS_SEC_NAME); -+ if (mvgstate->d == NULL) { -+ PyErr_Format(PyExc_TypeError, "Can't find ELF section `%s`\n", VERS_SEC_NAME); -+ goto elf_end; -+ } -+ -+ len = mvgstate->d->d_size / sizeof(struct modversion_info); -+ mvgstate->seq_index = len - 1; -+ mvgstate->enum_index = 0; -+ -+ return (PyObject *)mvgstate; -+ -+elf_end: -+ elf_end(mvgstate->elf); -+free: -+ type->tp_free(mvgstate); -+ return NULL; -+} -+ -+PyTypeObject PyModVersGen_Type = { -+ PyVarObject_HEAD_INIT(NULL, 0) -+ .tp_name = "modvers", -+ .tp_basicsize = sizeof(PyModVersGen_Type), -+ .tp_itemsize = 0, -+ .tp_dealloc = modvers_dealloc, -+ .tp_flags = Py_TPFLAGS_DEFAULT, -+ .tp_iter = PyObject_SelfIter, -+ .tp_iternext = modvers_iternext, -+ .tp_alloc = PyType_GenericAlloc, -+ .tp_new = modvers_new, -+}; -+ -+static void modcrcs_dealloc(PyObject *obj) -+{ -+ ModCRCsState *mcgstate = (ModCRCsState *)obj; -+ -+ elf_end(mcgstate->elf); -+ return; -+} -+ -+static PyObject *modcrcs_iternext(PyObject *obj) -+{ -+ ModCRCsState *mcgstate = (ModCRCsState *)obj; -+ const char *strtab = mcgstate->strtab->d_buf; -+ GElf_Sym *sym = mcgstate->symtab->d_buf; -+ PyObject *elem = NULL; -+ -+ while (mcgstate->seq_index >= 0) { -+ size_t i = mcgstate->enum_index; -+ const char *name = strtab + sym[i].st_name; -+ -+ mcgstate->seq_index -= 1; -+ mcgstate->enum_index += 1; -+ -+ /* -+ * If the symbol has '__crc_' prefix and absolute value, -+ * it's export symbol, and has CRC. -+ */ -+ if (strncmp(name, CRC_SYM_PREFIX, strlen(CRC_SYM_PREFIX)) == 0 -+ && sym[i].st_shndx == SHN_ABS) { -+ elem = Py_BuildValue("(sk)", -+ name+strlen(CRC_SYM_PREFIX), -+ sym[i].st_value); -+ break; -+ } -+ } -+ -+ return elem; -+} -+ -+static PyObject *modcrcs_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) -+{ -+ ModCRCsState *mcgstate = NULL; -+ PyObject *file; -+ Elf_Data *d; -+ int fd; -+ Py_ssize_t len; -+ -+ if (!PyArg_ParseTuple(args, "O", &file)) -+ return NULL; -+ -+ fd = PyObject_AsFileDescriptor(file); -+ if (fd < 0) -+ return NULL; -+ -+ mcgstate = (ModCRCsState *)type->tp_alloc(type, 0); -+ if (mcgstate == NULL) -+ return NULL; -+ -+ elf_version(EV_CURRENT); -+ mcgstate->elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); -+ if (mcgstate->elf == NULL) { -+ PyErr_Format(PyExc_TypeError, "File not usable: %s\n", elf_errmsg(-1)); -+ goto free; -+ } -+ -+ mcgstate->strtab = get_elf_sec_data(mcgstate->elf, STRT_SEC_NAME); -+ if (mcgstate->strtab == NULL) { -+ PyErr_Format(PyExc_TypeError, "Can't find ELF section `%s`\n", STRT_SEC_NAME); -+ goto elf_end; -+ } -+ -+ mcgstate->symtab = get_elf_sec_data(mcgstate->elf, SYMT_SEC_NAME); -+ if (mcgstate->symtab == NULL) { -+ PyErr_Format(PyExc_TypeError, "Can't find ELF section `%s`\n", SYMT_SEC_NAME); -+ goto elf_end; -+ } -+ -+ len = mcgstate->symtab->d_size / sizeof(GElf_Sym); -+ mcgstate->seq_index = len - 1; -+ mcgstate->enum_index = 0; -+ -+ return (PyObject *)mcgstate; -+ -+elf_end: -+ elf_end(mcgstate->elf); -+free: -+ type->tp_free(mcgstate); -+ return NULL; -+} -+ -+PyTypeObject PyModCRCsGen_Type = { -+ PyVarObject_HEAD_INIT(NULL, 0) -+ .tp_name = "modcrcs", -+ .tp_basicsize = sizeof(PyModCRCsGen_Type), -+ .tp_itemsize = 0, -+ .tp_dealloc = modcrcs_dealloc, -+ .tp_flags = Py_TPFLAGS_DEFAULT, -+ .tp_iter = PyObject_SelfIter, -+ .tp_iternext = modcrcs_iternext, -+ .tp_alloc = PyType_GenericAlloc, -+ .tp_new = modcrcs_new, -+}; -+ -+/* Module structure */ -+/* Module structure */ -+static struct PyModuleDef modvers_module = { -+ PyModuleDef_HEAD_INIT, -+ .m_name = "modsym", -+ .m_doc = "iter `" VERS_SEC_NAME "` section items", -+ .m_size = -1, -+}; -+ -+/* Module initialization function */ -+PyMODINIT_FUNC PyInit_modsym(void) -+{ -+ PyObject *m = PyModule_Create(&modvers_module); -+ if (m == NULL) -+ return NULL; -+ -+ if (PyType_Ready(&PyModVersGen_Type) < 0) -+ return NULL; -+ -+ Py_INCREF(&PyModVersGen_Type); -+ if (PyModule_AddObject(m, PyModVersGen_Type.tp_name, -+ (PyObject *)&PyModVersGen_Type) < 0) -+ goto free_vers; -+ -+ if (PyType_Ready(&PyModCRCsGen_Type) < 0) -+ goto free_vers; -+ -+ Py_INCREF(&PyModCRCsGen_Type); -+ if (PyModule_AddObject(m, PyModCRCsGen_Type.tp_name, -+ (PyObject *)&PyModCRCsGen_Type) < 0) -+ goto free_crcs; -+ -+ return m; -+free_crcs: -+ Py_DECREF(&PyModCRCsGen_Type); -+free_vers: -+ Py_DECREF(&PyModVersGen_Type); -+ Py_DECREF(m); -+ return NULL; -+} -diff --git a/upgchk/lib/modsym.h b/upgchk/lib/modsym.h -new file mode 100644 -index 0000000..b8069c3 ---- /dev/null -+++ b/upgchk/lib/modsym.h -@@ -0,0 +1,39 @@ -+#ifndef __PYTHON_MODSYM_H__ -+#define __PYTHON_MODSYM_H__ -+ -+#include -+ -+typedef struct { -+ PyObject_HEAD -+ Py_ssize_t seq_index; -+ Py_ssize_t enum_index; -+ Elf *elf; -+ Elf_Data *d; -+} ModVersState; -+ -+#define VERS_SEC_NAME "__versions" -+ -+/* --- the following is copied from linux src --- */ -+#define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long)) -+#define MODULE_NAME_LEN MAX_PARAM_PREFIX_LEN -+ -+struct modversion_info { -+ unsigned long crc; -+ char name[MODULE_NAME_LEN]; -+}; -+/* --- end --- */ -+ -+typedef struct { -+ PyObject_HEAD -+ Py_ssize_t seq_index; -+ Py_ssize_t enum_index; -+ Elf *elf; -+ Elf_Data *strtab; -+ Elf_Data *symtab; -+} ModCRCsState; -+ -+#define STRT_SEC_NAME ".strtab" -+#define SYMT_SEC_NAME ".symtab" -+#define CRC_SYM_PREFIX "__crc_" -+ -+#endif /* __PYTHON_MODSYM_H__ */ -diff --git a/upgchk/setup.py b/upgchk/setup.py -new file mode 100644 -index 0000000..6758c95 ---- /dev/null -+++ b/upgchk/setup.py -@@ -0,0 +1,20 @@ -+#!/usr/bin/python3 -+# -*- coding: utf-8 -*- -+ -+from setuptools import setup, Extension -+ -+if __name__ == "__main__": -+ -+ setup(name="upgchk", -+ version="0.1", -+ description="Check the kernel upgrading environment", -+ -+ packages=["upgchk"], -+ ext_modules=[ -+ Extension("modsym", -+ sources=["lib/modsym.c"], -+ libraries=["elf"]) -+ ], -+ -+ python_requires='>=3.6', -+ ) -diff --git a/upgchk/upgchk/__init__.py b/upgchk/upgchk/__init__.py -new file mode 100644 -index 0000000..c831e1d ---- /dev/null -+++ b/upgchk/upgchk/__init__.py -@@ -0,0 +1,11 @@ -+# -*- coding: utf-8 -*- -+ -+""" -+.. module:: upgchk -+ :synopsis: Check the kernel upgrading environment -+""" -+ -+__title = "upgchk" -+__description = "Check the upgrade environment" -+__license__ = "GPL-2.0-or-later or LGPL-2.1-only" -+__version__ = "0.1" -diff --git a/upgchk/upgchk/kabichk.py b/upgchk/upgchk/kabichk.py -new file mode 100644 -index 0000000..cccacf3 ---- /dev/null -+++ b/upgchk/upgchk/kabichk.py -@@ -0,0 +1,163 @@ -+#!/usr/bin/python3 -+# -*- coding: utf-8 -*- -+ -+''' -+Theory: -+- compare CRC value between the known and the module -+- The export symbols CRC source: -+ * `/boot/symvers-.gz` for in tree modules and Image -+ - the ima mechanism could ensure the file credibility and non-tamper -+ * The `.symtab` section for out of tree modules -+ - name format: `__crc_` -+ - it's absolute value, means: `sym->st_shndx == SHN_ABS` -+ -+Design Details: -+- collect export symbols from -+ * collect in tree symbols from `/boot/symvers-.gz` -+ * collect out of tree module symbols from the module self -+- compare external symbols stored in `__versions` section for each module -+ -+`__versions` section data format: -+ -+ # define MAX_PARAM_PREFIX_LEN (64 - sizeof(unsigned long)) -+ # define MODULE_NAME_LEN MAX_PARAM_PREFIX_LEN -+ -+ struct modversion_info { -+ unsigned long crc; -+ char name[MODULE_NAME_LEN]; -+ }; -+ -+Usage: -+ python3 -m upgchk.kabichk \ -+ [[-r ],...] \ -+ [[-m ],...] \ -+ -c -+Example: -+ python3 -m upgchk.kabichk -c /lib/modules/$(uname -r)/kernel/fs/mbcache.ko -+ python3 -m upgchk.kabichk -m notify.ko -c osp_proc.ko -+''' -+ -+import argparse -+import gzip -+import pathlib -+import platform -+from typing import Tuple -+ -+import modsym -+ -+__all__ = ["KABI"] -+ -+ELF_SELFMAG = 4 -+ELF_ELFMAG = b"\177ELF" -+ -+ -+class KABI: -+ def __init__(self, version: str): -+ """ -+ read all symbols of the specific kernel -+ """ -+ self._symbols = dict() -+ filename = f"symvers-{version}.gz" -+ filepath = pathlib.Path("/boot/").joinpath(filename) -+ -+ with gzip.open(filepath, "rt") as f: -+ for line in f.readlines(): -+ # (crc, sym, loc, type) -+ (_crc, sym, loc, _) = line.split() -+ crc = int(_crc, 16) # convert hex crc to integer -+ self._insert(sym, (crc, sym, loc)) -+ -+ def _insert(self, key: str, val: Tuple[int, str, str]): -+ inst = self._symbols.get(key) -+ if inst is None: -+ self._symbols[key] = val -+ elif inst != val: -+ raise KeyError( -+ f"{key} already exits value {self._symbols[key]}, can't insert new value {val}") -+ -+ def _get(self, key: str) -> Tuple[int, str, str]: -+ return self._symbols.get(key) -+ -+ def _parse_mod_vers(self, filepath: pathlib.Path) -> Tuple[int, str]: -+ with open(filepath, "rb") as f: -+ magic = f.read(ELF_SELFMAG) -+ if magic != ELF_ELFMAG: -+ raise TypeError(f"{filepath} isn't an ELF file") -+ -+ for sym, crc in modsym.modvers(f): -+ yield (sym, crc) -+ -+ def check_mod_syms(self, filepath: pathlib.Path) -> Tuple[bool, str]: -+ if not filepath.exists(): -+ raise FileNotFoundError(f"{filepath} isn't found") -+ -+ for sym, crc in self._parse_mod_vers(filepath): -+ val = self._get(sym) -+ if val is None: -+ msg = f"symbol {sym} isn't known" -+ return (False, msg) -+ elif val[0] != crc: -+ msg = f"symbol {sym} CRC should be {hex(crc)}, but {hex(val[0])}" -+ return (False, msg) -+ -+ return (True, "") -+ -+ def _parse_mod_crcs(self, filepath: pathlib.Path) -> Tuple[int, str]: -+ with open(filepath, "rb") as f: -+ magic = f.read(ELF_SELFMAG) -+ if magic != ELF_ELFMAG: -+ raise TypeError(f"{filepath} isn't an ELF file") -+ -+ for inst in modsym.modcrcs(f): -+ yield inst -+ -+ def add_mod_crcs(self, filepath: pathlib.Path): -+ if not filepath.exists(): -+ raise FileNotFoundError(f"{filepath} isn't found") -+ -+ modname = filepath.name[:-3] -+ for (sym, crc) in self._parse_mod_crcs(filepath): -+ self._insert(sym, (crc, sym, modname)) -+ -+ -+def parse_argument() -> argparse.Namespace: -+ parser = argparse.ArgumentParser() -+ parser.add_argument("-r", "--release", action="store", -+ required=False, default=platform.release(), -+ help="specific the kernel release version") -+ parser.add_argument("-m", "--module", action="append", -+ required=False, default=[], -+ help="specific the out of tree modules") -+ parser.add_argument("-c", "--check", action="append", -+ required=True, -+ help="specific the checked module, e.g. -c a.ko -c b.ko") -+ options = parser.parse_args() -+ return (options.release, options.module, options.check) -+ -+ -+def main(): -+ release, modules, checks = parse_argument() -+ kabi = KABI(release) -+ -+ for mod in modules: -+ filepath = pathlib.Path(mod) -+ kabi.add_mod_crcs(filepath) -+ -+ print("-------------- start check --------------") -+ passed = 0 -+ failed = 0 -+ for mod in checks: -+ filepath = pathlib.Path(mod) -+ modname = filepath.name -+ result, msg = kabi.check_mod_syms(filepath) -+ if not result: -+ print(f"module {modname} fail: {msg}") -+ failed += 1 -+ else: -+ print(f"module {modname} pass") -+ passed += 1 -+ print(f"-------------- {passed} pass, {failed} failed --------------") -+ -+ -+if __name__ == '__main__': -+ main() --- -2.34.1 - diff --git a/0073-criu-fix-conflicting-headers.patch b/0073-criu-fix-conflicting-headers.patch deleted file mode 100644 index 5cf9bf4be62aaf73c7b4af3af2685dc52119b520..0000000000000000000000000000000000000000 --- a/0073-criu-fix-conflicting-headers.patch +++ /dev/null @@ -1,269 +0,0 @@ -From 9e512890a5858431acb42a2a685b445e7111dfc5 Mon Sep 17 00:00:00 2001 -From: z00557007 -Date: Wed, 4 Jan 2023 16:26:16 +0800 -Subject: [PATCH] criu: fix conflicting headers There are several changes in - glibc 2.36 that make sys/mount.h header incompatible with kernel headers: - -https://sourceware.org/glibc/wiki/Release/2.36#Usage_of_.3Clinux.2Fmount.h.3E_and_.3Csys.2Fmount.h.3E - -This patch removes conflicting includes for `` and -updates the content of `criu/include/linux/mount.h` to match -`/usr/include/sys/mount.h`. In addition, inline definitions sys_*() -functions have been moved from "linux/mount.h" to "syscall.h" to -avoid conflicts with `uapi/compel/plugins/std/syscall.h` and -``. The include for `` has been replaced -with local include to avoid conflicts with ``. - -Signed-off-by: Radostin Stoyanov ---- - Makefile.config | 2 +- - criu/cgroup.c | 1 + - criu/cr-check.c | 2 +- - criu/cr-restore.c | 3 ++- - criu/include/aio.h | 2 +- - criu/include/linux/aio_abi.h | 14 ++++++++++++++ - criu/include/linux/mount.h | 34 ++++++++++++++++++---------------- - criu/include/syscall.h | 17 +++++++++++++++++ - criu/pie/parasite.c | 2 +- - criu/util.c | 1 + - scripts/feature-tests.mak | 13 ------------- - 11 files changed, 57 insertions(+), 34 deletions(-) - create mode 100644 criu/include/linux/aio_abi.h - create mode 100644 criu/include/syscall.h - -diff --git a/Makefile.config b/Makefile.config -index 6e3e1b0..0b4ccd4 100644 ---- a/Makefile.config -+++ b/Makefile.config -@@ -70,7 +70,7 @@ export DEFINES += $(FEATURE_DEFINES) - export CFLAGS += $(FEATURE_DEFINES) - - FEATURES_LIST := TCP_REPAIR STRLCPY STRLCAT PTRACE_PEEKSIGINFO \ -- SETPROCTITLE_INIT MEMFD TCP_REPAIR_WINDOW FSCONFIG MEMFD_CREATE -+ SETPROCTITLE_INIT MEMFD TCP_REPAIR_WINDOW MEMFD_CREATE - - # $1 - config name - define gen-feature-test -diff --git a/criu/cgroup.c b/criu/cgroup.c -index ccac37f..3874b65 100644 ---- a/criu/cgroup.c -+++ b/criu/cgroup.c -@@ -27,6 +27,7 @@ - #include "images/cgroup.pb-c.h" - #include "kerndat.h" - #include "linux/mount.h" -+#include "syscall.h" - - /* - * This structure describes set of controller groups -diff --git a/criu/cr-check.c b/criu/cr-check.c -index ba87511..951e71d 100644 ---- a/criu/cr-check.c -+++ b/criu/cr-check.c -@@ -21,7 +21,6 @@ - #include - #include - #include --#include - - #include "../soccr/soccr.h" - -@@ -52,6 +51,7 @@ - #include "net.h" - #include "restorer.h" - #include "uffd.h" -+#include "linux/aio_abi.h" - - #include "images/inventory.pb-c.h" - -diff --git a/criu/cr-restore.c b/criu/cr-restore.c -index c3ff65d..10be969 100644 ---- a/criu/cr-restore.c -+++ b/criu/cr-restore.c -@@ -22,7 +22,6 @@ - #include - #include "common/compiler.h" - --#include "linux/mount.h" - #include "linux/rseq.h" - - #include "clone-noasan.h" -@@ -89,6 +88,8 @@ - #include - #include "compel/include/asm/syscall.h" - -+#include "linux/mount.h" -+ - #include "protobuf.h" - #include "images/sa.pb-c.h" - #include "images/timer.pb-c.h" -diff --git a/criu/include/aio.h b/criu/include/aio.h -index f8a59df..715a45c 100644 ---- a/criu/include/aio.h -+++ b/criu/include/aio.h -@@ -1,7 +1,7 @@ - #ifndef __CR_AIO_H__ - #define __CR_AIO_H__ - --#include -+#include "linux/aio_abi.h" - #include "images/mm.pb-c.h" - unsigned int aio_estimate_nr_reqs(unsigned int size); - int dump_aio_ring(MmEntry *mme, struct vma_area *vma); -diff --git a/criu/include/linux/aio_abi.h b/criu/include/linux/aio_abi.h -new file mode 100644 -index 0000000..d9ce787 ---- /dev/null -+++ b/criu/include/linux/aio_abi.h -@@ -0,0 +1,14 @@ -+#ifndef __LINUX__AIO_ABI_H -+#define __LINUX__AIO_ABI_H -+ -+typedef __kernel_ulong_t aio_context_t; -+ -+/* read() from /dev/aio returns these structures. */ -+struct io_event { -+ __u64 data; /* the data field from the iocb */ -+ __u64 obj; /* what iocb this event came from */ -+ __s64 res; /* result code for this event */ -+ __s64 res2; /* secondary result */ -+}; -+ -+#endif /* __LINUX__AIO_ABI_H */ -diff --git a/criu/include/linux/mount.h b/criu/include/linux/mount.h -index 840d627..0d55a58 100644 ---- a/criu/include/linux/mount.h -+++ b/criu/include/linux/mount.h -@@ -4,32 +4,34 @@ - #include "common/config.h" - #include "compel/plugins/std/syscall-codes.h" - --#ifdef CONFIG_HAS_FSCONFIG --#include --#else -+/* Copied from /usr/include/sys/mount.h */ -+ -+#ifndef FSCONFIG_CMD_CREATE -+/* The type of fsconfig call made. */ - enum fsconfig_command { - FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */ -+#define FSCONFIG_SET_FLAG FSCONFIG_SET_FLAG - FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */ -+#define FSCONFIG_SET_STRING FSCONFIG_SET_STRING - FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */ -+#define FSCONFIG_SET_BINARY FSCONFIG_SET_BINARY - FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */ -+#define FSCONFIG_SET_PATH FSCONFIG_SET_PATH - FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */ -+#define FSCONFIG_SET_PATH_EMPTY FSCONFIG_SET_PATH_EMPTY - FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */ -+#define FSCONFIG_SET_FD FSCONFIG_SET_FD - FSCONFIG_CMD_CREATE = 6, /* Invoke superblock creation */ -+#define FSCONFIG_CMD_CREATE FSCONFIG_CMD_CREATE - FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ -+#define FSCONFIG_CMD_RECONFIGURE FSCONFIG_CMD_RECONFIGURE - }; --#endif -+#endif // FSCONFIG_CMD_CREATE - --static inline int sys_fsopen(const char *fsname, unsigned int flags) --{ -- return syscall(__NR_fsopen, fsname, flags); --} --static inline int sys_fsconfig(int fd, unsigned int cmd, const char *key, const char *value, int aux) --{ -- return syscall(__NR_fsconfig, fd, cmd, key, value, aux); --} --static inline int sys_fsmount(int fd, unsigned int flags, unsigned int attr_flags) --{ -- return syscall(__NR_fsmount, fd, flags, attr_flags); --} -+#ifndef MS_MGC_VAL -+/* Magic mount flag number. Has to be or-ed to the flag values. */ -+#define MS_MGC_VAL 0xc0ed0000 /* Magic flag number to indicate "new" flags */ -+#define MS_MGC_MSK 0xffff0000 /* Magic flag number mask */ -+#endif - - #endif -diff --git a/criu/include/syscall.h b/criu/include/syscall.h -new file mode 100644 -index 0000000..3c0b3a4 ---- /dev/null -+++ b/criu/include/syscall.h -@@ -0,0 +1,17 @@ -+#ifndef __CR_SYSCALL_H__ -+#define __CR_SYSCALL_H__ -+ -+static inline int sys_fsopen(const char *fsname, unsigned int flags) -+{ -+ return syscall(__NR_fsopen, fsname, flags); -+} -+static inline int sys_fsconfig(int fd, unsigned int cmd, const char *key, const char *value, int aux) -+{ -+ return syscall(__NR_fsconfig, fd, cmd, key, value, aux); -+} -+static inline int sys_fsmount(int fd, unsigned int flags, unsigned int attr_flags) -+{ -+ return syscall(__NR_fsmount, fd, flags, attr_flags); -+} -+ -+#endif /* __CR_SYSCALL_H__ */ -diff --git a/criu/pie/parasite.c b/criu/pie/parasite.c -index c781303..9f8fbf8 100644 ---- a/criu/pie/parasite.c -+++ b/criu/pie/parasite.c -@@ -3,7 +3,6 @@ - #include - #include - #include --#include - #include - #include - #include -@@ -14,6 +13,7 @@ - #include "int.h" - #include "types.h" - #include -+#include "linux/mount.h" - #include "parasite.h" - #include "fcntl.h" - #include "prctl.h" -diff --git a/criu/util.c b/criu/util.c -index e682161..915a043 100644 ---- a/criu/util.c -+++ b/criu/util.c -@@ -37,6 +37,7 @@ - #include "mem.h" - #include "namespaces.h" - #include "criu-log.h" -+#include "syscall.h" - - #include "clone-noasan.h" - #include "cr_options.h" -diff --git a/scripts/feature-tests.mak b/scripts/feature-tests.mak -index 8df20af..eecefa3 100644 ---- a/scripts/feature-tests.mak -+++ b/scripts/feature-tests.mak -@@ -137,19 +137,6 @@ ENTRY(main) - END(main) - endef - --define FEATURE_TEST_FSCONFIG -- --#include -- --int main(void) --{ -- if (FSCONFIG_CMD_CREATE > 0) -- return 0; -- return 0; --} -- --endef -- - define FEATURE_TEST_NFTABLES_LIB_API_0 - - #include --- -2.33.0 - diff --git a/0074-mount-add-definition-for-FSOPEN_CLOEXEC.patch b/0074-mount-add-definition-for-FSOPEN_CLOEXEC.patch deleted file mode 100644 index 48b735902e5567f6dcf4f924af79df1e3de60cc4..0000000000000000000000000000000000000000 --- a/0074-mount-add-definition-for-FSOPEN_CLOEXEC.patch +++ /dev/null @@ -1,104 +0,0 @@ -From ae4b0ff2b9c91859513d841ebb71a67bed8a0d7c Mon Sep 17 00:00:00 2001 -From: z00557007 -Date: Wed, 4 Jan 2023 17:22:29 +0800 -Subject: [PATCH] mount: add definition for FSOPEN_CLOEXEC A recent change in - glibc introduced `enum fsconfig_command` [1] and as a result the compilation - of criu fails with the following errors - -In file included from criu/pie/util.c:3: -/usr/include/sys/mount.h:240:6: error: redeclaration of 'enum fsconfig_command' - 240 | enum fsconfig_command - | ^~~~~~~~~~~~~~~~ -In file included from /usr/include/sys/mount.h:32: -criu/include/linux/mount.h:11:6: note: originally defined here - 11 | enum fsconfig_command { - | ^~~~~~~~~~~~~~~~ -/usr/include/sys/mount.h:242:3: error: redeclaration of enumerator 'FSCONFIG_SET_FLAG' - 242 | FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */ - | ^~~~~~~~~~~~~~~~~ -criu/include/linux/mount.h:12:9: note: previous definition of 'FSCONFIG_SET_FLAG' with type 'enum fsconfig_command' - 12 | FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */ - | ^~~~~~~~~~~~~~~~~ -/usr/include/sys/mount.h:244:3: error: redeclaration of enumerator 'FSCONFIG_SET_STRING' - 244 | FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */ - | ^~~~~~~~~~~~~~~~~~~ -criu/include/linux/mount.h:14:9: note: previous definition of 'FSCONFIG_SET_STRING' with type 'enum fsconfig_command' - 14 | FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */ - | ^~~~~~~~~~~~~~~~~~~ -/usr/include/sys/mount.h:246:3: error: redeclaration of enumerator 'FSCONFIG_SET_BINARY' - 246 | FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */ - | ^~~~~~~~~~~~~~~~~~~ -criu/include/linux/mount.h:16:9: note: previous definition of 'FSCONFIG_SET_BINARY' with type 'enum fsconfig_command' - 16 | FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */ - | ^~~~~~~~~~~~~~~~~~~ -/usr/include/sys/mount.h:248:3: error: redeclaration of enumerator 'FSCONFIG_SET_PATH' - 248 | FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */ - | ^~~~~~~~~~~~~~~~~ -criu/include/linux/mount.h:18:9: note: previous definition of 'FSCONFIG_SET_PATH' with type 'enum fsconfig_command' - 18 | FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */ - | ^~~~~~~~~~~~~~~~~ -/usr/include/sys/mount.h:250:3: error: redeclaration of enumerator 'FSCONFIG_SET_PATH_EMPTY' - 250 | FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */ - | ^~~~~~~~~~~~~~~~~~~~~~~ -criu/include/linux/mount.h:20:9: note: previous definition of 'FSCONFIG_SET_PATH_EMPTY' with type 'enum fsconfig_command' - 20 | FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */ - | ^~~~~~~~~~~~~~~~~~~~~~~ -/usr/include/sys/mount.h:252:3: error: redeclaration of enumerator 'FSCONFIG_SET_FD' - 252 | FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */ - | ^~~~~~~~~~~~~~~ -criu/include/linux/mount.h:22:9: note: previous definition of 'FSCONFIG_SET_FD' with type 'enum fsconfig_command' - 22 | FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */ - | ^~~~~~~~~~~~~~~ -/usr/include/sys/mount.h:254:3: error: redeclaration of enumerator 'FSCONFIG_CMD_CREATE' - 254 | FSCONFIG_CMD_CREATE = 6, /* Invoke superblock creation */ - | ^~~~~~~~~~~~~~~~~~~ -criu/include/linux/mount.h:24:9: note: previous definition of 'FSCONFIG_CMD_CREATE' with type 'enum fsconfig_command' - 24 | FSCONFIG_CMD_CREATE = 6, /* Invoke superblock creation */ - | ^~~~~~~~~~~~~~~~~~~ -/usr/include/sys/mount.h:256:3: error: redeclaration of enumerator 'FSCONFIG_CMD_RECONFIGURE' - 256 | FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ - | ^~~~~~~~~~~~~~~~~~~~~~~~ -criu/include/linux/mount.h:26:9: note: previous definition of 'FSCONFIG_CMD_RECONFIGURE' with type 'enum fsconfig_command' - 26 | FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ - -This patch adds definition for FSOPEN_CLOEXEC to solve this problem. In particular, -sys/mount.h includes ifndef check for FSOPEN_CLOEXEC surrounding `enum fsconfig_command`. - -[1] https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=7eae6a91e9b1670330c9f15730082c91c0b1d570 - -Reported-by: Younes Manton (@ymanton) -Signed-off-by: Radostin Stoyanov ---- - criu/include/linux/mount.h | 9 +++++++-- - 1 file changed, 7 insertions(+), 2 deletions(-) - -diff --git a/criu/include/linux/mount.h b/criu/include/linux/mount.h -index 0d55a58..ee9386c 100644 ---- a/criu/include/linux/mount.h -+++ b/criu/include/linux/mount.h -@@ -6,7 +6,7 @@ - - /* Copied from /usr/include/sys/mount.h */ - --#ifndef FSCONFIG_CMD_CREATE -+#ifndef FSOPEN_CLOEXEC - /* The type of fsconfig call made. */ - enum fsconfig_command { - FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */ -@@ -26,7 +26,12 @@ enum fsconfig_command { - FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ - #define FSCONFIG_CMD_RECONFIGURE FSCONFIG_CMD_RECONFIGURE - }; --#endif // FSCONFIG_CMD_CREATE -+#endif // FSOPEN_CLOEXEC -+ -+/* fsopen flags. With the redundant definition, we check if the kernel, -+ * glibc value and our value still match. -+ */ -+#define FSOPEN_CLOEXEC 0x00000001 - - #ifndef MS_MGC_VAL - /* Magic mount flag number. Has to be or-ed to the flag values. */ --- -2.33.0 - diff --git a/0075-compel-fix-parasite-with-GCC-12.patch b/0075-compel-fix-parasite-with-GCC-12.patch deleted file mode 100644 index 48a1b231f9e957bcbe6a7f406b5e0c60306090cd..0000000000000000000000000000000000000000 --- a/0075-compel-fix-parasite-with-GCC-12.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 0568889ee368c2bc2682aae5c69d67ac16eac675 Mon Sep 17 00:00:00 2001 -From: Adrian Reber -Date: Tue, 18 Jan 2022 17:22:46 +0000 -Subject: [PATCH] compel: fix parasite with GCC 12 - -Parasite creation started to fail with GCC 12: - -On x86_64 with: - ./compel/compel-host hgen -f criu/pie/restorer.built-in.o -o criu/pie/restorer-blob.h - Error (compel/src/lib/handle-elf-host.c:337): Unexpected undefined symbol: `strlen'. External symbol in PIE? - -On aarch64 with: - ld: criu/pie/restorer.o: in function `lsm_set_label': - /drone/src/criu/pie/restorer.c:174: undefined reference to `strlen' - -Line 174 is: "for (len = 0; label[len]; len++)" - -Adding '-ffreestanding' to parasite compilation fixes these errors -because, according to GCC developers: - -"strlen is a standard C function, so I don't see any bug in that being used -unless you do a freestanding compilation (-nostdlib isn't that)." - -Signed-off-by: Adrian Reber ---- - compel/src/main.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/compel/src/main.c b/compel/src/main.c -index a9a50959f..f461ff04d 100644 ---- a/compel/src/main.c -+++ b/compel/src/main.c -@@ -19,6 +19,7 @@ - - #define CFLAGS_DEFAULT_SET \ - "-Wstrict-prototypes " \ -+ "-ffreestanding " \ - "-fno-stack-protector -nostdlib -fomit-frame-pointer " - - #define COMPEL_CFLAGS_PIE CFLAGS_DEFAULT_SET "-fpie" --- -2.33.0 - diff --git a/0076-support-build-with-clang.patch b/0076-support-build-with-clang.patch deleted file mode 100644 index 3f4ca8bc33330746038bf9b1ed897579e29e4b4b..0000000000000000000000000000000000000000 --- a/0076-support-build-with-clang.patch +++ /dev/null @@ -1,59 +0,0 @@ -From db2a18df9d47b7511120bc48a614c5abb0d67c16 Mon Sep 17 00:00:00 2001 -From: luofeng -Date: Wed, 6 Sep 2023 14:15:57 +0000 -Subject: [PATCH] support build with clang - ---- - Makefile | 1 - - criu/arch/aarch64/include/asm/restorer.h | 10 +++++----- - 2 files changed, 5 insertions(+), 6 deletions(-) - -diff --git a/Makefile b/Makefile -index c1eafdd..14c0008 100644 ---- a/Makefile -+++ b/Makefile -@@ -82,7 +82,6 @@ endif - - # secure compilation options - CFLAGS += -fstack-protector-all -fPIE --LDFLAGS += -pie - - # - # CFLAGS_PIE: -diff --git a/criu/arch/aarch64/include/asm/restorer.h b/criu/arch/aarch64/include/asm/restorer.h -index 64a9c24..f12f89d 100644 ---- a/criu/arch/aarch64/include/asm/restorer.h -+++ b/criu/arch/aarch64/include/asm/restorer.h -@@ -13,7 +13,7 @@ - #define RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, \ - thread_args, clone_restore_fn) \ - asm volatile( \ -- "clone_emul: \n" \ -+ "clone_emul_%=: \n" \ - "ldr x1, %2 \n" \ - "and x1, x1, #~15 \n" \ - "sub x1, x1, #16 \n" \ -@@ -24,16 +24,16 @@ - "mov x8, #"__stringify(__NR_clone)" \n" \ - "svc #0 \n" \ - \ -- "cbz x0, thread_run \n" \ -+ "cbz x0, thread_run_%= \n" \ - \ - "mov %0, x0 \n" \ -- "b clone_end \n" \ -+ "b clone_end_%= \n" \ - \ -- "thread_run: \n" \ -+ "thread_run_%=: \n" \ - "ldp x1, x0, [sp] \n" \ - "br x1 \n" \ - \ -- "clone_end: \n" \ -+ "clone_end_%=: \n" \ - : "=r"(ret) \ - : "r"(clone_flags), \ - "m"(new_sp), \ --- -2.39.1 - diff --git a/0077-fix-clang.patch b/0077-fix-clang.patch deleted file mode 100644 index c623f56c4f6887290f9fb0cdb3443df9a9839562..0000000000000000000000000000000000000000 --- a/0077-fix-clang.patch +++ /dev/null @@ -1,168 +0,0 @@ -diff -u -r criu-3.16.1/compel/test/fdspy/Makefile criu-3.16.1/compel/test/fdspy/Makefile ---- criu-3.16.1/compel/test/fdspy/Makefile 2021-10-14 13:44:30.000000000 +0800 -+++ criu-3.16.1/compel/test/fdspy/Makefile 2023-07-14 12:04:56.000000000 +0800 -@@ -1,4 +1,4 @@ --CC := gcc -+CC ?= gcc - CFLAGS ?= -O2 -g -Wall -Werror - - COMPEL := ../../../compel/compel-host -diff -u -r criu-3.16.1/compel/test/infect/Makefile criu-3.16.1/compel/test/infect/Makefile ---- criu-3.16.1/compel/test/infect/Makefile 2021-10-14 13:44:30.000000000 +0800 -+++ criu-3.16.1/compel/test/infect/Makefile 2023-07-14 12:05:03.000000000 +0800 -@@ -1,4 +1,4 @@ --CC := gcc -+CC ?= gcc - CFLAGS ?= -O2 -g -Wall -Werror - - COMPEL := ../../../compel/compel-host -diff -u -r criu-3.16.1/compel/test/rsys/Makefile criu-3.16.1/compel/test/rsys/Makefile ---- criu-3.16.1/compel/test/rsys/Makefile 2021-10-14 13:44:30.000000000 +0800 -+++ criu-3.16.1/compel/test/rsys/Makefile 2023-07-14 12:04:49.000000000 +0800 -@@ -1,4 +1,4 @@ --CC := gcc -+CC ?= gcc - CFLAGS ?= -O2 -g -Wall -Werror - - COMPEL := ../../../compel/compel-host -diff -u -r criu-3.16.1/scripts/ci/docker.env criu-3.16.1/scripts/ci/docker.env ---- criu-3.16.1/scripts/ci/docker.env 2021-10-14 13:44:30.000000000 +0800 -+++ criu-3.16.1/scripts/ci/docker.env 2023-07-14 11:51:52.000000000 +0800 -@@ -1,4 +1,4 @@ - SKIP_CI_PREP=1 - ZDTM_OPTS=-x zdtm/static/binfmt_misc -x zdtm/static/sched_policy00 --CC=gcc -+CC=$(CC) - SKIP_EXT_DEV_TEST=1 -diff -u -r criu-3.16.1/scripts/ci/run-ci-tests.sh criu-3.16.1/scripts/ci/run-ci-tests.sh ---- criu-3.16.1/scripts/ci/run-ci-tests.sh 2021-10-14 13:44:30.000000000 +0800 -+++ criu-3.16.1/scripts/ci/run-ci-tests.sh 2023-07-14 12:06:23.000000000 +0800 -@@ -36,7 +36,7 @@ - # This can fail on aarch64 travis - service apport stop || : - -- if [ "$CLANG" = "1" ]; then -+ if [ "$CC" = "clang" ]; then - # clang support - CC=clang - # If this is running in an environment without gcc installed -diff -u -r criu-3.16.1/scripts/nmk/scripts/tools.mk criu-3.16.1/scripts/nmk/scripts/tools.mk ---- criu-3.16.1/scripts/nmk/scripts/tools.mk 2021-10-14 13:44:30.000000000 +0800 -+++ criu-3.16.1/scripts/nmk/scripts/tools.mk 2023-07-14 11:52:23.000000000 +0800 -@@ -7,7 +7,7 @@ - ifeq ($(origin LD), default) - LD := $(CROSS_COMPILE)$(HOSTLD) - endif --HOSTCC ?= gcc -+HOSTCC ?= $(CC) - ifeq ($(origin CC), default) - CC := $(CROSS_COMPILE)$(HOSTCC) - endif -diff -u -r criu-3.16.1/test/others/app-emu/job/Makefile criu-3.16.1/test/others/app-emu/job/Makefile ---- criu-3.16.1/test/others/app-emu/job/Makefile 2021-10-14 13:44:30.000000000 +0800 -+++ criu-3.16.1/test/others/app-emu/job/Makefile 2023-07-14 12:01:22.000000000 +0800 -@@ -2,10 +2,10 @@ - .PHONY: all - - %.o: %.c -- gcc -c $< -o $@ -+ $(CC) -c $< -o $@ - - job: job.o -- gcc -o $@ job.o -+ $(CC) -o $@ job.o - - clean: - rm -f *.o job -diff -u -r criu-3.16.1/test/others/app-emu/make/Makefile criu-3.16.1/test/others/app-emu/make/Makefile ---- criu-3.16.1/test/others/app-emu/make/Makefile 2021-10-14 13:44:30.000000000 +0800 -+++ criu-3.16.1/test/others/app-emu/make/Makefile 2023-07-14 12:03:31.000000000 +0800 -@@ -3,7 +3,7 @@ - .PHONY: all - - %.o: %.c -- gcc -c $< -o $@ -+ $(CC) -c $< -o $@ - - foo%.c: tmpl.c - cp $< $@ -diff -u -r criu-3.16.1/test/others/ext-links/Makefile criu-3.16.1/test/others/ext-links/Makefile ---- criu-3.16.1/test/others/ext-links/Makefile 2021-10-14 13:44:30.000000000 +0800 -+++ criu-3.16.1/test/others/ext-links/Makefile 2023-07-14 12:03:28.000000000 +0800 -@@ -1,4 +1,4 @@ - all: mvlink.so - - mvlink.so: mvlink.c -- gcc -g -Werror -Wall -shared -nostartfiles mvlink.c -o mvlink.so -iquote ../../../criu/include -fPIC -+ $(CC) -g -Werror -Wall -shared -nostartfiles mvlink.c -o mvlink.so -iquote ../../../criu/include -fPIC -diff -u -r criu-3.16.1/test/others/libcriu/Makefile criu-3.16.1/test/others/libcriu/Makefile ---- criu-3.16.1/test/others/libcriu/Makefile 2021-10-14 13:44:30.000000000 +0800 -+++ criu-3.16.1/test/others/libcriu/Makefile 2023-07-14 12:03:37.000000000 +0800 -@@ -16,13 +16,13 @@ - - define genb - $(1): $(1).o lib.o -- gcc $$^ -L ../../../../criu/lib/c/ -L ../../../../criu/images/ -lcriu -o $$@ -+ $(CC) $$^ -L ../../../../criu/lib/c/ -L ../../../../criu/images/ -lcriu -o $$@ - endef - - $(foreach t, $(TESTS), $(eval $(call genb, $(t)))) - - %.o: %.c -- gcc -c $^ -iquote ../../../../criu/criu/include -I../../../../criu/lib/c/ -I../../../../criu/images/ -o $@ -Werror -+ $(CC) -c $^ -iquote ../../../../criu/criu/include -I../../../../criu/lib/c/ -I../../../../criu/images/ -o $@ -Werror - - clean: libcriu_clean - rm -rf $(TESTS) $(TESTS:%=%.o) lib.o -diff -u -r criu-3.16.1/test/others/Makefile criu-3.16.1/test/others/Makefile ---- criu-3.16.1/test/others/Makefile 2021-10-14 13:44:30.000000000 +0800 -+++ criu-3.16.1/test/others/Makefile 2023-07-14 12:03:25.000000000 +0800 -@@ -1,2 +1,2 @@ - loop: -- gcc -Wall loop.c -o loop -+ $(CC) -Wall loop.c -o loop -diff -u -r criu-3.16.1/test/others/mounts/ext/Makefile criu-3.16.1/test/others/mounts/ext/Makefile ---- criu-3.16.1/test/others/mounts/ext/Makefile 2021-10-14 13:44:30.000000000 +0800 -+++ criu-3.16.1/test/others/mounts/ext/Makefile 2023-07-14 12:01:34.000000000 +0800 -@@ -1,13 +1,13 @@ - all: ext-mount.so ns_init - - ext-mount.so: ext-mount.c -- gcc -g -Werror -Wall -shared -nostartfiles ext-mount.c -o ext-mount.so -iquote ../../../include -fPIC -+ $(CC) -g -Werror -Wall -shared -nostartfiles ext-mount.c -o ext-mount.so -iquote ../../../include -fPIC - - ns_init: ns_init.o -- gcc -static $< -o $@ -+ $(CC) -static $< -o $@ - - ns_init.o: ns_init.c -- gcc -c $< -o $@ -+ $(CC) -c $< -o $@ - - run: all - ./run.sh -diff -u -r criu-3.16.1/test/others/unix-callback/Makefile criu-3.16.1/test/others/unix-callback/Makefile ---- criu-3.16.1/test/others/unix-callback/Makefile 2021-10-14 13:44:30.000000000 +0800 -+++ criu-3.16.1/test/others/unix-callback/Makefile 2023-07-14 12:01:53.000000000 +0800 -@@ -7,16 +7,16 @@ - protoc-c --proto_path=. --c_out=. unix.proto - - unix-lib.so: unix-lib.c unix.pb-c.c -- gcc -g -Werror -Wall -shared -nostartfiles unix-lib.c unix.pb-c.c -o unix-lib.so -iquote ../../../criu/include -fPIC -+ $(CC) -g -Werror -Wall -shared -nostartfiles unix-lib.c unix.pb-c.c -o unix-lib.so -iquote ../../../criu/include -fPIC - - syslog-lib.so: syslog-lib.c -- gcc -g -Werror -Wall -shared -nostartfiles syslog-lib.c -o syslog-lib.so -iquote ../../../criu/include -fPIC -+ $(CC) -g -Werror -Wall -shared -nostartfiles syslog-lib.c -o syslog-lib.so -iquote ../../../criu/include -fPIC - - unix-server: unix-server.c -- gcc -Werror -Wall -o unix-server unix-server.c -+ $(CC) -Werror -Wall -o unix-server unix-server.c - - unix-client: unix-client.c -- gcc -Werror -Wall -o unix-client unix-client.c -+ $(CC) -Werror -Wall -o unix-client unix-client.c - - clean: - rm -rf data unix-lib.so unix-server unix-client syslog-lib.so output pid unix.pb-c.* - diff --git a/README.en.md b/README.en.md new file mode 100644 index 0000000000000000000000000000000000000000..c3f6d627fda453b257b2600d62833b0f8e089544 --- /dev/null +++ b/README.en.md @@ -0,0 +1,36 @@ +# criu + +#### Description +A tool of Checkpoint/Restore in User-space + +#### Software Architecture +Software architecture description + +#### Installation + +1. xxxx +2. xxxx +3. xxxx + +#### Instructions + +1. xxxx +2. xxxx +3. xxxx + +#### Contribution + +1. Fork the repository +2. Create Feat_xxx branch +3. Commit your code +4. Create Pull Request + + +#### Gitee Feature + +1. You can use Readme\_XXX.md to support different languages, such as Readme\_en.md, Readme\_zh.md +2. Gitee blog [blog.gitee.com](https://blog.gitee.com) +3. Explore open source project [https://gitee.com/explore](https://gitee.com/explore) +4. The most valuable open source project [GVP](https://gitee.com/gvp) +5. The manual of Gitee [https://gitee.com/help](https://gitee.com/help) +6. The most popular members [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8786ea5645bbc4662ff091aeb0250371e878e56c --- /dev/null +++ b/README.md @@ -0,0 +1,37 @@ +# criu + +#### 介绍 +A tool of Checkpoint/Restore in User-space + +#### 软件架构 +软件架构说明 + + +#### 安装教程 + +1. xxxx +2. xxxx +3. xxxx + +#### 使用说明 + +1. xxxx +2. xxxx +3. xxxx + +#### 参与贡献 + +1. Fork 本仓库 +2. 新建 Feat_xxx 分支 +3. 提交代码 +4. 新建 Pull Request + + +#### 码云特技 + +1. 使用 Readme\_XXX.md 来支持不同的语言,例如 Readme\_en.md, Readme\_zh.md +2. 码云官方博客 [blog.gitee.com](https://blog.gitee.com) +3. 你可以 [https://gitee.com/explore](https://gitee.com/explore) 这个地址来了解码云上的优秀开源项目 +4. [GVP](https://gitee.com/gvp) 全称是码云最有价值开源项目,是码云综合评定出的优秀开源项目 +5. 码云官方提供的使用手册 [https://gitee.com/help](https://gitee.com/help) +6. 码云封面人物是一档用来展示码云会员风采的栏目 [https://gitee.com/gitee-stars/](https://gitee.com/gitee-stars/) diff --git a/criu-3.16.1.tar.gz b/criu-3.16.1.tar.gz deleted file mode 100644 index b4767a39de787397647c9e7cbabfc396a26208f1..0000000000000000000000000000000000000000 Binary files a/criu-3.16.1.tar.gz and /dev/null differ diff --git a/criu-3.19.tar.gz b/criu-3.19.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7a57acba43258880039d01cc956dc0b3ccb6a75f Binary files /dev/null and b/criu-3.19.tar.gz differ diff --git a/criu.spec b/criu.spec index 565259fc6ba57e00fa31febc3158ddea18d457a8..c5b8066b24f12acdbdd54a0ced4ca28c2dec59db 100644 --- a/criu.spec +++ b/criu.spec @@ -1,100 +1,22 @@ Name: criu -Version: 3.16.1 -Release: 8 +Version: 3.19 +Release: 1 Provides: crtools = %{version}-%{release} Obsoletes: crtools <= 1.0-2 Summary: A tool of Checkpoint/Restore in User-space License: GPL-2.0-or-later or LGPL-2.1-only -URL: https://criu.org/ -Source0: https://github.com/checkpoint-restore/criu/archive/v%{version}/%{name}-%{version}.tar.gz -BuildRequires: systemd libnet-devel asciidoc xmlto perl-interpreter libselinux-devel gcc +URL: http://criu.org/ +Source0: http://github.com/checkpoint-restore/criu/archive/v%{version}/%{name}-%{version}.tar.gz +BuildRequires: systemd libnet-devel asciidoc xmlto perl-interpreter libselinux-devel gcc make BuildRequires: protobuf-devel protobuf-c-devel python3-devel libnl3-devel libcap-devel -BuildRequires: libmnl-devel libnftnl-devel +BuildRequires: python3-pip python3-setuptools python3-wheel python3-protobuf Recommends: tar ExclusiveArch: x86_64 %{arm} ppc64le aarch64 s390x Requires: %{name} = %{version}-%{release} Provides: %{name}-libs = %{version}-%{release} Obsoletes: %{name}-libs < %{version}-%{release} -Patch: 0001-criu-dump-and-restore-cpu-affinity-of-each-thread.patch -Patch: 0002-compel-add-rseq-syscall-into-compel-std-plugin-sysca.patch -Patch: 0003-kerndat-check-for-rseq-syscall-support-Signed-off-by.patch -Patch: 0004-util-move-fork_and_ptrace_attach-helper-from-cr-chec.patch -Patch: 0005-cr-check-Add-ptrace-rseq-conf-dump-feature-Add-get_r.patch -Patch: 0006-rseq-initial-support-TODO-1.-properly-handle-case-wh.patch -Patch: 0007-zdtm-add-simple-test-for-rseq-C-R-Signed-off-by-Alex.patch -Patch: 0008-ci-add-Fedora-Rawhide-based-test-on-Cirrus-We-have-a.patch -Patch: 0009-include-add-thread_pointer.h-from-Glibc-Implementati.patch -Patch: 0010-clone-noasan-unregister-rseq-at-the-thread-start-for.patch -Patch: 0011-zdtm-static-rseq00-fix-rseq-test-when-linking-with-a.patch -Patch: 0012-compel-add-helpers-to-get-set-instruction-pointer-Si.patch -Patch: 0013-cr-dump-fixup-thread-IP-when-inside-rseq-cs-Signed-o.patch -Patch: 0014-zdtm-add-rseq-transition-test-for-amd64-Signed-off-b.patch -Patch: 0015-cr-dump-handle-rseq-flags-field-Userspace-may-config.patch -Patch: 0016-zdtm-add-rseq02-transition-test-with-NO_RESTART-CS-f.patch -Patch: 0017-zdtm-fix-zdtm-static-maps00-case-in-arm64.patch -Patch: 0018-test-flush-ipt-rules-after-program-exits.patch -Patch: 0019-zdtm-fix-cleaning-step-of-zdtm_netns.patch -%ifarch aarch64 -Patch: 0020-mm-add-pin-memory-method-for-criu.patch -Patch: 0021-pid-add-pid-recover-method-for-criu.patch -Patch: 0022-notifier-add-notifier-calling-method-for-checkpoint-.patch -Patch: 0023-block-device-dump-block-device-as-reguler-file.patch -Patch: 0024-anon-inode-add-support-for-anon-inode-fd.patch -Patch: 0025-char_dev-add-support-for-char-device-dump-and-restor.patch -Patch: 0026-improve-char-dev-fd-check-and-repair-method.patch -Patch: 0027-mmap-restore-dev-hisi_sec2-deivce-vma.patch -Patch: 0028-infiniband-fix-the-infiniband-fd-conflict.patch -Patch: 0029-cred-provide-cred-checkpoint-restore-method.patch -Patch: 0030-socket-fix-connect-error-of-invalid-param.patch -Patch: 0031-criu-eventpollfd-fix-for-improper-usage-in-appdata.patch -Patch: 0032-task_exit_notify-add-task-exit-notify-mask-method-fo.patch -Patch: 0033-unix-socket-add-support-for-unix-stream-socket.patch -Patch: 0034-netlink-add-repair-modes-and-clear-resource-when-fai.patch -Patch: 0035-sysvshm-add-dump-restore-sysv-shm-in-host-ipc-ns.patch -Patch: 0036-add-O_REPAIR-flag-to-vma-fd.patch -Patch: 0037-looser-file-mode-and-size-check.patch -Patch: 0038-file-lock-add-repair-mode-to-dump-file-locks.patch -Patch: 0039-unlock-network-when-restore-fails.patch -Patch: 0040-net-add-shared-socket-recover-method-for-criu.patch -Patch: 0041-tcp-save-src-ports-to-ip_local_reserved_ports-when-d.patch -Patch: 0042-reg-file-fix-dump-fail-problem-with-null-seek-op.patch -Patch: 0043-fix-dump-fail-problem-with-no-access-to-get-socket-f.patch -Patch: 0044-proc-parse-fix-vma-offset-value-for-the-sysfs-file-o.patch -Patch: 0045-add-reuse-file-method-for-recover-deleted-file-state.patch -Patch: 0046-sk-fix-share-sockets-repair-problem.patch -Patch: 0047-mm-add-clear-pin-mem-and-init-page-map-option.patch -Patch: 0048-fds-fix-fds-list-restore.patch -Patch: 0049-log-print-error-log-to-dev-kmsg.patch -Patch: 0050-unix-sk-improve-dgram-robustness.patch -Patch: 0051-sk-ignore-the-bind-error-for-icmp-socket.patch -Patch: 0052-optimization-parallel-collecting-vmas.patch -Patch: 0053-mm-add-exec-file-mapping-pin-method.patch -Patch: 0054-ptrace-trace-specific-syscall.patch -Patch: 0055-notifier-rollback-when-open-img-failed.patch -Patch: 0056-detach-don-t-kill-task-when-ptrace-PTRACE_DETACH-ret.patch -Patch: 0057-build-add-secure-compilation-options.patch -Patch: 0058-nftables-add-mnl-api.patch -Patch: 0059-nftables-implement-nft-api-for-tcp.patch -Patch: 0060-net-switch-to-nftables-API.patch -Patch: 0061-zdtm-unlink-kdat-before-testing.patch -Patch: 0062-zdtm-add-host-ns-sysvshm-ipc-case.patch -Patch: 0063-zdtm-add-pinmem-testcase.patch -Patch: 0064-zdtm-init-notifier-testcase.patch -Patch: 0065-zdtm-print-errno-info-when-accessing-.out-failure.patch -Patch: 0066-zdtm-print-more-info-for-fs.c.patch -Patch: 0067-zdtm-add-chardev-testcase.patch -Patch: 0068-zdtm-add-infiniband-testcase.patch -Patch: 0069-zdtm-add-share-port-testcase.patch -Patch: 0070-zdtm-tmp-test-script.patch -Patch: 0071-mod-add-criu-indepent-test.patch -Patch: 0072-kabichk-add-KABI-check-code.patch -Patch: 0076-support-build-with-clang.patch -%endif -Patch: 0073-criu-fix-conflicting-headers.patch -Patch: 0074-mount-add-definition-for-FSOPEN_CLOEXEC.patch -Patch: 0075-compel-fix-parasite-with-GCC-12.patch -Patch: 0077-fix-clang.patch +Patch1: 0001-criu-dump-and-restore-cpu-affinity-of-each-thread.patch %description Checkpoint/Restore in Userspace(CRIU),is a software tool for the linux operating system. @@ -144,6 +66,7 @@ CFLAGS+=`echo %{optflags}` make V=1 WERROR=0 PREFIX=%{_prefix} RUNDIR=/run/criu %install make install-criu DESTDIR=%{buildroot} PREFIX=%{_prefix} LIBDIR=%{_libdir} +make install-crit DESTDIR=%{buildroot} PREFIX=%{_prefix} LIBDIR=%{_libdir} make install-lib DESTDIR=%{buildroot} PREFIX=%{_prefix} LIBDIR=%{_libdir} PYTHON=python3 make install-man DESTDIR=%{buildroot} PREFIX=%{_prefix} LIBDIR=%{_libdir} @@ -163,45 +86,52 @@ chmod 0755 %{buildroot}/run/%{name}/ %exclude %{_libdir}/libcriu.a %files -n python3-criu -%{python3_sitelib}/crit-0.0.1-py%{python3_version}.egg +%{python3_sitelib}/{pycriu*/*,*egg-info} %files -n crit %{_bindir}/crit +%{python3_sitelib}/crit-%{version}.dist-info/ +%{python3_sitelib}/crit %files -n criu-ns %{_sbindir}/criu-ns %files help -%doc COPYING +%doc README.md COPYING %doc %{_mandir}/man8/criu.8* -%doc %{_mandir}/man1/{compel.1*,crit.1*,criu-ns.1*} +%doc %{_mandir}/man1/{compel.1*,crit.1*,criu-ns.1*,criu-amdgpu-plugin.1*} %changelog -* Fri Nov 10 2023 wangqing - 3.16.1-8 -- Fix Source0 URL errors +* Thu Apr 25 2024 snoweay - 3.19-1 +- update to version 3.19 from criu.org -* Tue Aug 22 2023 feng luo - 3.16.1-7 -- Support build with clang +* Mon Jun 19 2023 hewenliang <314264452@qq.com> - 3.16.1-9 +- revert: fix BUG at criu/pstree.c:452 -* Thu Jul 27 2023 zhoujie - 3.16.1-6 -- compel fix parasite with GCC 12 +* Mon Jun 19 2023 hewenliang <314264452@qq.com> - 3.16.1-8 +- revert "rseq c/r support" -* Wed Jan 4 2023 zhoujie - 3.16.1-5 -- Fix compilation problems caused by glibc upgrade +* Tue Nov 22 2022 Hewenliang - 3.16.1-7 +- fix the null pointer of get_tty_driver. +- criu files Dont cache fd ids for device files. +- pre dump call vmsplice with SPLICE_F_GIFT. -* Fri Jul 22 2022 tenglei - 3.16.1-4 -- Remove non-compliant README files -- fix files not found egg-info -- move changelog into spec file +* Tue Nov 22 2022 Hewenliang - 3.16.1-6 +- page-pipe:fix-limiting a pipe size. -* Wed Apr 13 2022 fu.lin - 3.16.1-3 -- backport kinds of feature/bugfix -- spec: split changelog +* Thu Nov 10 2022 caodongxia - 3.16.1-5 +- Modify invalid source0 -* Fri Mar 4 2022 ningyu - 3.16.1-2 +* Wed Oct 19 2022 fu.lin -3.16.1-4 +- bump the version + +* Fri Mar 4 2022 ningyu - 3.16.1-3 - rseq c/r support -* Thu Dec 2 2021 zhouwenpei - 3.16.1-1 +* Sat Feb 26 2022 luolongjun - 3.16.1-2 +- add support for pin memory + +* Fri Dec 24 2021 zhouwenpei - 3.16.1-1 - upgrade criu version to 3.16.1 * Tue Sep 07 2021 chenchen - 3.15-4