diff --git a/0004-kerndat-detect-if-system-support-clone3-with-set_tid.patch b/0004-kerndat-detect-if-system-support-clone3-with-set_tid.patch new file mode 100644 index 0000000000000000000000000000000000000000..285c29d78571b20d1fbfbb3d38afef3ff1ce3f93 --- /dev/null +++ b/0004-kerndat-detect-if-system-support-clone3-with-set_tid.patch @@ -0,0 +1,241 @@ +From 4f5b57b143d2f92682a0ab14c00df3b2f6f87c05 Mon Sep 17 00:00:00 2001 +From: Adrian Reber +Date: Sun, 15 Dec 2019 20:38:46 +0000 +Subject: [PATCH] kerndat: detect if system support clone3() with set_tid + +Linux kernel 5.4 extends clone3() with set_tid to allow processes to +specify the PID of a newly created process. This introduces detection +of the clone3() syscall and if set_tid is supported. + +This first implementation is X86_64 only. + +Signed-off-by: Adrian Reber +Signed-off-by: Sang Yan +--- + compel/arch/arm/plugins/std/syscalls/syscall.def | 1 + + .../ppc64/plugins/std/syscalls/syscall-ppc64.tbl | 1 + + .../s390/plugins/std/syscalls/syscall-s390.tbl | 1 + + .../arch/x86/plugins/std/syscalls/syscall_32.tbl | 1 + + .../arch/x86/plugins/std/syscalls/syscall_64.tbl | 1 + + compel/plugins/include/uapi/std/syscall-types.h | 1 + + criu/cr-check.c | 12 +++++++ + criu/include/kerndat.h | 1 + + criu/include/sched.h | 33 +++++++++++++++++ + criu/kerndat.c | 41 ++++++++++++++++++++++ + 10 files changed, 93 insertions(+) + create mode 100644 criu/include/sched.h + +diff --git a/compel/arch/arm/plugins/std/syscalls/syscall.def b/compel/arch/arm/plugins/std/syscalls/syscall.def +index 721ff16..2b93cb0 100644 +--- a/compel/arch/arm/plugins/std/syscalls/syscall.def ++++ b/compel/arch/arm/plugins/std/syscalls/syscall.def +@@ -112,3 +112,4 @@ userfaultfd 282 388 (int flags) + fallocate 47 352 (int fd, int mode, loff_t offset, loff_t len) + cacheflush ! 
983042 (void *start, void *end, int flags) + ppoll 73 336 (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) ++clone3 435 435 (struct clone_args *uargs, size_t size) +diff --git a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl +index 3b30790..7cdf136 100644 +--- a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl ++++ b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl +@@ -108,3 +108,4 @@ __NR_gettimeofday 78 sys_gettimeofday (struct timeval *tv, struct timezone *tz) + __NR_preadv 320 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h) + __NR_userfaultfd 364 sys_userfaultfd (int flags) + __NR_ppoll 281 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) ++__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) +diff --git a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl +index cc13a63..5cf2284 100644 +--- a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl ++++ b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl +@@ -108,3 +108,4 @@ __NR_userfaultfd 355 sys_userfaultfd (int flags) + __NR_preadv 328 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h) + __NR_gettimeofday 78 sys_gettimeofday (struct timeval *tv, struct timezone *tz) + __NR_ppoll 302 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) ++__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) +diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl +index 7903ab1..f1faace 100644 +--- a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl ++++ 
b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl +@@ -96,3 +96,4 @@ __NR_seccomp 354 sys_seccomp (unsigned int op, unsigned int flags, const char + __NR_memfd_create 356 sys_memfd_create (const char *name, unsigned int flags) + __NR_userfaultfd 374 sys_userfaultfd (int flags) + __NR_ppoll 309 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) ++__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) +diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl +index 4ac9164..9056f5e 100644 +--- a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl ++++ b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl +@@ -107,3 +107,4 @@ __NR_kcmp 312 sys_kcmp (pid_t pid1, pid_t pid2, int type, unsigned long idx1 + __NR_memfd_create 319 sys_memfd_create (const char *name, unsigned int flags) + __NR_userfaultfd 323 sys_userfaultfd (int flags) + __NR_ppoll 271 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) ++__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) +diff --git a/compel/plugins/include/uapi/std/syscall-types.h b/compel/plugins/include/uapi/std/syscall-types.h +index 57865e7..031e773 100644 +--- a/compel/plugins/include/uapi/std/syscall-types.h ++++ b/compel/plugins/include/uapi/std/syscall-types.h +@@ -39,6 +39,7 @@ struct msghdr; + struct rusage; + struct iocb; + struct pollfd; ++struct clone_args; + + typedef unsigned long aio_context_t; + +diff --git a/criu/cr-check.c b/criu/cr-check.c +index 75a665c..30d6fdc 100644 +--- a/criu/cr-check.c ++++ b/criu/cr-check.c +@@ -1224,6 +1224,16 @@ static int check_uffd_noncoop(void) + return 0; + } + ++static int check_clone3_set_tid(void) ++{ ++ if (!kdat.has_clone3_set_tid) { ++ pr_warn("clone3() with set_tid not supported\n"); ++ return -1; ++ } ++ ++ return 0; ++} ++ + static int 
check_can_map_vdso(void) + { + if (kdat_can_map_vdso() == 1) +@@ -1373,6 +1383,7 @@ int cr_check(void) + ret |= check_sk_netns(); + ret |= check_kcmp_epoll(); + ret |= check_net_diag_raw(); ++ ret |= check_clone3_set_tid(); + } + + /* +@@ -1476,6 +1487,7 @@ static struct feature_list feature_list[] = { + { "link_nsid", check_link_nsid}, + { "kcmp_epoll", check_kcmp_epoll}, + { "external_net_ns", check_external_net_ns}, ++ { "clone3_set_tid", check_clone3_set_tid}, + { NULL, NULL }, + }; + +diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h +index d93e078..5e78508 100644 +--- a/criu/include/kerndat.h ++++ b/criu/include/kerndat.h +@@ -65,6 +65,7 @@ struct kerndat_s { + bool x86_has_ptrace_fpu_xsave_bug; + bool has_inotify_setnextwd; + bool has_kcmp_epoll_tfd; ++ bool has_clone3_set_tid; + }; + + extern struct kerndat_s kdat; +diff --git a/criu/include/sched.h b/criu/include/sched.h +new file mode 100644 +index 0000000..78f65e3 +--- /dev/null ++++ b/criu/include/sched.h +@@ -0,0 +1,33 @@ ++#ifndef __CR_SCHED_H__ ++#define __CR_SCHED_H__ ++ ++#include ++ ++#ifndef ptr_to_u64 ++#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr))) ++#endif ++#ifndef u64_to_ptr ++#define u64_to_ptr(x) ((void *)(uintptr_t)x) ++#endif ++ ++/* ++ * This structure is needed by clone3(). The kernel ++ * calls it 'struct clone_args'. As CRIU will always ++ * need at least this part of the structure (VER1) ++ * to be able to test if clone3() with set_tid works, ++ * the structure is defined here as 'struct _clone_args'. 
++ */ ++ ++struct _clone_args { ++ __aligned_u64 flags; ++ __aligned_u64 pidfd; ++ __aligned_u64 child_tid; ++ __aligned_u64 parent_tid; ++ __aligned_u64 exit_signal; ++ __aligned_u64 stack; ++ __aligned_u64 stack_size; ++ __aligned_u64 tls; ++ __aligned_u64 set_tid; ++ __aligned_u64 set_tid_size; ++}; ++#endif /* __CR_SCHED_H__ */ +diff --git a/criu/kerndat.c b/criu/kerndat.c +index 39cacb8..a13adbc 100644 +--- a/criu/kerndat.c ++++ b/criu/kerndat.c +@@ -41,6 +41,7 @@ + #include "uffd.h" + #include "vdso.h" + #include "kcmp.h" ++#include "sched.h" + + struct kerndat_s kdat = { + }; +@@ -972,6 +973,44 @@ static int kerndat_tun_netns(void) + return check_tun_netns_cr(&kdat.tun_ns); + } + ++static bool kerndat_has_clone3_set_tid(void) ++{ ++ pid_t pid; ++ struct _clone_args args = {}; ++ ++#ifndef CONFIG_X86_64 ++ /* ++ * Currently the CRIU PIE assembler clone3() wrapper is ++ * only implemented for X86_64. ++ */ ++ kdat.has_clone3_set_tid = false; ++ return 0; ++#endif ++ ++ args.set_tid = -1; ++ /* ++ * On a system without clone3() this will return ENOSYS. ++ * On a system with clone3() but without set_tid this ++ * will return E2BIG. ++ * On a system with clone3() and set_tid it will return ++ * EINVAL. 
++ */ ++ pid = syscall(__NR_clone3, &args, sizeof(args)); ++ ++ if (pid == -1 && (errno == ENOSYS || errno == E2BIG)) { ++ kdat.has_clone3_set_tid = false; ++ return 0; ++ } ++ if (pid == -1 && errno == EINVAL) { ++ kdat.has_clone3_set_tid = true; ++ } else { ++ pr_perror("Unexpected error from clone3\n"); ++ return -1; ++ } ++ ++ return 0; ++} ++ + int kerndat_init(void) + { + int ret; +@@ -1043,6 +1082,8 @@ int kerndat_init(void) + ret = kerndat_has_inotify_setnextwd(); + if (!ret) + ret = has_kcmp_epoll_tfd(); ++ if (!ret) ++ ret = kerndat_has_clone3_set_tid(); + + kerndat_lsm(); + kerndat_mmap_min_addr(); +-- +2.9.5 + diff --git a/0005-Add-assembler-wrapper-for-clone3.patch b/0005-Add-assembler-wrapper-for-clone3.patch new file mode 100644 index 0000000000000000000000000000000000000000..c6885054e1e622432f505571d2c119ac93887796 --- /dev/null +++ b/0005-Add-assembler-wrapper-for-clone3.patch @@ -0,0 +1,203 @@ +From bd283ef8b9ed6c5efaf1d6bba96c105b0410ab65 Mon Sep 17 00:00:00 2001 +From: Adrian Reber +Date: Mon, 16 Dec 2019 07:57:03 +0000 +Subject: [PATCH] Add assembler wrapper for clone3() + +To create a new process/thread with a certain PID based on clone3() a +new assembler wrapper is necessary as there is no glibc wrapper (yet). 
+ +Signed-off-by: Adrian Reber +Signed-off-by: Sang Yan +--- + criu/arch/aarch64/include/asm/restorer.h | 7 +++ + criu/arch/arm/include/asm/restorer.h | 7 +++ + criu/arch/ppc64/include/asm/restorer.h | 7 +++ + criu/arch/s390/include/asm/restorer.h | 7 +++ + criu/arch/x86/include/asm/restorer.h | 92 ++++++++++++++++++++++++++++++++ + 5 files changed, 120 insertions(+) + +diff --git a/criu/arch/aarch64/include/asm/restorer.h b/criu/arch/aarch64/include/asm/restorer.h +index f502cdc..2fe5891 100644 +--- a/criu/arch/aarch64/include/asm/restorer.h ++++ b/criu/arch/aarch64/include/asm/restorer.h +@@ -42,6 +42,13 @@ + "r"(&thread_args[i]) \ + : "x0", "x1", "x2", "x3", "x8", "memory") + ++#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ ++ clone_restore_fn) do { \ ++ pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ ++ pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ ++ ret = -1; \ ++} while (0) ++ + #define ARCH_FAIL_CORE_RESTORE \ + asm volatile( \ + "mov sp, %0 \n" \ +diff --git a/criu/arch/arm/include/asm/restorer.h b/criu/arch/arm/include/asm/restorer.h +index 217d920..ad4b58f 100644 +--- a/criu/arch/arm/include/asm/restorer.h ++++ b/criu/arch/arm/include/asm/restorer.h +@@ -43,6 +43,13 @@ + "r"(&thread_args[i]) \ + : "r0", "r1", "r2", "r3", "r7", "memory") + ++#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ ++ clone_restore_fn) do { \ ++ pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ ++ pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ ++ ret = -1; \ ++} while (0) ++ + #define ARCH_FAIL_CORE_RESTORE \ + asm volatile( \ + "mov sp, %0 \n" \ +diff --git a/criu/arch/ppc64/include/asm/restorer.h b/criu/arch/ppc64/include/asm/restorer.h +index d48d833..19bc3ea 100644 +--- a/criu/arch/ppc64/include/asm/restorer.h ++++ b/criu/arch/ppc64/include/asm/restorer.h +@@ -48,6 +48,13 @@ + 
"r"(&thread_args[i]) /* %6 */ \ + : "memory","0","3","4","5","6","7","14","15") + ++#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ ++ clone_restore_fn) do { \ ++ pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ ++ pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ ++ ret = -1; \ ++} while (0) ++ + #define arch_map_vdso(map, compat) -1 + + int restore_gpregs(struct rt_sigframe *f, UserPpc64RegsEntry *r); +diff --git a/criu/arch/s390/include/asm/restorer.h b/criu/arch/s390/include/asm/restorer.h +index cfdefca..733f2de 100644 +--- a/criu/arch/s390/include/asm/restorer.h ++++ b/criu/arch/s390/include/asm/restorer.h +@@ -39,6 +39,13 @@ + "d"(&thread_args[i]) \ + : "0", "1", "2", "3", "4", "5", "6", "cc", "memory") + ++#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ ++ clone_restore_fn) do { \ ++ pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ ++ pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ ++ ret = -1; \ ++} while (0) ++ + #define arch_map_vdso(map, compat) -1 + + int restore_gpregs(struct rt_sigframe *f, UserS390RegsEntry *r); +diff --git a/criu/arch/x86/include/asm/restorer.h b/criu/arch/x86/include/asm/restorer.h +index 25559b5..731477e 100644 +--- a/criu/arch/x86/include/asm/restorer.h ++++ b/criu/arch/x86/include/asm/restorer.h +@@ -25,6 +25,21 @@ static inline int set_compat_robust_list(uint32_t head_ptr, uint32_t len) + } + #endif /* !CONFIG_COMPAT */ + ++/* ++ * Documentation copied from glibc sysdeps/unix/sysv/linux/x86_64/clone.S ++ * The kernel expects: ++ * rax: system call number ++ * rdi: flags ++ * rsi: child_stack ++ * rdx: TID field in parent ++ * r10: TID field in child ++ * r8: thread pointer ++ * ++ * int clone(unsigned long clone_flags, unsigned long newsp, ++ * int *parent_tidptr, int *child_tidptr, ++ * unsigned long tls); ++ */ ++ + #define 
RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, \ + thread_args, clone_restore_fn) \ + asm volatile( \ +@@ -63,6 +78,83 @@ static inline int set_compat_robust_list(uint32_t head_ptr, uint32_t len) + "g"(&thread_args[i]) \ + : "rax", "rcx", "rdi", "rsi", "rdx", "r10", "r11", "memory") + ++/* int clone3(struct clone_args *args, size_t size) */ ++#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ ++ clone_restore_fn) \ ++ asm volatile( \ ++ "clone3_emul: \n" \ ++ /* ++ * Prepare stack pointer for child process. The kernel does ++ * stack + stack_size before passing the stack pointer to the ++ * child process. As we have to put the function and the ++ * arguments for the new process on that stack we have handle ++ * the kernel's implicit stack + stack_size. ++ */ \ ++ "movq (%3), %%rsi /* new stack pointer */ \n" \ ++ /* Move the stack_size to %rax to use later as the offset */ \ ++ "movq %4, %%rax \n" \ ++ /* 16 bytes are needed on the stack for function and args */ \ ++ "subq $16, (%%rsi, %%rax) \n" \ ++ "movq %6, %%rdi /* thread args */ \n" \ ++ "movq %%rdi, 8(%%rsi, %%rax) \n" \ ++ "movq %5, %%rdi /* thread function */ \n" \ ++ "movq %%rdi, 0(%%rsi, %%rax) \n" \ ++ /* ++ * The stack address has been modified for the two ++ * elements above (child function, child arguments). ++ * This modified stack needs to be stored back into the ++ * clone_args structure. ++ */ \ ++ "movq (%%rsi), %3 \n" \ ++ /* ++ * Do the actual clone3() syscall. First argument (%rdi) is ++ * the clone_args structure, second argument is the size ++ * of clone_args. ++ */ \ ++ "movq %1, %%rdi /* clone_args */ \n" \ ++ "movq %2, %%rsi /* size */ \n" \ ++ "movl $"__stringify(__NR_clone3)", %%eax \n" \ ++ "syscall \n" \ ++ /* ++ * If clone3() was successful and if we are in the child ++ * '0' is returned. Jump to the child function handler. ++ */ \ ++ "testq %%rax,%%rax \n" \ ++ "jz thread3_run \n" \ ++ /* Return the PID to the parent process. 
*/ \ ++ "movq %%rax, %0 \n" \ ++ "jmp clone3_end \n" \ ++ \ ++ "thread3_run: /* Child process */ \n" \ ++ /* Clear the frame pointer */ \ ++ "xorq %%rbp, %%rbp \n" \ ++ /* Pop the child function from the stack */ \ ++ "popq %%rax \n" \ ++ /* Pop the child function arguments from the stack */ \ ++ "popq %%rdi \n" \ ++ /* Run the child function */ \ ++ "callq *%%rax \n" \ ++ /* ++ * If the child function is expected to return, this ++ * would be the place to handle the return code. In CRIU's ++ * case the child function is expected to not return ++ * and do exit() itself. ++ */ \ ++ \ ++ "clone3_end: \n" \ ++ : "=r"(ret) \ ++ /* ++ * This uses the "r" modifier for all parameters ++ * as clang complained if using "g". ++ */ \ ++ : "r"(&clone_args), \ ++ "r"(size), \ ++ "r"(&clone_args.stack), \ ++ "r"(clone_args.stack_size), \ ++ "r"(clone_restore_fn), \ ++ "r"(args) \ ++ : "rax", "rcx", "rdi", "rsi", "rdx", "r10", "r11", "memory") ++ + #define ARCH_FAIL_CORE_RESTORE \ + asm volatile( \ + "movq %0, %%rsp \n" \ +-- +2.9.5 + diff --git a/0006-Use-clone3-with-set_tid-to-create-processes.patch b/0006-Use-clone3-with-set_tid-to-create-processes.patch new file mode 100644 index 0000000000000000000000000000000000000000..c37caac9deead11682a555a3b62c62a30d538977 --- /dev/null +++ b/0006-Use-clone3-with-set_tid-to-create-processes.patch @@ -0,0 +1,307 @@ +From eb742711bb08d11f670204492a0d0fc165f89d0b Mon Sep 17 00:00:00 2001 +From: Adrian Reber +Date: Mon, 16 Dec 2019 10:42:13 +0000 +Subject: [PATCH] Use clone3() with set_tid to create processes + +With clone3() with set_tid, introduced in Linux kernel 5.4, it is no +longer necessary to write to /proc/../ns_last_pid to influence the +next PID number. clone3() can directly select a PID for the newly +created process/thread. 
+ +After checking for the availability of clone3() with set_tid and adding +the assembler wrapper for clone3() in previous patches, this extends +criu/pie/restorer.c and criu/clone-noasan.c to use the newly added +assembler clone3() wrapper to create processes with a certain PID. + +This is a RFC and WIP, but I wanted to share it and run it through CI +for feedback. As the CI will probably not use a 5.4 based kernel it +should just keep on working as before. + +Signed-off-by: Adrian Reber +Signed-off-by: Sang Yan +--- + criu/clone-noasan.c | 32 +++++++++++++++++++++++ + criu/cr-restore.c | 64 ++++++++++++++++++++++++++++----------------- + criu/include/clone-noasan.h | 2 ++ + criu/include/restorer.h | 1 + + criu/include/rst_info.h | 1 + + criu/pie/restorer.c | 64 +++++++++++++++++++++++++++++---------------- + 6 files changed, 117 insertions(+), 47 deletions(-) + +diff --git a/criu/clone-noasan.c b/criu/clone-noasan.c +index 5ca280e..2784d12 100644 +--- a/criu/clone-noasan.c ++++ b/criu/clone-noasan.c +@@ -1,4 +1,10 @@ ++#include + #include ++#include ++ ++#include ++ ++#include "sched.h" + #include "common/compiler.h" + #include "log.h" + #include "common/bug.h" +@@ -22,6 +28,7 @@ + int clone_noasan(int (*fn)(void *), int flags, void *arg) + { + void *stack_ptr = (void *)round_down((unsigned long)&stack_ptr - 1024, 16); ++ + BUG_ON((flags & CLONE_VM) && !(flags & CLONE_VFORK)); + /* + * Reserve some bytes for clone() internal needs +@@ -29,3 +36,28 @@ int clone_noasan(int (*fn)(void *), int flags, void *arg) + */ + return clone(fn, stack_ptr, flags, arg); + } ++ ++int clone3_with_pid_noasan(int (*fn)(void *), void *arg, int flags, ++ int exit_signal, pid_t pid) ++{ ++ struct _clone_args c_args = {}; ++ ++ BUG_ON(flags & CLONE_VM); ++ ++ /* ++ * Make sure no child signals are requested. clone3() uses ++ * exit_signal for that. 
++ */ ++ BUG_ON(flags & 0xff); ++ ++ pr_debug("Creating process using clone3()\n"); ++ ++ c_args.exit_signal = exit_signal; ++ c_args.flags = flags; ++ c_args.set_tid = ptr_to_u64(&pid); ++ c_args.set_tid_size = 1; ++ pid = syscall(__NR_clone3, &c_args, sizeof(c_args)); ++ if (pid == 0) ++ exit(fn(arg)); ++ return pid; ++} +diff --git a/criu/cr-restore.c b/criu/cr-restore.c +index b4530f8..92856ed 100644 +--- a/criu/cr-restore.c ++++ b/criu/cr-restore.c +@@ -1372,40 +1372,55 @@ static inline int fork_with_pid(struct pstree_item *item) + if (!(ca.clone_flags & CLONE_NEWPID)) { + char buf[32]; + int len; +- int fd; ++ int fd = -1; + +- fd = open_proc_rw(PROC_GEN, LAST_PID_PATH); +- if (fd < 0) +- goto err; ++ if (!kdat.has_clone3_set_tid) { ++ fd = open_proc_rw(PROC_GEN, LAST_PID_PATH); ++ if (fd < 0) ++ goto err; ++ } + + lock_last_pid(); + +- len = snprintf(buf, sizeof(buf), "%d", pid - 1); +- if (write(fd, buf, len) != len) { +- pr_perror("%d: Write %s to %s", pid, buf, LAST_PID_PATH); ++ if (!kdat.has_clone3_set_tid) { ++ len = snprintf(buf, sizeof(buf), "%d", pid - 1); ++ if (write(fd, buf, len) != len) { ++ pr_perror("%d: Write %s to %s", pid, buf, ++ LAST_PID_PATH); ++ close(fd); ++ goto err_unlock; ++ } + close(fd); +- goto err_unlock; + } +- close(fd); + } else { + BUG_ON(pid != INIT_PID); + } + +- /* +- * Some kernel modules, such as network packet generator +- * run kernel thread upon net-namespace creattion taking +- * the @pid we've been requeting via LAST_PID_PATH interface +- * so that we can't restore a take with pid needed. +- * +- * Here is an idea -- unhare net namespace in callee instead. +- */ +- /* +- * The cgroup namespace is also unshared explicitly in the +- * move_in_cgroup(), so drop this flag here as well. 
+- */ +- close_pid_proc(); +- ret = clone_noasan(restore_task_with_children, +- (ca.clone_flags & ~(CLONE_NEWNET | CLONE_NEWCGROUP)) | SIGCHLD, &ca); ++ if (kdat.has_clone3_set_tid) { ++ ret = clone3_with_pid_noasan(restore_task_with_children, ++ &ca, (ca.clone_flags & ++ ~(CLONE_NEWNET | CLONE_NEWCGROUP)), ++ SIGCHLD, pid); ++ } else { ++ /* ++ * Some kernel modules, such as network packet generator ++ * run kernel thread upon net-namespace creation taking ++ * the @pid we've been requesting via LAST_PID_PATH interface ++ * so that we can't restore a take with pid needed. ++ * ++ * Here is an idea -- unshare net namespace in callee instead. ++ */ ++ /* ++ * The cgroup namespace is also unshared explicitly in the ++ * move_in_cgroup(), so drop this flag here as well. ++ */ ++ close_pid_proc(); ++ ret = clone_noasan(restore_task_with_children, ++ (ca.clone_flags & ++ ~(CLONE_NEWNET | CLONE_NEWCGROUP)) | SIGCHLD, ++ &ca); ++ } ++ + if (ret < 0) { + pr_perror("Can't fork for %d", pid); + goto err_unlock; +@@ -3557,6 +3572,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns + task_args->vdso_maps_rt = vdso_maps_rt; + task_args->vdso_rt_size = vdso_rt_size; + task_args->can_map_vdso = kdat.can_map_vdso; ++ task_args->has_clone3_set_tid = kdat.has_clone3_set_tid; + + new_sp = restorer_stack(task_args->t->mz); + +diff --git a/criu/include/clone-noasan.h b/criu/include/clone-noasan.h +index 8ef75fa..0cfdaa1 100644 +--- a/criu/include/clone-noasan.h ++++ b/criu/include/clone-noasan.h +@@ -2,5 +2,7 @@ + #define __CR_CLONE_NOASAN_H__ + + int clone_noasan(int (*fn)(void *), int flags, void *arg); ++int clone3_with_pid_noasan(int (*fn)(void *), void *arg, int flags, ++ int exit_signal, pid_t pid); + + #endif /* __CR_CLONE_NOASAN_H__ */ +diff --git a/criu/include/restorer.h b/criu/include/restorer.h +index b93807f..dfb4e6b 100644 +--- a/criu/include/restorer.h ++++ b/criu/include/restorer.h +@@ -221,6 +221,7 @@ struct task_restore_args { + 
#endif + int lsm_type; + int child_subreaper; ++ bool has_clone3_set_tid; + } __aligned(64); + + /* +diff --git a/criu/include/rst_info.h b/criu/include/rst_info.h +index 07c634f..3283849 100644 +--- a/criu/include/rst_info.h ++++ b/criu/include/rst_info.h +@@ -4,6 +4,7 @@ + #include "common/lock.h" + #include "common/list.h" + #include "vma.h" ++#include "kerndat.h" + + struct task_entries { + int nr_threads, nr_tasks, nr_helpers; +diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c +index 390c0e1..8bdc88a 100644 +--- a/criu/pie/restorer.c ++++ b/criu/pie/restorer.c +@@ -35,6 +35,7 @@ + #include "sk-inet.h" + #include "vma.h" + #include "uffd.h" ++#include "sched.h" + + #include "common/lock.h" + #include "common/page.h" +@@ -1769,16 +1770,19 @@ long __export_restore_task(struct task_restore_args *args) + long clone_flags = CLONE_VM | CLONE_FILES | CLONE_SIGHAND | + CLONE_THREAD | CLONE_SYSVSEM | CLONE_FS; + long last_pid_len; ++ pid_t thread_pid; + long parent_tid; + int i, fd = -1; + +- /* One level pid ns hierarhy */ +- fd = sys_openat(args->proc_fd, LAST_PID_PATH, O_RDWR, 0); +- if (fd < 0) { +- pr_err("can't open last pid fd %d\n", fd); +- goto core_restore_end; +- } ++ if (!args->has_clone3_set_tid) { ++ /* One level pid ns hierarhy */ ++ fd = sys_openat(args->proc_fd, LAST_PID_PATH, O_RDWR, 0); ++ if (fd < 0) { ++ pr_err("can't open last pid fd %d\n", fd); ++ goto core_restore_end; ++ } + ++ } + mutex_lock(&task_entries_local->last_pid_mutex); + + for (i = 0; i < args->nr_threads; i++) { +@@ -1789,24 +1793,38 @@ long __export_restore_task(struct task_restore_args *args) + continue; + + new_sp = restorer_stack(thread_args[i].mz); +- last_pid_len = std_vprint_num(last_pid_buf, sizeof(last_pid_buf), thread_args[i].pid - 1, &s); +- sys_lseek(fd, 0, SEEK_SET); +- ret = sys_write(fd, s, last_pid_len); +- if (ret < 0) { +- pr_err("Can't set last_pid %ld/%s\n", ret, last_pid_buf); +- sys_close(fd); +- mutex_unlock(&task_entries_local->last_pid_mutex); +- goto 
core_restore_end; +- } +- +- /* +- * To achieve functionality like libc's clone() +- * we need a pure assembly here, because clone()'ed +- * thread will run with own stack and we must not +- * have any additional instructions... oh, dear... +- */ ++ if (args->has_clone3_set_tid) { ++ struct _clone_args c_args = {}; ++ thread_pid = thread_args[i].pid; ++ c_args.set_tid = ptr_to_u64(&thread_pid); ++ c_args.flags = clone_flags; ++ c_args.set_tid_size = 1; ++ /* The kernel does stack + stack_size. */ ++ c_args.stack = new_sp - RESTORE_STACK_SIZE; ++ c_args.stack_size = RESTORE_STACK_SIZE; ++ c_args.child_tid = ptr_to_u64(&thread_args[i].pid); ++ c_args.parent_tid = ptr_to_u64(&parent_tid); ++ pr_debug("Using clone3 to restore the process\n"); ++ RUN_CLONE3_RESTORE_FN(ret, c_args, sizeof(c_args), &thread_args[i], args->clone_restore_fn); ++ } else { ++ last_pid_len = std_vprint_num(last_pid_buf, sizeof(last_pid_buf), thread_args[i].pid - 1, &s); ++ sys_lseek(fd, 0, SEEK_SET); ++ ret = sys_write(fd, s, last_pid_len); ++ if (ret < 0) { ++ pr_err("Can't set last_pid %ld/%s\n", ret, last_pid_buf); ++ sys_close(fd); ++ mutex_unlock(&task_entries_local->last_pid_mutex); ++ goto core_restore_end; ++ } + +- RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, thread_args, args->clone_restore_fn); ++ /* ++ * To achieve functionality like libc's clone() ++ * we need a pure assembly here, because clone()'ed ++ * thread will run with own stack and we must not ++ * have any additional instructions... oh, dear... 
++ */ ++ RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, thread_args, args->clone_restore_fn); ++ } + if (ret != thread_args[i].pid) { + pr_err("Unable to create a thread: %ld\n", ret); + mutex_unlock(&task_entries_local->last_pid_mutex); +-- +2.9.5 + diff --git a/0007-clone3-handle-clone3-with-CLONE_PARENT.patch b/0007-clone3-handle-clone3-with-CLONE_PARENT.patch new file mode 100644 index 0000000000000000000000000000000000000000..a6d61a0f9678413011cca9f0bca490e41f59e39f --- /dev/null +++ b/0007-clone3-handle-clone3-with-CLONE_PARENT.patch @@ -0,0 +1,43 @@ +From 4b547f723a3fdc60c2b68ed0141b150b94d54c8c Mon Sep 17 00:00:00 2001 +From: Adrian Reber +Date: Sat, 25 Jan 2020 13:25:21 +0100 +Subject: [PATCH] clone3: handle clone3() with CLONE_PARENT + +clone3() explicitly blocks setting an exit_signal if CLONE_PARENT is +specified. With clone() it also did not work, but there was no error +message. The exit signal from the thread group leader is taken. + +Signed-off-by: Adrian Reber +Signed-off-by: Sang Yan +--- + criu/clone-noasan.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +diff --git a/criu/clone-noasan.c b/criu/clone-noasan.c +index 2784d12..7485a52 100644 +--- a/criu/clone-noasan.c ++++ b/criu/clone-noasan.c +@@ -52,7 +52,19 @@ int clone3_with_pid_noasan(int (*fn)(void *), void *arg, int flags, + + pr_debug("Creating process using clone3()\n"); + +- c_args.exit_signal = exit_signal; ++ /* ++ * clone3() explicitly blocks setting an exit_signal ++ * if CLONE_PARENT is specified. With clone() it also ++ * did not work, but there was no error message. The ++ * exit signal from the thread group leader is taken. 
++ */ ++ if (!(flags & CLONE_PARENT)) { ++ if (exit_signal != SIGCHLD) { ++ pr_err("Exit signal not SIGCHLD\n"); ++ return -1; ++ } ++ c_args.exit_signal = exit_signal; ++ } + c_args.flags = flags; + c_args.set_tid = ptr_to_u64(&pid); + c_args.set_tid_size = 1; +-- +2.9.5 + diff --git a/0008-aarch64-use-clone3-if-possible.patch b/0008-aarch64-use-clone3-if-possible.patch new file mode 100644 index 0000000000000000000000000000000000000000..112d62e559ce73abcfaf6049c3228b743e1b92e7 --- /dev/null +++ b/0008-aarch64-use-clone3-if-possible.patch @@ -0,0 +1,114 @@ +From b7563d356de8f0765d8832d7b5f3911869ad5a0d Mon Sep 17 00:00:00 2001 +From: Adrian Reber +Date: Sun, 19 Jan 2020 21:42:58 +0100 +Subject: [PATCH] aarch64: use clone3() if possible + +This adds the parasite clone3() with set_tid wrapper for aarch64. + +Tested on Fedora 31 with 5.5.0-rc6. + +Signed-off-by: Adrian Reber +Signed-off-by: Sang Yan +--- + criu/arch/aarch64/include/asm/restorer.h | 67 +++++++++++++++++++++++++++++--- + criu/kerndat.c | 4 +- + 2 files changed, 63 insertions(+), 8 deletions(-) + +diff --git a/criu/arch/aarch64/include/asm/restorer.h b/criu/arch/aarch64/include/asm/restorer.h +index 2fe5891..120fa8f 100644 +--- a/criu/arch/aarch64/include/asm/restorer.h ++++ b/criu/arch/aarch64/include/asm/restorer.h +@@ -42,12 +42,67 @@ + "r"(&thread_args[i]) \ + : "x0", "x1", "x2", "x3", "x8", "memory") + +-#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ +- clone_restore_fn) do { \ +- pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ +- pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ +- ret = -1; \ +-} while (0) ++/* ++ * Based on sysdeps/unix/sysv/linux/aarch64/clone.S ++ * ++ * int clone(int (*fn)(void *arg), x0 ++ * void *child_stack, x1 ++ * int flags, x2 ++ * void *arg, x3 ++ * pid_t *ptid, x4 ++ * struct user_desc *tls, x5 ++ * pid_t *ctid); x6 ++ * ++ * int clone3(struct clone_args *args, x0 ++ * size_t 
size); x1 ++ * ++ * Always consult the CLONE3 wrappers for other architectures ++ * for additional details. ++ * ++ */ ++ ++#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ ++ clone_restore_fn) \ ++ asm volatile( \ ++ /* In contrast to the clone() wrapper above this does not put ++ * the thread function and its arguments on the child stack, ++ * but uses registers to pass these parameters to the child process. ++ * Based on the glibc clone() wrapper at ++ * sysdeps/unix/sysv/linux/aarch64/clone.S. ++ */ \ ++ "clone3_emul: \n" \ ++ /* ++ * Based on the glibc clone() wrapper, which uses x10 and x11 ++ * to save the arguments for the child process, this does the same. ++ * x10 for the thread function and x11 for the thread arguments. ++ */ \ ++ "mov x10, %3 /* clone_restore_fn */ \n" \ ++ "mov x11, %4 /* args */ \n" \ ++ "mov x0, %1 /* &clone_args */ \n" \ ++ "mov x1, %2 /* size */ \n" \ ++ /* Load syscall number */ \ ++ "mov x8, #"__stringify(__NR_clone3)" \n" \ ++ /* Do the syscall */ \ ++ "svc #0 \n" \ ++ \ ++ "cbz x0, clone3_thread_run \n" \ ++ \ ++ "mov %0, x0 \n" \ ++ "b clone3_end \n" \ ++ \ ++ "clone3_thread_run: \n" \ ++ /* Move args to x0 */ \ ++ "mov x0, x11 \n" \ ++ /* Jump to clone_restore_fn */ \ ++ "br x10 \n" \ ++ \ ++ "clone3_end: \n" \ ++ : "=r"(ret) \ ++ : "r"(&clone_args), \ ++ "r"(size), \ ++ "r"(clone_restore_fn), \ ++ "r"(args) \ ++ : "x0", "x1", "x8", "x10", "x11", "memory") + + #define ARCH_FAIL_CORE_RESTORE \ + asm volatile( \ +diff --git a/criu/kerndat.c b/criu/kerndat.c +index a13adbc..52aac55 100644 +--- a/criu/kerndat.c ++++ b/criu/kerndat.c +@@ -978,10 +978,10 @@ static bool kerndat_has_clone3_set_tid(void) + pid_t pid; + struct _clone_args args = {}; + +-#ifndef CONFIG_X86_64 ++#if !defined(CONFIG_X86_64) && !defined(CONFIG_AARCH64) + /* + * Currently the CRIU PIE assembler clone3() wrapper is +- * only implemented for X86_64. ++ * only implemented for X86_64, AARCH64. 
+ */ + kdat.has_clone3_set_tid = false; + return 0; +-- +2.9.5 + diff --git a/0009-criu-dump-and-restore-cpu-affinity-of-each-thread.patch b/0009-criu-dump-and-restore-cpu-affinity-of-each-thread.patch new file mode 100644 index 0000000000000000000000000000000000000000..962a334c17f797104dda7b111bfedb780b3c700f --- /dev/null +++ b/0009-criu-dump-and-restore-cpu-affinity-of-each-thread.patch @@ -0,0 +1,417 @@ +From baa12b00eeb88bee4de11e28df623662a2b32078 Mon Sep 17 00:00:00 2001 +From: Sang Yan +Date: Thu, 26 Nov 2020 21:18:54 +0800 +Subject: [PATCH] criu: dump and restore cpu affinity of each thread + +Criu should dump and restore threads' or processes' +cpu affinity. + +Add one entry of thread_cpuallow_entry into +thread_core_entry to save cpu affinity info. + +Restore it after threads restored but before running. + +Add option --with-cpu-affinity to enable this function +at restore. + +Signed-off-by: Sang Yan +--- + compel/arch/arm/plugins/std/syscalls/syscall.def | 1 + + .../ppc64/plugins/std/syscalls/syscall-ppc64.tbl | 1 + + .../s390/plugins/std/syscalls/syscall-s390.tbl | 1 + + .../arch/x86/plugins/std/syscalls/syscall_32.tbl | 1 + + .../arch/x86/plugins/std/syscalls/syscall_64.tbl | 1 + + criu/config.c | 1 + + criu/cr-dump.c | 14 ++++++++ + criu/cr-restore.c | 26 ++++++++++++++ + criu/crtools.c | 2 ++ + criu/include/cr_options.h | 1 + + criu/include/restorer.h | 3 ++ + criu/pie/restorer.c | 38 ++++++++++++++++++++ + criu/pstree.c | 7 ++++ + images/core.proto | 5 +++ + test/zdtm/static/Makefile | 1 + + test/zdtm/static/cpu-affinity0.c | 42 ++++++++++++++++++++++ + test/zdtm/static/cpu-affinity0.desc | 1 + + 17 files changed, 146 insertions(+) + create mode 100644 test/zdtm/static/cpu-affinity0.c + create mode 100644 test/zdtm/static/cpu-affinity0.desc + +diff --git a/compel/arch/arm/plugins/std/syscalls/syscall.def b/compel/arch/arm/plugins/std/syscalls/syscall.def +index 2b93cb0..eaea589 100644 +--- a/compel/arch/arm/plugins/std/syscalls/syscall.def ++++ 
b/compel/arch/arm/plugins/std/syscalls/syscall.def +@@ -113,3 +113,4 @@ fallocate 47 352 (int fd, int mode, loff_t offset, loff_t len) + cacheflush ! 983042 (void *start, void *end, int flags) + ppoll 73 336 (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) + clone3 435 435 (struct clone_args *uargs, size_t size) ++sched_setaffinity 122 241 (int fd, size_t cpusetsize, const cpu_set_t *mask) +diff --git a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl +index 7cdf136..3c7497e 100644 +--- a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl ++++ b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl +@@ -109,3 +109,4 @@ __NR_preadv 320 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, + __NR_userfaultfd 364 sys_userfaultfd (int flags) + __NR_ppoll 281 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) + __NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) ++__NR_sched_setaffinity 222 sys_sched_setaffinity (int fd, size_t cpusetsize, const cpu_set_t *mask) +diff --git a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl +index 5cf2284..cf13120 100644 +--- a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl ++++ b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl +@@ -109,3 +109,4 @@ __NR_preadv 328 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, + __NR_gettimeofday 78 sys_gettimeofday (struct timeval *tv, struct timezone *tz) + __NR_ppoll 302 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) + __NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) ++__NR_sched_setaffinity 239 sys_sched_setaffinity (int fd, size_t cpusetsize, const cpu_set_t *mask) +diff --git 
a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl +index f1faace..20f76f2 100644 +--- a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl ++++ b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl +@@ -63,6 +63,7 @@ __NR_mincore 218 sys_mincore (void *addr, unsigned long size, unsigned char * + __NR_madvise 219 sys_madvise (unsigned long start, size_t len, int behavior) + __NR_gettid 224 sys_gettid (void) + __NR_futex 240 sys_futex (uint32_t *uaddr, int op, uint32_t val, struct timespec *utime, uint32_t *uaddr2, uint32_t val3) ++__NR_sched_setaffinity 241 sys_sched_setaffinity (int fd, size_t cpusetsize, const cpu_set_t *mask) + __NR_set_thread_area 243 sys_set_thread_area (user_desc_t *info) + __NR_get_thread_area 244 sys_get_thread_area (user_desc_t *info) + __NR_io_setup 245 sys_io_setup (unsigned nr_reqs, aio_context_t *ctx32p) +diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl +index 9056f5e..38e384d 100644 +--- a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl ++++ b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl +@@ -73,6 +73,7 @@ __NR_mount 165 sys_mount (char *dev_nmae, char *dir_name, char *type, unsign + __NR_umount2 166 sys_umount2 (char *name, int flags) + __NR_gettid 186 sys_gettid (void) + __NR_futex 202 sys_futex (uint32_t *uaddr, int op, uint32_t val, struct timespec *utime, uint32_t *uaddr2, uint32_t val3) ++__NR_sched_setaffinity 203 sys_sched_setaffinity (int fd, size_t cpusetsize, const cpu_set_t *mask) + __NR_set_thread_area 205 sys_set_thread_area (user_desc_t *info) + __NR_io_setup 206 sys_io_setup (unsigned nr_events, aio_context_t *ctx) + __NR_io_getevents 208 sys_io_getevents (aio_context_t ctx, long min_nr, long nr, struct io_event *evs, struct timespec *tmo) +diff --git a/criu/config.c b/criu/config.c +index 39aa071..76d6e5f 100644 +--- a/criu/config.c ++++ b/criu/config.c +@@ -516,6 +516,7 @@ int 
parse_options(int argc, char **argv, bool *usage_error, + { "tls-key", required_argument, 0, 1095}, + BOOL_OPT("tls", &opts.tls), + {"tls-no-cn-verify", no_argument, &opts.tls_no_cn_verify, true}, ++ BOOL_OPT("with-cpu-affinity", &opts.with_cpu_affinity), + { }, + }; + +diff --git a/criu/cr-dump.c b/criu/cr-dump.c +index 9273fc0..0d67073 100644 +--- a/criu/cr-dump.c ++++ b/criu/cr-dump.c +@@ -137,6 +137,7 @@ static int dump_sched_info(int pid, ThreadCoreEntry *tc) + { + int ret; + struct sched_param sp; ++ cpu_set_t cpumask; + + BUILD_BUG_ON(SCHED_OTHER != 0); /* default in proto message */ + +@@ -182,6 +183,19 @@ static int dump_sched_info(int pid, ThreadCoreEntry *tc) + tc->has_sched_nice = true; + tc->sched_nice = ret; + ++ pr_info("\tdumping allowed cpus for %d\n", pid); ++ ret = syscall(__NR_sched_getaffinity, pid, sizeof(cpumask), &cpumask); ++ if (ret < 0) { ++ pr_perror("Can't get sched affinity for %d", pid); ++ return -1; ++ } ++ memcpy(tc->allowed_cpus->cpumask, &cpumask, sizeof(cpu_set_t)); ++ pr_info("\t 0x%llx, 0x%llx, 0x%llx, 0x%llx\n", ++ (unsigned long long)tc->allowed_cpus->cpumask[3], ++ (unsigned long long)tc->allowed_cpus->cpumask[2], ++ (unsigned long long)tc->allowed_cpus->cpumask[1], ++ (unsigned long long)tc->allowed_cpus->cpumask[0]); ++ + return 0; + } + +diff --git a/criu/cr-restore.c b/criu/cr-restore.c +index 92856ed..c2be323 100644 +--- a/criu/cr-restore.c ++++ b/criu/cr-restore.c +@@ -114,6 +114,7 @@ static int prepare_restorer_blob(void); + static int prepare_rlimits(int pid, struct task_restore_args *, CoreEntry *core); + static int prepare_posix_timers(int pid, struct task_restore_args *ta, CoreEntry *core); + static int prepare_signals(int pid, struct task_restore_args *, CoreEntry *core); ++static int prepare_allowed_cpus(int pid, struct task_restore_args *ta, CoreEntry *leader_core); + + /* + * Architectures can overwrite this function to restore registers that are not +@@ -902,6 +903,9 @@ static int restore_one_alive_task(int 
pid, CoreEntry *core) + if (prepare_signals(pid, ta, core)) + return -1; + ++ if (prepare_allowed_cpus(pid, ta, core)) ++ return -1; ++ + if (prepare_posix_timers(pid, ta, core)) + return -1; + +@@ -3037,6 +3041,27 @@ out: + return ret; + } + ++static int prepare_allowed_cpus(int pid, struct task_restore_args *ta, CoreEntry *leader_core) ++{ ++ int i; ++ int *need_cpu_affinity; ++ cpu_set_t *cpumaks; ++ ++ ta->allowed_cpus = (char *)rst_mem_align_cpos(RM_PRIVATE); ++ ++ need_cpu_affinity = rst_mem_alloc(sizeof(int), RM_PRIVATE); ++ *need_cpu_affinity = opts.with_cpu_affinity; ++ ++ for (i = 0; i < current->nr_threads; i++) { ++ cpumaks = rst_mem_alloc(sizeof(cpu_set_t), RM_PRIVATE); ++ if (!cpumaks) ++ return -1; ++ ++ memcpy(cpumaks, current->core[i]->thread_core->allowed_cpus->cpumask, sizeof(cpu_set_t)); ++ } ++ return 0; ++} ++ + extern void __gcov_flush(void) __attribute__((weak)); + void __gcov_flush(void) {} + +@@ -3459,6 +3484,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns + RST_MEM_FIXUP_PPTR(task_args->timerfd); + RST_MEM_FIXUP_PPTR(task_args->posix_timers); + RST_MEM_FIXUP_PPTR(task_args->siginfo); ++ RST_MEM_FIXUP_PPTR(task_args->allowed_cpus); + RST_MEM_FIXUP_PPTR(task_args->rlims); + RST_MEM_FIXUP_PPTR(task_args->helpers); + RST_MEM_FIXUP_PPTR(task_args->zombies); +diff --git a/criu/crtools.c b/criu/crtools.c +index a948756..a22664d 100644 +--- a/criu/crtools.c ++++ b/criu/crtools.c +@@ -388,6 +388,8 @@ usage: + " Namespace can be specified as either pid or file path.\n" + " OPTIONS can be used to specify parameters for userns:\n" + " user:PID,UID,GID\n" ++" --with-cpu-affinity Allow to restore cpu affinity. 
Only for hosts with\n" ++" same cpu quantity.\n" + "\n" + "Check options:\n" + " Without options, \"criu check\" checks availability of absolutely required\n" +diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h +index 82f76ad..98c5a44 100644 +--- a/criu/include/cr_options.h ++++ b/criu/include/cr_options.h +@@ -144,6 +144,7 @@ struct cr_options { + char *tls_key; + int tls; + int tls_no_cn_verify; ++ int with_cpu_affinity; /* restore cpu affinity */ + }; + + extern struct cr_options opts; +diff --git a/criu/include/restorer.h b/criu/include/restorer.h +index dfb4e6b..bd6ef6a 100644 +--- a/criu/include/restorer.h ++++ b/criu/include/restorer.h +@@ -1,6 +1,7 @@ + #ifndef __CR_RESTORER_H__ + #define __CR_RESTORER_H__ + ++#include + #include + #include + #include +@@ -162,6 +163,8 @@ struct task_restore_args { + siginfo_t *siginfo; + unsigned int siginfo_n; + ++ char *allowed_cpus; ++ + struct rst_tcp_sock *tcp_socks; + unsigned int tcp_socks_n; + +diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c +index 8bdc88a..571341d 100644 +--- a/criu/pie/restorer.c ++++ b/criu/pie/restorer.c +@@ -432,6 +432,40 @@ static int restore_signals(siginfo_t *ptr, int nr, bool group) + return 0; + } + ++static int restore_cpu_affinity(struct task_restore_args *args) ++{ ++ int i; ++ int pid; ++ int ret; ++ int *need_cpu_affinity; ++ cpu_set_t *cpumask; ++ cpu_set_t *allowed_cpus; ++ ++ need_cpu_affinity = (int *)args->allowed_cpus; ++ if (!*need_cpu_affinity) { ++ pr_debug("No need to restore cpu affinity.\n"); ++ return 0; ++ } ++ ++ allowed_cpus = (cpu_set_t *)(args->allowed_cpus + sizeof(int)); ++ for (i = 0; i < args->nr_threads; i++) { ++ pid = args->thread_args[i].pid; ++ cpumask = &allowed_cpus[i]; ++ pr_info("Restoring %d allowed_cpus %llx, %llx, %llx, %llx\n", pid, ++ (unsigned long long)cpumask->__bits[3], ++ (unsigned long long)cpumask->__bits[2], ++ (unsigned long long)cpumask->__bits[1], ++ (unsigned long long)cpumask->__bits[0]); ++ ret = 
sys_sched_setaffinity(pid, sizeof(cpu_set_t), cpumask); ++ if (ret) { ++ pr_err("\t Restore %d cpumask failed.\n", pid); ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ + static int restore_seccomp_filter(pid_t tid, struct thread_restore_args *args) + { + unsigned int flags = args->seccomp_force_tsync ? SECCOMP_FILTER_FLAG_TSYNC : 0; +@@ -1897,6 +1931,10 @@ long __export_restore_task(struct task_restore_args *args) + if (ret) + goto core_restore_end; + ++ ret = restore_cpu_affinity(args); ++ if (ret) ++ goto core_restore_end; ++ + restore_finish_stage(task_entries_local, CR_STATE_RESTORE_SIGCHLD); + + rst_tcp_socks_all(args); +diff --git a/criu/pstree.c b/criu/pstree.c +index 92b4167..203ce21 100644 +--- a/criu/pstree.c ++++ b/criu/pstree.c +@@ -58,11 +58,13 @@ CoreEntry *core_entry_alloc(int th, int tsk) + CredsEntry *ce = NULL; + + sz += sizeof(ThreadCoreEntry) + sizeof(ThreadSasEntry) + sizeof(CredsEntry); ++ sz += sizeof(ThreadAllowedcpusEntry); + + sz += CR_CAP_SIZE * sizeof(ce->cap_inh[0]); + sz += CR_CAP_SIZE * sizeof(ce->cap_prm[0]); + sz += CR_CAP_SIZE * sizeof(ce->cap_eff[0]); + sz += CR_CAP_SIZE * sizeof(ce->cap_bnd[0]); ++ sz += sizeof(cpu_set_t); + /* + * @groups are dynamic and allocated + * on demand. 
+@@ -127,6 +129,11 @@ CoreEntry *core_entry_alloc(int th, int tsk) + ce->cap_eff = xptr_pull_s(&m, CR_CAP_SIZE * sizeof(ce->cap_eff[0])); + ce->cap_bnd = xptr_pull_s(&m, CR_CAP_SIZE * sizeof(ce->cap_bnd[0])); + ++ core->thread_core->allowed_cpus = xptr_pull(&m, ThreadAllowedcpusEntry); ++ thread_allowedcpus_entry__init(core->thread_core->allowed_cpus); ++ core->thread_core->allowed_cpus->n_cpumask = sizeof(cpu_set_t) / sizeof(uint64_t); ++ core->thread_core->allowed_cpus->cpumask = xptr_pull_s(&m, sizeof(cpu_set_t)); ++ + if (arch_alloc_thread_info(core)) { + xfree(core); + core = NULL; +diff --git a/images/core.proto b/images/core.proto +index c3dba6f..535881f 100644 +--- a/images/core.proto ++++ b/images/core.proto +@@ -76,6 +76,10 @@ message thread_sas_entry { + required uint32 ss_flags = 3; + } + ++message thread_allowedcpus_entry { ++ repeated uint64 cpumask = 1; ++} ++ + message thread_core_entry { + required uint64 futex_rla = 1; + required uint32 futex_rla_len = 2; +@@ -93,6 +97,7 @@ message thread_core_entry { + optional uint32 seccomp_filter = 12; + + optional string comm = 13; ++ required thread_allowedcpus_entry allowed_cpus = 15; + } + + message task_rlimits_entry { +diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile +index d8279d6..8ac3442 100644 +--- a/test/zdtm/static/Makefile ++++ b/test/zdtm/static/Makefile +@@ -217,6 +217,7 @@ TST_NOFILE := \ + child_subreaper \ + child_subreaper_existing_child \ + child_subreaper_and_reparent \ ++ cpu-affinity0 \ + # jobctl00 \ + + ifneq ($(SRCARCH),arm) +diff --git a/test/zdtm/static/cpu-affinity0.c b/test/zdtm/static/cpu-affinity0.c +new file mode 100644 +index 0000000..83dee19 +--- /dev/null ++++ b/test/zdtm/static/cpu-affinity0.c +@@ -0,0 +1,42 @@ ++#include ++#include ++#include ++ ++#include "zdtmtst.h" ++ ++const char *test_doc = "Check that with-cpu-affinity option can restore cpu affinity"; ++const char *test_author = "Sang Yan "; ++ ++int main(int argc, char **argv) ++{ ++ cpu_set_t 
old; ++ cpu_set_t new; ++ ++ test_init(argc, argv); ++ ++ CPU_ZERO(&old); ++ CPU_ZERO(&new); ++ ++ /* test only 0 core because of CI test env limited */ ++ CPU_SET(0, &old); ++ ++ if (sched_setaffinity(getpid(), sizeof(old), &old) < 0) { ++ pr_perror("Can't set old cpu affinity! errno: %d", errno); ++ exit(1); ++ } ++ ++ test_daemon(); ++ test_waitsig(); ++ ++ if (sched_getaffinity(getpid(), sizeof(new), &new) < 0) { ++ pr_perror("Can't get new cpu affinity! errno: %d", errno); ++ exit(1); ++ } ++ ++ if (memcmp(&old, &new, sizeof(cpu_set_t))) ++ fail("Cpu affinity restore failed."); ++ else ++ pass(); ++ ++ return 0; ++} +diff --git a/test/zdtm/static/cpu-affinity0.desc b/test/zdtm/static/cpu-affinity0.desc +new file mode 100644 +index 0000000..0d0b8ae +--- /dev/null ++++ b/test/zdtm/static/cpu-affinity0.desc +@@ -0,0 +1 @@ ++{'dopts': '', 'ropts': '--with-cpu-affinity', 'flags': 'reqrst '} +-- +2.9.5 + diff --git a/0010-vdso-fix-segmentation-fault-caused-by-char-pointer-a.patch b/0010-vdso-fix-segmentation-fault-caused-by-char-pointer-a.patch new file mode 100644 index 0000000000000000000000000000000000000000..a3ee15c9ea4749cef11ac4cb1effc54b0cdbcc8c --- /dev/null +++ b/0010-vdso-fix-segmentation-fault-caused-by-char-pointer-a.patch @@ -0,0 +1,193 @@ +From bcd44583d237684226442aa92cf2ffc41e4ec7e0 Mon Sep 17 00:00:00 2001 +From: anatasluo +Date: Fri, 29 Jan 2021 13:48:57 +0000 +Subject: [PATCH] vdso: fix segmentation fault caused by char pointer array + +When I compile criu with "make DEBUG=1" and run it to restore my +program, it produces a segmentation fault. + +In aarch64, with compile flag "-O0", when criu executes the code in pie, +it is unable to visit the content of ARCH_VDSO_SYMBOLS. So I put these +variables into the stack. 
+ +Signed-off-by: anatasluo +--- + criu/arch/aarch64/include/asm/vdso.h | 17 +++++++++-------- + criu/arch/arm/include/asm/vdso.h | 9 ++++++--- + criu/arch/ppc64/include/asm/vdso.h | 34 +++++++++++++++++++++++----------- + criu/arch/s390/include/asm/vdso.h | 17 +++++++++++------ + criu/arch/x86/include/asm/vdso.h | 23 ++++++++++++++++------- + criu/pie/util-vdso.c | 2 ++ + 6 files changed, 67 insertions(+), 35 deletions(-) + +diff --git a/criu/arch/aarch64/include/asm/vdso.h b/criu/arch/aarch64/include/asm/vdso.h +index 8a65e09..97a2440 100644 +--- a/criu/arch/aarch64/include/asm/vdso.h ++++ b/criu/arch/aarch64/include/asm/vdso.h +@@ -16,15 +16,16 @@ + * Workaround for VDSO array symbol table's relocation. + * XXX: remove when compel/piegen will support aarch64. + */ +-static const char* __maybe_unused aarch_vdso_symbol1 = "__kernel_clock_getres"; +-static const char* __maybe_unused aarch_vdso_symbol2 = "__kernel_clock_gettime"; +-static const char* __maybe_unused aarch_vdso_symbol3 = "__kernel_gettimeofday"; +-static const char* __maybe_unused aarch_vdso_symbol4 = "__kernel_rt_sigreturn"; ++#define ARCH_VDSO_SYMBOLS_LIST \ ++ const char* aarch_vdso_symbol1 = "__kernel_clock_getres"; \ ++ const char* aarch_vdso_symbol2 = "__kernel_clock_gettime"; \ ++ const char* aarch_vdso_symbol3 = "__kernel_gettimeofday"; \ ++ const char* aarch_vdso_symbol4 = "__kernel_rt_sigreturn"; + +-#define ARCH_VDSO_SYMBOLS \ +- aarch_vdso_symbol1, \ +- aarch_vdso_symbol2, \ +- aarch_vdso_symbol3, \ ++#define ARCH_VDSO_SYMBOLS \ ++ aarch_vdso_symbol1, \ ++ aarch_vdso_symbol2, \ ++ aarch_vdso_symbol3, \ + aarch_vdso_symbol4 + + extern void write_intraprocedure_branch(unsigned long to, unsigned long from); +diff --git a/criu/arch/arm/include/asm/vdso.h b/criu/arch/arm/include/asm/vdso.h +index f57790a..e96514e 100644 +--- a/criu/arch/arm/include/asm/vdso.h ++++ b/criu/arch/arm/include/asm/vdso.h +@@ -11,8 +11,11 @@ + */ + #define VDSO_SYMBOL_MAX 2 + #define VDSO_SYMBOL_GTOD 1 +-#define 
ARCH_VDSO_SYMBOLS \ +- "__vdso_clock_gettime", \ +- "__vdso_gettimeofday" ++#define ARCH_VDSO_SYMBOLS_LIST \ ++ const char* aarch_vdso_symbol1 = "__vdso_clock_gettime"; \ ++ const char* aarch_vdso_symbol2 = "__vdso_gettimeofday"; ++#define ARCH_VDSO_SYMBOLS \ ++ aarch_vdso_symbol1, \ ++ aarch_vdso_symbol2, + + #endif /* __CR_ASM_VDSO_H__ */ +diff --git a/criu/arch/ppc64/include/asm/vdso.h b/criu/arch/ppc64/include/asm/vdso.h +index 6c92348..fe04336 100644 +--- a/criu/arch/ppc64/include/asm/vdso.h ++++ b/criu/arch/ppc64/include/asm/vdso.h +@@ -14,16 +14,28 @@ + */ + #define VDSO_SYMBOL_MAX 10 + #define VDSO_SYMBOL_GTOD 5 +-#define ARCH_VDSO_SYMBOLS \ +- "__kernel_clock_getres", \ +- "__kernel_clock_gettime", \ +- "__kernel_get_syscall_map", \ +- "__kernel_get_tbfreq", \ +- "__kernel_getcpu", \ +- "__kernel_gettimeofday", \ +- "__kernel_sigtramp_rt64", \ +- "__kernel_sync_dicache", \ +- "__kernel_sync_dicache_p5", \ +- "__kernel_time" ++#define ARCH_VDSO_SYMBOLS_LIST \ ++ const char* aarch_vdso_symbol1 = "__kernel_clock_getres"; \ ++ const char* aarch_vdso_symbol2 = "__kernel_clock_gettime"; \ ++ const char* aarch_vdso_symbol3 = "__kernel_get_syscall_map"; \ ++ const char* aarch_vdso_symbol4 = "__kernel_get_tbfreq"; \ ++ const char* aarch_vdso_symbol5 = "__kernel_getcpu"; \ ++ const char* aarch_vdso_symbol6 = "__kernel_gettimeofday"; \ ++ const char* aarch_vdso_symbol7 = "__kernel_sigtramp_rt64"; \ ++ const char* aarch_vdso_symbol8 = "__kernel_sync_dicache"; \ ++ const char* aarch_vdso_symbol9 = "__kernel_sync_dicache_p5"; \ ++ const char* aarch_vdso_symbol10 = "__kernel_time"; ++ ++#define ARCH_VDSO_SYMBOLS \ ++ aarch_vdso_symbol1, \ ++ aarch_vdso_symbol2, \ ++ aarch_vdso_symbol3, \ ++ aarch_vdso_symbol4, \ ++ aarch_vdso_symbol5, \ ++ aarch_vdso_symbol6, \ ++ aarch_vdso_symbol7, \ ++ aarch_vdso_symbol8, \ ++ aarch_vdso_symbol9, \ ++ aarch_vdso_symbol10 + + #endif /* __CR_ASM_VDSO_H__ */ +diff --git a/criu/arch/s390/include/asm/vdso.h 
b/criu/arch/s390/include/asm/vdso.h +index c54d848..ac71f59 100644 +--- a/criu/arch/s390/include/asm/vdso.h ++++ b/criu/arch/s390/include/asm/vdso.h +@@ -12,13 +12,18 @@ + #define VDSO_SYMBOL_GTOD 0 + + /* +- * This definition is used in pie/util-vdso.c to initialize the vdso symbol ++ * These definitions are used in pie/util-vdso.c to initialize the vdso symbol + * name string table 'vdso_symbols' + */ +-#define ARCH_VDSO_SYMBOLS \ +- "__kernel_gettimeofday", \ +- "__kernel_clock_gettime", \ +- "__kernel_clock_getres", \ +- "__kernel_getcpu" ++#define ARCH_VDSO_SYMBOLS_LIST \ ++ const char* aarch_vdso_symbol1 = "__kernel_gettimeofday"; \ ++ const char* aarch_vdso_symbol2 = "__kernel_clock_gettime"; \ ++ const char* aarch_vdso_symbol3 = "__kernel_clock_getres"; \ ++ const char* aarch_vdso_symbol4 = "__kernel_getcpu"; ++#define ARCH_VDSO_SYMBOLS \ ++ aarch_vdso_symbol1, \ ++ aarch_vdso_symbol2, \ ++ aarch_vdso_symbol3, \ ++ aarch_vdso_symbol4 + + #endif /* __CR_ASM_VDSO_H__ */ +diff --git a/criu/arch/x86/include/asm/vdso.h b/criu/arch/x86/include/asm/vdso.h +index 28ae2d1..54d1fba 100644 +--- a/criu/arch/x86/include/asm/vdso.h ++++ b/criu/arch/x86/include/asm/vdso.h +@@ -35,13 +35,22 @@ + * vsyscall will be patched again when addressing: + * https://github.com/checkpoint-restore/criu/issues/512 + */ +-#define ARCH_VDSO_SYMBOLS \ +- "__vdso_clock_gettime", \ +- "__vdso_getcpu", \ +- "__vdso_gettimeofday", \ +- "__vdso_time", \ +- "__kernel_sigreturn", \ +- "__kernel_rt_sigreturn" ++ ++#define ARCH_VDSO_SYMBOLS_LIST \ ++ const char* aarch_vdso_symbol1 = "__vdso_clock_gettime"; \ ++ const char* aarch_vdso_symbol2 = "__vdso_getcpu"; \ ++ const char* aarch_vdso_symbol3 = "__vdso_gettimeofday"; \ ++ const char* aarch_vdso_symbol4 = "__vdso_time"; \ ++ const char* aarch_vdso_symbol5 = "__kernel_sigreturn"; \ ++ const char* aarch_vdso_symbol6 = "__kernel_rt_sigreturn"; ++ ++#define ARCH_VDSO_SYMBOLS \ ++ aarch_vdso_symbol1, \ ++ aarch_vdso_symbol2, \ ++ aarch_vdso_symbol3, 
\ ++ aarch_vdso_symbol4, \ ++ aarch_vdso_symbol5, \ ++ aarch_vdso_symbol6 + + /* "__kernel_vsyscall", */ + +diff --git a/criu/pie/util-vdso.c b/criu/pie/util-vdso.c +index 104da06..a383f4a 100644 +--- a/criu/pie/util-vdso.c ++++ b/criu/pie/util-vdso.c +@@ -219,6 +219,8 @@ static void parse_elf_symbols(uintptr_t mem, size_t size, Phdr_t *load, + struct vdso_symtable *t, uintptr_t dynsymbol_names, + Hash_t *hash, Dyn_t *dyn_symtab) + { ++ ARCH_VDSO_SYMBOLS_LIST ++ + const char *vdso_symbols[VDSO_SYMBOL_MAX] = { + ARCH_VDSO_SYMBOLS + }; +-- +2.9.5 + diff --git a/criu.spec b/criu.spec index 050ec050d9e54f48ab28452d9d92eaa0033c868d..a00980c33d7afe7001810c29a8683f453f96abb2 100644 --- a/criu.spec +++ b/criu.spec @@ -1,6 +1,6 @@ Name: criu Version: 3.13 -Release: 7 +Release: 8 Provides: crtools = %{version}-%{release} Obsoletes: crtools <= 1.0-2 Summary: A tool of Checkpoint/Restore in User-space @@ -18,6 +18,13 @@ Obsoletes: %{name}-libs < %{version}-%{release} Patch0001: 0001-Fix-crit-encode-TypeError.patch Patch0002: 0002-Fix-crit-info-struct-unpack-error.patch Patch0003: 0003-Fix-crit-x-UnicodeDecodeError.patch +Patch0004: 0004-kerndat-detect-if-system-support-clone3-with-set_tid.patch +Patch0005: 0005-Add-assembler-wrapper-for-clone3.patch +Patch0006: 0006-Use-clone3-with-set_tid-to-create-processes.patch +Patch0007: 0007-clone3-handle-clone3-with-CLONE_PARENT.patch +Patch0008: 0008-aarch64-use-clone3-if-possible.patch +Patch0009: 0009-criu-dump-and-restore-cpu-affinity-of-each-thread.patch +Patch0010: 0010-vdso-fix-segmentation-fault-caused-by-char-pointer-a.patch %description Checkpoint/Restore in Userspace(CRIU),is a software tool for the linux operating system. @@ -91,6 +98,11 @@ chmod 0755 %{buildroot}/run/%{name}/ %doc %{_mandir}/man1/{compel.1*,crit.1*} %changelog +* Fri Feb 26 2021 snoweay - 3.13-8 +- Fix one vdso coredump bug. +- Use clone3 to specify restoring task pid. +- Add cpu affinity save support. 
+ * Tue Sep 22 2020 lingsheng - 3.13-7 - Fix crit errors