From a5f9b8583e4fe4f24a76cf8d7c3f04c177de01bf Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 28 Jul 2019 00:22:29 +0200 Subject: [PATCH 1/2] pidfd: add P_PIDFD to waitid() ANBZ: #21342 commit 3695eae5fee0605f316fbaad0b9e3de791d7dfaf upstream. This adds the P_PIDFD type to waitid(). One of the last remaining bits for the pidfd api is to make it possible to wait on pidfds. With P_PIDFD added to waitid() the parts of userspace that want to use the pidfd api to exclusively manage processes can do so now. One of the things this will unblock in the future is the ability to make it possible to retrieve the exit status via waitid(P_PIDFD) for non-parent processes if handed a _suitable_ pidfd that has this feature set. This is similar to what you can do on FreeBSD with kqueue(). It might even end up being possible to wait on a process as a non-parent if an appropriate property is enabled on the pidfd. With P_PIDFD no scoping of the process identified by the pidfd is possible, i.e. it explicitly blocks things such as wait4(-1), wait4(0), waitid(P_ALL), waitid(P_PGID) etc. It only allows for semantics equivalent to wait4(pid), waitid(P_PID). Users that need scoping should rely on pid-based wait*() syscalls for now. Signed-off-by: Christian Brauner Reviewed-by: Kees Cook Reviewed-by: Oleg Nesterov Cc: Arnd Bergmann Cc: "Eric W. 
Biederman" Cc: Joel Fernandes (Google) Cc: Thomas Gleixner Cc: David Howells Cc: Jann Horn Cc: Andy Lutomirsky Cc: Andrew Morton Cc: Aleksa Sarai Cc: Linus Torvalds Cc: Al Viro Link: https://lore.kernel.org/r/20190727222229.6516-2-christian@brauner.io Signed-off-by: Gou Hao Signed-off-by: caina Change-Id: Ied6aad01f01a121f28abc085843065dee7783176 Signed-off-by: goutongchen --- include/linux/pid.h | 4 ++++ include/uapi/linux/wait.h | 1 + kernel/exit.c | 33 ++++++++++++++++++++++++++++++--- kernel/fork.c | 8 ++++++++ 4 files changed, 43 insertions(+), 3 deletions(-) diff --git a/include/linux/pid.h b/include/linux/pid.h index 387874c72b07..362bf1e6416b 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -73,6 +73,10 @@ extern struct pid init_struct_pid; extern const struct file_operations pidfd_fops; +struct file; + +extern struct pid *pidfd_pid(const struct file *file); + static inline struct pid *get_pid(struct pid *pid) { if (pid) diff --git a/include/uapi/linux/wait.h b/include/uapi/linux/wait.h index ac49a220cf2a..85b809fc9f11 100644 --- a/include/uapi/linux/wait.h +++ b/include/uapi/linux/wait.h @@ -17,6 +17,7 @@ #define P_ALL 0 #define P_PID 1 #define P_PGID 2 +#define P_PIDFD 3 #endif /* _UAPI_LINUX_WAIT_H */ diff --git a/kernel/exit.c b/kernel/exit.c index eb5dcd9d138c..f46a25b24693 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1553,6 +1553,23 @@ static long do_wait(struct wait_opts *wo) return retval; } +static struct pid *pidfd_get_pid(unsigned int fd) +{ + struct fd f; + struct pid *pid; + + f = fdget(fd); + if (!f.file) + return ERR_PTR(-EBADF); + + pid = pidfd_pid(f.file); + if (!IS_ERR(pid)) + get_pid(pid); + + fdput(f); + return pid; +} + static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop, int options, struct rusage *ru) { @@ -1575,19 +1592,29 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop, type = PIDTYPE_PID; if (upid <= 0) return -EINVAL; + + pid = find_get_pid(upid); break; case 
P_PGID: type = PIDTYPE_PGID; if (upid <= 0) return -EINVAL; + + pid = find_get_pid(upid); + break; + case P_PIDFD: + type = PIDTYPE_PID; + if (upid < 0) + return -EINVAL; + + pid = pidfd_get_pid(upid); + if (IS_ERR(pid)) + return PTR_ERR(pid); break; default: return -EINVAL; } - if (type < PIDTYPE_MAX) - pid = find_get_pid(upid); - wo.wo_type = type; wo.wo_pid = pid; wo.wo_flags = options; diff --git a/kernel/fork.c b/kernel/fork.c index 25d46afa19fa..46b4551d715f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1636,6 +1636,14 @@ static inline void rcu_copy_process(struct task_struct *p) #endif /* #ifdef CONFIG_TASKS_RCU */ } +struct pid *pidfd_pid(const struct file *file) +{ + if (file->f_op == &pidfd_fops) + return file->private_data; + + return ERR_PTR(-EBADF); +} + static int pidfd_release(struct inode *inode, struct file *file) { struct pid *pid = file->private_data; -- Gitee From 32b2d2ef8e2843034aac0dfd86ac0903c7cd4e93 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 23 Jul 2019 07:44:46 -0500 Subject: [PATCH 2/2] waitid: Add support for waiting for the current process group ANBZ: #21342 commit 821cc7b0b205c0df64cce59aacc330af251fa8f7 upstream. It was recently discovered that the Linux version of waitid is not a superset of the other wait functions because it does not include support for waiting for the current process group. This has two downsides: 1. An extra system call is needed to get the current process group. 2. After the current process group is received and before it is passed to waitid a signal could arrive causing the current process group to change. Inherent race-conditions such as these make it impossible for userspace to emulate this functionality and thus violate async-signal safety requirements for waitpid. Arguments can be made for using a different choice of idtype and id for this case but the BSDs already use this P_PGID and 0 to indicate waiting for the current process's process group. 
So be nice to user space programmers and don't introduce an unnecessary incompatibility. Some people have noted that the POSIX description is that waitpid will wait for the current process group, and that in the presence of pthreads that process group can change. To get clarity on this issue I looked at XNU, FreeBSD, and illumos. All of those flavors of Unix waited for the current process group at the time of call and as written could not adapt to the process group changing after the call. At one point Linux did adapt to the current process group changing but that stopped in 161550d74c07 ("pid: sys_wait... fixes"). It has been over 11 years since Linux had that behavior, no programs that fail with the change in behavior have been reported, and I could not find any other Unix that does this. So I think it is safe to clarify the definition of current process group, to current process group at the time of the wait function. Signed-off-by: "Eric W. Biederman" Signed-off-by: Christian Brauner Reviewed-by: Oleg Nesterov Cc: "H. 
Peter Anvin" Cc: Arnd Bergmann Cc: Palmer Dabbelt Cc: Rich Felker Cc: Alistair Francis Cc: Zong Li Cc: Andrew Morton Cc: Oleg Nesterov Cc: Linus Torvalds Cc: Al Viro Cc: Florian Weimer Cc: Adhemerval Zanella Cc: GNU C Library Link: https://lore.kernel.org/r/20190814154400.6371-2-christian.brauner@ubuntu.com Signed-off-by: Gou Hao Signed-off-by: caina Change-Id: Ied6aad01f01a121f28abc085843065dee7782076 Signed-off-by: goutongchen --- kernel/exit.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index f46a25b24693..3a435817293d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1597,10 +1597,13 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop, break; case P_PGID: type = PIDTYPE_PGID; - if (upid <= 0) + if (upid < 0) return -EINVAL; - pid = find_get_pid(upid); + if (upid) + pid = find_get_pid(upid); + else + pid = get_task_pid(current, PIDTYPE_PGID); break; case P_PIDFD: type = PIDTYPE_PID; -- Gitee