diff --git a/git-commit b/git-commit index 2f2d04a0c090f13aafa3d98d8e61d12f6d5fed36..2072e240d287c694296b67b26ae6b267b7cea59b 100644 --- a/git-commit +++ b/git-commit @@ -1 +1 @@ -1413e5c37e7705de18736dd2c364bf5506855149 +c394f2e7ec8ae4e88a559a889bf169fffde01328 diff --git a/patch/0038-libct-fix-shared-pidns-detection.patch b/patch/0038-libct-fix-shared-pidns-detection.patch new file mode 100644 index 0000000000000000000000000000000000000000..1b7d3e13199ae0d0124bcb44599b81fa00f0bc59 --- /dev/null +++ b/patch/0038-libct-fix-shared-pidns-detection.patch @@ -0,0 +1,104 @@ +From 7059f60db7a899d9fb00180ff7661eb3fa51ee4c Mon Sep 17 00:00:00 2001 +From: Kir Kolyshkin +Date: Fri, 12 May 2023 16:04:11 -0700 +Subject: [PATCH] libct: fix shared pidns detection + +When someone is using libcontainer to start and kill containers from a +long lived process (i.e. the same process creates and removes the +container), initProcess.wait method is used, which has a kludge to work +around killing containers that do not have their own PID namespace. + +The code that checks for own PID namespace is not entirely correct. +To be exact, it does not set sharePidns flag when the host/caller PID +namespace is implicitly used. As a result, the above mentioned kludge +does not work. + +Fix the issue, add a test case (which fails without the fix). 
+ +Signed-off-by: Kir Kolyshkin +--- + libcontainer/configs/namespaces_syscall.go | 12 ++++++++++++ + libcontainer/container_linux.go | 3 +-- + libcontainer/integration/exec_test.go | 20 +++++++++++++++----- + 3 files changed, 28 insertions(+), 7 deletions(-) + +diff --git a/libcontainer/configs/namespaces_syscall.go b/libcontainer/configs/namespaces_syscall.go +index 0516dba8..543e059a 100644 +--- a/libcontainer/configs/namespaces_syscall.go ++++ b/libcontainer/configs/namespaces_syscall.go +@@ -31,3 +31,15 @@ func (n *Namespaces) CloneFlags() uintptr { + } + return uintptr(flag) + } ++ ++// IsPrivate tells whether the namespace of type t is configured as private ++// (i.e. it exists and is not shared). ++func (n Namespaces) IsPrivate(t NamespaceType) bool { ++ for _, v := range n { ++ if v.Type == t { ++ return v.Path == "" ++ } ++ } ++ // Not found, so implicitly sharing a parent namespace. ++ return false ++} +diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go +index 1fc8feca..1a210fa2 100644 +--- a/libcontainer/container_linux.go ++++ b/libcontainer/container_linux.go +@@ -555,7 +555,6 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPa + nsMaps[ns.Type] = ns.Path + } + } +- _, sharePidns := nsMaps[configs.NEWPID] + data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps, initStandard) + if err != nil { + return nil, err +@@ -600,7 +599,7 @@ func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPa + container: c, + process: p, + bootstrapData: data, +- sharePidns: sharePidns, ++ sharePidns: !c.config.Namespaces.IsPrivate(configs.NEWPID), + } + c.initProcess = init + return init, nil +diff --git a/libcontainer/integration/exec_test.go b/libcontainer/integration/exec_test.go +index 3da6d96c..1f19ced1 100644 +--- a/libcontainer/integration/exec_test.go ++++ b/libcontainer/integration/exec_test.go +@@ -1456,16 +1456,26 @@ func TestPIDHost(t *testing.T) { + } + } + 
+-func TestPIDHostInitProcessWait(t *testing.T) { ++func TestHostPidnsInitKill(t *testing.T) { ++ config := newTemplateConfig(t, nil) ++ // Implicitly use host pid ns. ++ config.Namespaces.Remove(configs.NEWPID) ++ testPidnsInitKill(t, config) ++} ++ ++func TestSharedPidnsInitKill(t *testing.T) { ++ config := newTemplateConfig(t, nil) ++ // Explicitly use host pid ns. ++ config.Namespaces.Add(configs.NEWPID, "/proc/1/ns/pid") ++ testPidnsInitKill(t, config) ++} ++ ++func testPidnsInitKill(t *testing.T, config *configs.Config) { + if testing.Short() { + return + } + +- pidns := "/proc/1/ns/pid" +- + // Run a container with two long-running processes. +- config := newTemplateConfig(t, nil) +- config.Namespaces.Add(configs.NEWPID, pidns) + container, err := newContainer(t, config) + ok(t, err) + defer func() { +-- +2.33.0 + diff --git a/patch/0039-libct-fix-a-race-with-systemd-removal.patch b/patch/0039-libct-fix-a-race-with-systemd-removal.patch new file mode 100644 index 0000000000000000000000000000000000000000..ab9219c735f5128325029439d9cce838b11bb902 --- /dev/null +++ b/patch/0039-libct-fix-a-race-with-systemd-removal.patch @@ -0,0 +1,97 @@ +From 14ffe49dcb6cda0b5c08590127fbd69d333d88e9 Mon Sep 17 00:00:00 2001 +From: Kir Kolyshkin +Date: Tue, 4 Apr 2023 16:59:43 -0700 +Subject: [PATCH] libct: fix a race with systemd removal + +For a previous attempt to fix that (and added test cases), see commit +9087f2e827d971. + +Alas, it's not always working because of cgroup directory TOCTOU. + +To solve this and avoid the race, add an error _after_ the operation. +Implement it as a method that ignores the error that should be ignored. +Instead of currentStatus(), use faster runType(), since we are not +interested in Paused status here. + +For Processes(), remove the pre-op check, and only use it after getting +an error, making the non-error path more straightforward. + +For Signal(), add a second check after getting an error. 
The first check
+is left as is because signalAllProcesses might print a warning if the
+cgroup does not exist, and we'd like to avoid that.
+
+This should fix an occasional failure like this one:
+
+    not ok 84 kill detached busybox
+    # (in test file tests/integration/kill.bats, line 27)
+    # `[ "$status" -eq 0 ]' failed
+    ....
+    # runc kill test_busybox KILL (status=0):
+    # runc kill -a test_busybox 0 (status=1):
+    # time="2023-04-04T18:24:27Z" level=error msg="lstat /sys/fs/cgroup/devices/system.slice/runc-test_busybox.scope: no such file or directory"
+
+Signed-off-by: Kir Kolyshkin
+---
+ libcontainer/container_linux.go | 33 +++++++++++++++++++++------------
+ 1 file changed, 21 insertions(+), 12 deletions(-)
+
+diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go
+index 1a210fa2..6902f7ba 100644
+--- a/libcontainer/container_linux.go
++++ b/libcontainer/container_linux.go
+@@ -146,19 +146,27 @@ func (c *linuxContainer) OCIState() (*specs.State, error) {
+ 	return c.currentOCIState()
+ }
+ 
+-func (c *linuxContainer) Processes() ([]int, error) {
+-	var pids []int
+-	status, err := c.currentStatus()
+-	if err != nil {
+-		return pids, err
++// ignoreCgroupError filters out cgroup-related errors that can be ignored,
++// because the container is stopped and its cgroup is gone.
++func (c *linuxContainer) ignoreCgroupError(err error) error {
++	if err == nil {
++		return nil
+ 	}
+-	// for systemd cgroup, the unit's cgroup path will be auto removed if container's all processes exited
+-	if status == Stopped && !c.cgroupManager.Exists() {
+-		return pids, nil
++	if errors.Is(err, os.ErrNotExist) && c.runType() == Stopped && !c.cgroupManager.Exists() {
++		return nil
+ 	}
++	return err
++}
+ 
+-	pids, err = c.cgroupManager.GetAllPids()
+-	if err != nil {
++// Processes returns the PIDs inside this container. The PIDs are in the
++// namespace of the calling process.
++//
++// Some of the returned PIDs may no longer refer to processes in the container,
++// unless the container state is PAUSED in which case every PID in the slice is
++// valid.
++func (c *linuxContainer) Processes() ([]int, error) {
++	pids, err := c.cgroupManager.GetAllPids()
++	if err = c.ignoreCgroupError(err); err != nil {
+ 		return nil, fmt.Errorf("unable to get all container pids: %w", err)
+ 	}
+ 	return pids, nil
+@@ -382,11 +390,12 @@ func (c *linuxContainer) Signal(s os.Signal, all bool) error {
+ 		return err
+ 	}
+ 	if all {
+-		// for systemd cgroup, the unit's cgroup path will be auto removed if container's all processes exited
+ 		if status == Stopped && !c.cgroupManager.Exists() {
++			// Avoid calling signalAllProcesses which may print
++			// a warning trying to freeze a non-existing cgroup.
+ 			return nil
+ 		}
+-		return signalAllProcesses(c.cgroupManager, s)
++		return c.ignoreCgroupError(signalAllProcesses(c.cgroupManager, s))
+ 	}
+ 	// to avoid a PID reuse attack
+ 	if status == Running || status == Created || status == Paused {
+-- 
+2.33.0
+
diff --git a/patch/0040-runc-run-refuse-a-non-empty-cgroup.patch b/patch/0040-runc-run-refuse-a-non-empty-cgroup.patch
new file mode 100644
index 0000000000000000000000000000000000000000..b45387d2b0352ffc74ee90e24cb4f50817a8c7ba
--- /dev/null
+++ b/patch/0040-runc-run-refuse-a-non-empty-cgroup.patch
@@ -0,0 +1,38 @@
+From bdb2b089328b794c1890139b972fcf6f75f0c05e Mon Sep 17 00:00:00 2001
+From: Kir Kolyshkin
+Date: Thu, 23 Mar 2023 11:57:46 -0700
+Subject: [PATCH] runc run: refuse a non-empty cgroup
+
+Commit d08bc0c1b3bb2 ("runc run: warn on non-empty cgroup") introduced
+a warning when a container is started in a non-empty cgroup. Such
+configuration has lots of issues.
+
+In addition to that, such configuration is not possible at all when
+using the systemd cgroup driver.
+
+As planned, let's promote this warning to an error, and fix the test
+case accordingly.
+ +Signed-off-by: Kir Kolyshkin +--- + libcontainer/factory_linux.go | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/libcontainer/factory_linux.go b/libcontainer/factory_linux.go +index e6c71ac3..39fc3121 100644 +--- a/libcontainer/factory_linux.go ++++ b/libcontainer/factory_linux.go +@@ -179,9 +179,7 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err + return nil, fmt.Errorf("unable to get cgroup PIDs: %w", err) + } + if len(pids) != 0 { +- // TODO: return an error. +- logrus.Warnf("container's cgroup is not empty: %d process(es) found", len(pids)) +- logrus.Warn("DEPRECATED: running container in a non-empty cgroup won't be supported in runc 1.2; https://github.com/opencontainers/runc/issues/3132") ++ return nil, fmt.Errorf("container's cgroup is not empty: %d process(es) found", len(pids)) + } + } + +-- +2.33.0 + diff --git a/runc.spec b/runc.spec index dd1003367a2e5b9af503f8c4df4b65d20d9f4a0d..08e9ec71c5662a88fc5453e923d0db70c502c705 100644 --- a/runc.spec +++ b/runc.spec @@ -3,7 +3,7 @@ Name: docker-runc Version: 1.1.3 -Release: 14 +Release: 15 Summary: runc is a CLI tool for spawning and running containers according to the OCI specification. 
License: ASL 2.0 @@ -54,6 +54,12 @@ install -p -m 755 runc $RPM_BUILD_ROOT/%{_bindir}/runc %{_bindir}/runc %changelog +* Wed Jun 21 2023 zhongjiawei - 1.1.3-15 +- Type:bugfix +- CVE:NA +- SUG:NA +- DESC:sync some patches + * Fri Jun 09 2023 zhongjiawei - 1.1.3-14 - Type:bugfix - CVE:NA diff --git a/series.conf b/series.conf index 3e12a18425aa260e0ad3bba02c939d4a4f12bed8..bfeb3b2bc2aaea54e6bc932a236b6cbc60a1e6b9 100644 --- a/series.conf +++ b/series.conf @@ -35,3 +35,6 @@ patch/0034-runc-Fixed-init-state-error-variable.patch patch/0035-runc-rootless-fix-sys-fs-cgroup-mounts.patch patch/0036-runc-Prohibit-proc-and-sys-to-be-symlinks.patch patch/0037-runc-modify-runc-make-command-to-satisfy-the-compile.patch +patch/0038-libct-fix-shared-pidns-detection.patch +patch/0039-libct-fix-a-race-with-systemd-removal.patch +patch/0040-runc-run-refuse-a-non-empty-cgroup.patch