From bd0c7cabf5f8b41db2b1d87666e9e24d0b05f146 Mon Sep 17 00:00:00 2001 From: zhongjiawei Date: Tue, 4 Apr 2023 14:39:03 +0800 Subject: [PATCH] runc:fix /sys/fs/cgroup mounts and Prohibit /proc and /sys to be symlinks --- git-commit | 2 +- ...nc-rootless-fix-sys-fs-cgroup-mounts.patch | 121 ++++++++++++++++++ ...Prohibit-proc-and-sys-to-be-symlinks.patch | 113 ++++++++++++++++ runc.spec | 8 +- series.conf | 2 + 5 files changed, 244 insertions(+), 2 deletions(-) create mode 100644 patch/0035-runc-rootless-fix-sys-fs-cgroup-mounts.patch create mode 100644 patch/0036-runc-Prohibit-proc-and-sys-to-be-symlinks.patch diff --git a/git-commit b/git-commit index 2684b12..01f612a 100644 --- a/git-commit +++ b/git-commit @@ -1 +1 @@ -0912c2b6b709b0ded8a36dd5b008ceb2d5476b20 +5eea4318c9f09e182936b04e4e516c5eae8e020c diff --git a/patch/0035-runc-rootless-fix-sys-fs-cgroup-mounts.patch b/patch/0035-runc-rootless-fix-sys-fs-cgroup-mounts.patch new file mode 100644 index 0000000..f76bb2e --- /dev/null +++ b/patch/0035-runc-rootless-fix-sys-fs-cgroup-mounts.patch @@ -0,0 +1,121 @@ +From fd61dbb032e526bd323702d954520669761647bb Mon Sep 17 00:00:00 2001 +From: Akihiro Suda +Date: Mon, 26 Dec 2022 12:04:26 +0900 +Subject: [PATCH] rootless: fix /sys/fs/cgroup mounts + +It was found that rootless runc makes `/sys/fs/cgroup` writable in following conditons: + +1. when runc is executed inside the user namespace, and the config.json does not specify the cgroup namespace to be unshared + (e.g.., `(docker|podman|nerdctl) run --cgroupns=host`, with Rootless Docker/Podman/nerdctl) +2. or, when runc is executed outside the user namespace, and `/sys` is mounted with `rbind, ro` + (e.g., `runc spec --rootless`; this condition is very rare) + +A container may gain the write access to user-owned cgroup hierarchy `/sys/fs/cgroup/user.slice/...` on the host. +Other users's cgroup hierarchies are not affected. + +To fix the issue, this commit does: +1. Remount `/sys/fs/cgroup` to apply `MS_RDONLY` when it is being bind-mounted +2. Mask `/sys/fs/cgroup` when the bind source is unavailable + +Fix CVE-2023-25809 (GHSA-m8cg-xc2p-r3fc) + +Co-authored-by: Kir Kolyshkin +Signed-off-by: Akihiro Suda +--- + libcontainer/rootfs_linux.go | 53 ++++++++++++++++++++++------------- + tests/integration/mounts.bats | 17 +++++++++++ + 2 files changed, 51 insertions(+), 19 deletions(-) + +diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go +index 280a6332..ec14f97e 100644 +--- a/libcontainer/rootfs_linux.go ++++ b/libcontainer/rootfs_linux.go +@@ -334,26 +334,41 @@ func mountCgroupV2(m *configs.Mount, c *mountConfig) error { + if err := os.MkdirAll(dest, 0o755); err != nil { + return err + } +- return utils.WithProcfd(c.root, m.Destination, func(procfd string) error { +- if err := mount(m.Source, m.Destination, procfd, "cgroup2", uintptr(m.Flags), m.Data); err != nil { +- // when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158) +- if errors.Is(err, unix.EPERM) || errors.Is(err, unix.EBUSY) { +- src := fs2.UnifiedMountpoint +- if c.cgroupns && c.cgroup2Path != "" { +- // Emulate cgroupns by bind-mounting +- // the container cgroup path rather than +- // the whole /sys/fs/cgroup. +- src = c.cgroup2Path +- } +- err = mount(src, m.Destination, procfd, "", uintptr(m.Flags)|unix.MS_BIND, "") +- if c.rootlessCgroups && errors.Is(err, unix.ENOENT) { +- err = nil +- } +- } +- return err +- } +- return nil ++ err = utils.WithProcfd(c.root, m.Destination, func(procfd string) error { ++ return mount(m.Source, m.Destination, procfd, "cgroup2", uintptr(m.Flags), m.Data) + }) ++ if err == nil || !(errors.Is(err, unix.EPERM) || errors.Is(err, unix.EBUSY)) { ++ return err ++ } ++ ++ // When we are in UserNS but CgroupNS is not unshared, we cannot mount ++ // cgroup2 (#2158), so fall back to bind mount. ++ bindM := &configs.Mount{ ++ Device: "bind", ++ Source: fs2.UnifiedMountpoint, ++ Destination: m.Destination, ++ Flags: unix.MS_BIND | m.Flags, ++ PropagationFlags: m.PropagationFlags, ++ } ++ if c.cgroupns && c.cgroup2Path != "" { ++ // Emulate cgroupns by bind-mounting the container cgroup path ++ // rather than the whole /sys/fs/cgroup. ++ bindM.Source = c.cgroup2Path ++ } ++ // mountToRootfs() handles remounting for MS_RDONLY. ++ // No need to set c.fd here, because mountToRootfs() calls utils.WithProcfd() by itself in mountPropagate(). ++ err = mountToRootfs(bindM, c) ++ if c.rootlessCgroups && errors.Is(err, unix.ENOENT) { ++ // ENOENT (for `src = c.cgroup2Path`) happens when rootless runc is being executed ++ // outside the userns+mountns. ++ // ++ // Mask `/sys/fs/cgroup` to ensure it is read-only, even when `/sys` is mounted ++ // with `rbind,ro` (`runc spec --rootless` produces `rbind,ro` for `/sys`). ++ err = utils.WithProcfd(c.root, m.Destination, func(procfd string) error { ++ return maskPath(procfd, c.label) ++ }) ++ } ++ return err + } + + func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) { +diff --git a/tests/integration/mounts.bats b/tests/integration/mounts.bats +index 1ec675ac..1e72c5b1 100644 +--- a/tests/integration/mounts.bats ++++ b/tests/integration/mounts.bats +@@ -63,3 +63,20 @@ function teardown() { + runc run test_busybox + [ "$status" -eq 0 ] + } ++ ++# https://github.com/opencontainers/runc/security/advisories/GHSA-m8cg-xc2p-r3fc ++@test "runc run [ro /sys/fs/cgroup mount]" { ++ # With cgroup namespace ++ update_config '.process.args |= ["sh", "-euc", "for f in `grep /sys/fs/cgroup /proc/mounts | awk \"{print \\\\$2}\"| uniq`; do grep -w $f /proc/mounts | tail -n1; done"]' ++ runc run test_busybox ++ [ "$status" -eq 0 ] ++ [ "${#lines[@]}" -ne 0 ] ++ for line in "${lines[@]}"; do [[ "${line}" == *'ro,'* ]]; done ++ ++ # Without cgroup namespace ++ update_config '.linux.namespaces -= [{"type": "cgroup"}]' ++ runc run test_busybox ++ [ "$status" -eq 0 ] ++ [ "${#lines[@]}" -ne 0 ] ++ for line in "${lines[@]}"; do [[ "${line}" == *'ro,'* ]]; done ++} +-- +2.33.0 + diff --git a/patch/0036-runc-Prohibit-proc-and-sys-to-be-symlinks.patch b/patch/0036-runc-Prohibit-proc-and-sys-to-be-symlinks.patch new file mode 100644 index 0000000..5a6bd08 --- /dev/null +++ b/patch/0036-runc-Prohibit-proc-and-sys-to-be-symlinks.patch @@ -0,0 +1,113 @@ +From 52559766c5298688a8302180bf50b002623776d9 Mon Sep 17 00:00:00 2001 +From: Kir Kolyshkin +Date: Thu, 16 Mar 2023 14:35:50 -0700 +Subject: [PATCH] Prohibit /proc and /sys to be symlinks + +Commit 3291d66b9844 introduced a check for /proc and /sys, making sure +the destination (dest) is a directory (and not e.g. a symlink). + +Later, a hunk from commit 0ca91f44f switched from using filepath.Join +to SecureJoin for dest. As SecureJoin follows and resolves symlinks, +the check whether dest is a symlink no longer works. + +To fix, do the check without/before using SecureJoin. + +Add integration tests to make sure we won't regress. + +Signed-off-by: Kir Kolyshkin +(cherry picked from commit 0d72adf96dda1b687815bf89bb245b937a2f603c) +Signed-off-by: Sebastiaan van Stijn +--- + libcontainer/rootfs_linux.go | 29 ++++++++++++++++++++--------- + tests/integration/mask.bats | 19 +++++++++++++++++++ + 2 files changed, 39 insertions(+), 9 deletions(-) + +diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go +index ec14f97e..8ce09f6f 100644 +--- a/libcontainer/rootfs_linux.go ++++ b/libcontainer/rootfs_linux.go +@@ -418,25 +418,26 @@ func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) { + + func mountToRootfs(m *configs.Mount, c *mountConfig) error { + rootfs := c.root +- mountLabel := c.label +- mountFd := c.fd +- dest, err := securejoin.SecureJoin(rootfs, m.Destination) +- if err != nil { +- return err +- } + ++ // procfs and sysfs are special because we need to ensure they are actually ++ // mounted on a specific path in a container without any funny business. + switch m.Device { + case "proc", "sysfs": + // If the destination already exists and is not a directory, we bail +- // out This is to avoid mounting through a symlink or similar -- which ++ // out. This is to avoid mounting through a symlink or similar -- which + // has been a "fun" attack scenario in the past. + // TODO: This won't be necessary once we switch to libpathrs and we can + // stop all of these symlink-exchange attacks. ++ dest := filepath.Clean(m.Destination) ++ if !strings.HasPrefix(dest, rootfs) { ++ // Do not use securejoin as it resolves symlinks. ++ dest = filepath.Join(rootfs, dest) ++ } + if fi, err := os.Lstat(dest); err != nil { + if !os.IsNotExist(err) { + return err + } +- } else if fi.Mode()&os.ModeDir == 0 { ++ } else if !fi.IsDir() { + return fmt.Errorf("filesystem %q must be mounted on ordinary directory", m.Device) + } + if strings.HasPrefix(m.Destination, "/proc/sys/") { +@@ -445,8 +446,18 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) error { + if err := os.MkdirAll(dest, 0o755); err != nil { + return err + } +- // Selinux kernels do not support labeling of /proc or /sys ++ // Selinux kernels do not support labeling of /proc or /sys. + return mountPropagate(m, rootfs, "", nil) ++ } ++ ++ mountLabel := c.label ++ mountFd := c.fd ++ dest, err := securejoin.SecureJoin(rootfs, m.Destination) ++ if err != nil { ++ return err ++ } ++ ++ switch m.Device { + case "mqueue": + if err := os.MkdirAll(dest, 0o755); err != nil { + return err +diff --git a/tests/integration/mask.bats b/tests/integration/mask.bats +index b5f29675..272c879c 100644 +--- a/tests/integration/mask.bats ++++ b/tests/integration/mask.bats +@@ -56,3 +56,22 @@ function teardown() { + [ "$status" -eq 1 ] + [[ "${output}" == *"Operation not permitted"* ]] + } ++ ++@test "mask paths [prohibit symlink /proc]" { ++ ln -s /symlink rootfs/proc ++ runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox ++ [ "$status" -eq 1 ] ++ [[ "${output}" == *"must be mounted on ordinary directory"* ]] ++} ++ ++@test "mask paths [prohibit symlink /sys]" { ++ # In rootless containers, /sys is a bind mount not a real sysfs. ++ requires root ++ ++ ln -s /symlink rootfs/sys ++ runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox ++ [ "$status" -eq 1 ] ++ # On cgroup v1, this may fail before checking if /sys is a symlink, ++ # so we merely check that it fails, and do not check the exact error ++ # message like for /proc above. ++} +-- +2.33.0 + diff --git a/runc.spec b/runc.spec index 8b0d872..6e7bce9 100644 --- a/runc.spec +++ b/runc.spec @@ -3,7 +3,7 @@ Name: docker-runc Version: 1.1.3 -Release: 12 +Release: 13 Summary: runc is a CLI tool for spawning and running containers according to the OCI specification. License: ASL 2.0 @@ -54,6 +54,12 @@ install -p -m 755 runc $RPM_BUILD_ROOT/%{_bindir}/runc %{_bindir}/runc %changelog +* Tue Apr 4 2023 zhongjiawei - 1.1.3-13 +- Type:bugfix +- CVE:NA +- SUG:NA +- DESC:fix rootless /sys/fs/cgroup mounts bug and Prohibit /proc and /sys to be symlinks + * Tue Mar 21 2023 zhongjiawei - 1.1.3-12 - Type:bugfix - CVE:NA diff --git a/series.conf b/series.conf index 07d9c85..6662256 100644 --- a/series.conf +++ b/series.conf @@ -32,3 +32,5 @@ patch/0031-runc-modify-linuxcontainer-starttime-uint64-type-tob.patch patch/0032-runc-make-runc-spec-compatible-1.0.0.rc3.patch patch/0033-runc-libcontainer-skip-chown-of-dev-null-caused-by-fd-red.patch patch/0034-runc-Fixed-init-state-error-variable.patch +patch/0035-runc-rootless-fix-sys-fs-cgroup-mounts.patch +patch/0036-runc-Prohibit-proc-and-sys-to-be-symlinks.patch -- Gitee