From f0b652ea61ff6750a8fcc69865d45a7abf37accf Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Tue, 2 Jul 2024 20:58:43 +1000 Subject: [PATCH] [1.1] rootfs: try to scope MkdirAll to stay inside the rootfs While we use SecureJoin to try to make all of our target paths inside the container safe, SecureJoin is not safe against an attacker than can change the path after we "resolve" it. os.MkdirAll can inadvertently follow symlinks and thus an attacker could end up tricking runc into creating empty directories on the host (note that the container doesn't get access to these directories, and the host just sees empty directories). However, this could potentially cause DoS issues by (for instance) creating a directory in a conf.d directory for a daemon that doesn't handle subdirectories properly. In addition, the handling for creating file bind-mounts did a plain open(O_CREAT) on the SecureJoin'd path, which is even more obviously unsafe (luckily we didn't use O_TRUNC, or this bug could've allowed an attacker to cause data loss...). Regardless of the symlink issue, opening an untrusted file could result in a DoS if the file is a hung tty or some other "nasty" file. We can use mknodat to safely create a regular file without opening anything anyway (O_CREAT|O_EXCL would also work but it makes the logic a bit more complicated, and we don't want to open the file for any particular reason anyway). libpathrs[1] is the long-term solution for these kinds of problems, but for now we can patch this particular issue by creating a more restricted MkdirAll that refuses to resolve symlinks and does the creation using file descriptors. This is loosely based on a more secure version that filepath-securejoin now has[2] and will be added to libpathrs soon[3]. [1]: https://github.com/openSUSE/libpathrs [2]: https://github.com/cyphar/filepath-securejoin/releases/tag/v0.3.0 [3]: https://github.com/openSUSE/libpathrs/issues/10 Fixes: CVE-2024-45310 Signed-off-by: Aleksa Sarai --- libcontainer/rootfs_linux.go | 31 ++++++--- libcontainer/system/linux.go | 41 +++++++++++ libcontainer/utils/utils_unix.go | 114 +++++++++++++++++++++++++++++++ 3 files changed, 176 insertions(+), 10 deletions(-) diff --git a/libcontainer/rootfs_linux.go b/libcontainer/rootfs_linux.go index e6269bea812..78b6998c38f 100644 --- a/libcontainer/rootfs_linux.go +++ b/libcontainer/rootfs_linux.go @@ -253,7 +253,7 @@ func mountCgroupV1(m *configs.Mount, c *mountConfig) error { if c.cgroupns { subsystemPath := filepath.Join(c.root, b.Destination) subsystemName := filepath.Base(b.Destination) - if err := os.MkdirAll(subsystemPath, 0o755); err != nil { + if err := utils.MkdirAllInRoot(c.root, subsystemPath, 0o755); err != nil { return err } if err := utils.WithProcfd(c.root, b.Destination, func(procfd string) error { @@ -406,15 +406,26 @@ func createMountpoint(rootfs string, m *configs.Mount, mountFd *int, source stri return "", fmt.Errorf("%w: file bind mount over rootfs", errRootfsToFile) } // Make the parent directory. - if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil { + destDir, destBase := filepath.Split(dest) + destDirFd, err := utils.MkdirAllInRootOpen(rootfs, destDir, 0o755) + if err != nil { return "", fmt.Errorf("make parent dir of file bind-mount: %w", err) } - // Make the target file. - f, err := os.OpenFile(dest, os.O_CREATE, 0o755) - if err != nil { - return "", fmt.Errorf("create target of file bind-mount: %w", err) + defer destDirFd.Close() + // Make the target file. We want to avoid opening any file that is + // already there because it could be a "bad" file like an invalid + // device or hung tty that might cause a DoS, so we use mknodat. + // destBase does not contain any "/" components, and mknodat does + // not follow trailing symlinks, so we can safely just call mknodat + // here. + if err := unix.Mknodat(int(destDirFd.Fd()), destBase, unix.S_IFREG|0o644, 0); err != nil { + // If we get EEXIST, there was already an inode there and + // we can consider that a success. + if !errors.Is(err, unix.EEXIST) { + err = &os.PathError{Op: "mknod regular file", Path: dest, Err: err} + return "", fmt.Errorf("create target of file bind-mount: %w", err) + } } - _ = f.Close() // Nothing left to do. return dest, nil } @@ -433,7 +444,7 @@ func createMountpoint(rootfs string, m *configs.Mount, mountFd *int, source stri } } - if err := os.MkdirAll(dest, 0o755); err != nil { + if err := utils.MkdirAllInRoot(rootfs, dest, 0o755); err != nil { return "", err } return dest, nil @@ -463,7 +474,7 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) error { } else if !fi.IsDir() { return fmt.Errorf("filesystem %q must be mounted on ordinary directory", m.Device) } - if err := os.MkdirAll(dest, 0o755); err != nil { + if err := utils.MkdirAllInRoot(rootfs, dest, 0o755); err != nil { return err } // Selinux kernels do not support labeling of /proc or /sys. @@ -751,7 +762,7 @@ func createDeviceNode(rootfs string, node *devices.Device, bind bool) error { if dest == rootfs { return fmt.Errorf("%w: mknod over rootfs", errRootfsToFile) } - if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil { + if err := utils.MkdirAllInRoot(rootfs, filepath.Dir(dest), 0o755); err != nil { return err } if bind { diff --git a/libcontainer/system/linux.go b/libcontainer/system/linux.go index 16edc6ba6d9..32bab6922bd 100644 --- a/libcontainer/system/linux.go +++ b/libcontainer/system/linux.go @@ -6,6 +6,8 @@ package system import ( "os" "os/exec" + "runtime" + "strings" "unsafe" "golang.org/x/sys/unix" @@ -102,3 +104,42 @@ func GetSubreaper() (int, error) { return int(i), nil } + +func prepareAt(dir *os.File, path string) (int, string) { + if dir == nil { + return unix.AT_FDCWD, path + } + + // Rather than just filepath.Join-ing path here, do it manually so the + // error and handle correctly indicate cases like path=".." as being + // relative to the correct directory. The handle.Name() might end up being + // wrong but because this is (currently) only used in MkdirAllInRoot, that + // isn't a problem. + dirName := dir.Name() + if !strings.HasSuffix(dirName, "/") { + dirName += "/" + } + fullPath := dirName + path + + return int(dir.Fd()), fullPath +} + +func Openat(dir *os.File, path string, flags int, mode uint32) (*os.File, error) { + dirFd, fullPath := prepareAt(dir, path) + fd, err := unix.Openat(dirFd, path, flags, mode) + if err != nil { + return nil, &os.PathError{Op: "openat", Path: fullPath, Err: err} + } + runtime.KeepAlive(dir) + return os.NewFile(uintptr(fd), fullPath), nil +} + +func Mkdirat(dir *os.File, path string, mode uint32) error { + dirFd, fullPath := prepareAt(dir, path) + err := unix.Mkdirat(dirFd, path, mode) + if err != nil { + err = &os.PathError{Op: "mkdirat", Path: fullPath, Err: err} + } + runtime.KeepAlive(dir) + return err +} diff --git a/libcontainer/utils/utils_unix.go b/libcontainer/utils/utils_unix.go index 0d95a203789..460b94cef3f 100644 --- a/libcontainer/utils/utils_unix.go +++ b/libcontainer/utils/utils_unix.go @@ -4,12 +4,17 @@ package utils import ( + "errors" "fmt" "os" + "path/filepath" "strconv" "strings" _ "unsafe" // for go:linkname + "github.com/opencontainers/runc/libcontainer/system" + + securejoin "github.com/cyphar/filepath-securejoin" "golang.org/x/sys/unix" ) @@ -130,3 +135,112 @@ func IsLexicallyInRoot(root, path string) bool { } return strings.HasPrefix(path, root) } + +// MkdirAllInRootOpen attempts to make +// +// path, _ := securejoin.SecureJoin(root, unsafePath) +// os.MkdirAll(path, mode) +// os.Open(path) +// +// safer against attacks where components in the path are changed between +// SecureJoin returning and MkdirAll (or Open) being called. In particular, we +// try to detect any symlink components in the path while we are doing the +// MkdirAll. +// +// NOTE: Unlike os.MkdirAll, mode is not Go's os.FileMode, it is the unix mode +// (the suid/sgid/sticky bits are not the same as for os.FileMode). +// +// NOTE: If unsafePath is a subpath of root, we assume that you have already +// called SecureJoin and so we use the provided path verbatim without resolving +// any symlinks (this is done in a way that avoids symlink-exchange races). +// This means that the path also must not contain ".." elements, otherwise an +// error will occur. +// +// This is a somewhat less safe alternative to +// , but it should +// detect attempts to trick us into creating directories outside of the root. +// We should migrate to securejoin.MkdirAll once it is merged. +func MkdirAllInRootOpen(root, unsafePath string, mode uint32) (_ *os.File, Err error) { + // If the path is already "within" the root, use it verbatim. + fullPath := unsafePath + if !IsLexicallyInRoot(root, unsafePath) { + var err error + fullPath, err = securejoin.SecureJoin(root, unsafePath) + if err != nil { + return nil, err + } + } + subPath, err := filepath.Rel(root, fullPath) + if err != nil { + return nil, err + } + + // Check for any silly mode bits. + if mode&^0o7777 != 0 { + return nil, fmt.Errorf("tried to include non-mode bits in MkdirAll mode: 0o%.3o", mode) + } + + currentDir, err := os.OpenFile(root, unix.O_DIRECTORY|unix.O_CLOEXEC, 0) + if err != nil { + return nil, fmt.Errorf("open root handle: %w", err) + } + defer func() { + if Err != nil { + currentDir.Close() + } + }() + + for _, part := range strings.Split(subPath, string(filepath.Separator)) { + switch part { + case "", ".": + // Skip over no-op components. + continue + case "..": + return nil, fmt.Errorf("possible breakout detected: found %q component in SecureJoin subpath %s", part, subPath) + } + + nextDir, err := system.Openat(currentDir, part, unix.O_DIRECTORY|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) + switch { + case err == nil: + // Update the currentDir. + _ = currentDir.Close() + currentDir = nextDir + + case errors.Is(err, unix.ENOTDIR): + // This might be a symlink or some other random file. Either way, + // error out. + return nil, fmt.Errorf("cannot mkdir in %s/%s: %w", currentDir.Name(), part, unix.ENOTDIR) + + case errors.Is(err, os.ErrNotExist): + // Luckily, mkdirat will not follow trailing symlinks, so this is + // safe to do as-is. + if err := system.Mkdirat(currentDir, part, mode); err != nil { + return nil, err + } + // Open the new directory. There is a race here where an attacker + // could swap the directory with a different directory, but + // MkdirAll's fuzzy semantics mean we don't care about that. + nextDir, err := system.Openat(currentDir, part, unix.O_DIRECTORY|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) + if err != nil { + return nil, fmt.Errorf("open newly created directory: %w", err) + } + // Update the currentDir. + _ = currentDir.Close() + currentDir = nextDir + + default: + return nil, err + } + } + return currentDir, nil +} + +// MkdirAllInRoot is a wrapper around MkdirAllInRootOpen which closes the +// returned handle, for callers that don't need to use it. +func MkdirAllInRoot(root, unsafePath string, mode uint32) error { + f, err := MkdirAllInRootOpen(root, unsafePath, mode) + if err == nil { + _ = f.Close() + } + return err +}