Skip to content

Commit

Permalink
Fix --preserve-fds, eliminate stray FD being passed into container (#…
Browse files Browse the repository at this point in the history
…2893)

* Fix preserve-fds flag

Signed-off-by: Aidan Hobson Sayers <aidanhs@cantab.net>

* Fix a stray FD leaking in containers when using preserve-fd

Signed-off-by: Aidan Hobson Sayers <aidanhs@cantab.net>

* Add tests for preserve-fds, extend test harness

Test harness additionally needed to support

1. tests that cannot run in parallel
2. tests that need to customise create arguments

Signed-off-by: Aidan Hobson Sayers <aidanhs@cantab.net>

* Opt-out a test from runc that it errors on

Signed-off-by: Aidan Hobson Sayers <aidanhs@cantab.net>

* chore: minor cleanup of comments

Signed-off-by: Yashodhan Joshi <yjdoc2@gmail.com>

---------

Signed-off-by: Aidan Hobson Sayers <aidanhs@cantab.net>
Signed-off-by: Yashodhan Joshi <yjdoc2@gmail.com>
Co-authored-by: Yashodhan Joshi <yjdoc2@gmail.com>
  • Loading branch information
aidanhs and YJDoc2 authored Dec 30, 2024
1 parent e6b4a60 commit 83d87a2
Show file tree
Hide file tree
Showing 16 changed files with 303 additions and 68 deletions.
3 changes: 3 additions & 0 deletions crates/libcontainer/src/process/container_init_process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,9 @@ pub fn container_init_process(
// will be closed after execve into the container payload. We can't close the
// fds immediately since we at least still need it for the pipe used to wait on
// starting the container.
//
// Note: this should happen very late, in order to avoid accidentally leaking FDs
// Please refer to https://github.com/opencontainers/runc/security/advisories/GHSA-xr7r-f8xq-vfvv for more details.
syscall.close_range(preserve_fds).map_err(|err| {
tracing::error!(?err, "failed to cleanup extra fds");
InitProcessError::SyscallOther(err)
Expand Down
9 changes: 0 additions & 9 deletions crates/libcontainer/src/process/container_main_process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,15 +75,6 @@ pub fn container_main_process(container_args: &ContainerArgs) -> Result<(Pid, bo
})
};

// Before starting the intermediate process, mark all non-stdio open files as O_CLOEXEC
// to ensure we don't leak any file descriptors to the intermediate process.
// Please refer to https://github.com/opencontainers/runc/security/advisories/GHSA-xr7r-f8xq-vfvv for more details.
let syscall = container_args.syscall.create_syscall();
syscall.close_range(0).map_err(|err| {
tracing::error!(?err, "failed to cleanup extra fds");
ProcessError::SyscallOther(err)
})?;

let container_clone_fn = if container_args.as_sibling {
fork::container_clone_sibling
} else {
Expand Down
21 changes: 15 additions & 6 deletions crates/libcontainer/src/syscall/linux.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use nix::fcntl::{open, OFlag};
use nix::mount::{mount, umount2, MntFlags, MsFlags};
use nix::sched::{unshare, CloneFlags};
use nix::sys::stat::{mknod, Mode, SFlag};
use nix::unistd::{chown, chroot, fchdir, pivot_root, sethostname, Gid, Uid};
use nix::unistd::{chown, chroot, close, fchdir, pivot_root, sethostname, Gid, Uid};
use oci_spec::runtime::PosixRlimit;

use super::{Result, Syscall, SyscallError};
Expand Down Expand Up @@ -232,11 +232,15 @@ impl Syscall for LinuxSyscall {
/// Function to set given path as root path inside process
fn pivot_rootfs(&self, path: &Path) -> Result<()> {
// open the path as directory and read only
let newroot =
open(path, OFlag::O_DIRECTORY | OFlag::O_RDONLY, Mode::empty()).map_err(|errno| {
tracing::error!(?errno, ?path, "failed to open the new root for pivot root");
errno
})?;
let newroot = open(
path,
OFlag::O_DIRECTORY | OFlag::O_RDONLY | OFlag::O_CLOEXEC,
Mode::empty(),
)
.map_err(|errno| {
tracing::error!(?errno, ?path, "failed to open the new root for pivot root");
errno
})?;

// make the given path as the root directory for the container
// see https://man7.org/linux/man-pages/man2/pivot_root.2.html, specially the notes
Expand Down Expand Up @@ -279,6 +283,11 @@ impl Syscall for LinuxSyscall {
errno
})?;

close(newroot).map_err(|errno| {
tracing::error!(?errno, ?newroot, "failed to close new root directory");
errno
})?;

Ok(())
}

Expand Down
3 changes: 3 additions & 0 deletions tests/contest/contest/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use tests::cgroups;
use crate::tests::devices::get_devices_test;
use crate::tests::domainname::get_domainname_tests;
use crate::tests::example::get_example_test;
use crate::tests::fd_control::get_fd_control_test;
use crate::tests::hooks::get_hooks_tests;
use crate::tests::hostname::get_hostname_test;
use crate::tests::intel_rdt::get_intel_rdt_test;
Expand Down Expand Up @@ -125,6 +126,7 @@ fn main() -> Result<()> {
let process_rlimtis = get_process_rlimits_test();
let no_pivot = get_no_pivot_test();
let process_oom_score_adj = get_process_oom_score_adj_test();
let fd_control = get_fd_control_test();

tm.add_test_group(Box::new(cl));
tm.add_test_group(Box::new(cc));
Expand Down Expand Up @@ -154,6 +156,7 @@ fn main() -> Result<()> {
tm.add_test_group(Box::new(process_rlimtis));
tm.add_test_group(Box::new(no_pivot));
tm.add_test_group(Box::new(process_oom_score_adj));
tm.add_test_group(Box::new(fd_control));

tm.add_test_group(Box::new(io_priority_test));
tm.add_cleanup(Box::new(cgroups::cleanup_v1));
Expand Down
113 changes: 113 additions & 0 deletions tests/contest/contest/src/tests/fd_control/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
use std::fs;
use std::os::fd::{AsRawFd, RawFd};

use anyhow::{anyhow, Context, Result};
use oci_spec::runtime::{ProcessBuilder, Spec, SpecBuilder};
use test_framework::{test_result, ConditionalTest, Test, TestGroup, TestResult};

use crate::utils::{is_runtime_runc, test_inside_container, CreateOptions};

fn create_spec() -> Result<Spec> {
SpecBuilder::default()
.process(
ProcessBuilder::default()
.args(
["runtimetest", "fd_control"]
.iter()
.map(|s| s.to_string())
.collect::<Vec<String>>(),
)
.build()?,
)
.build()
.context("failed to create spec")
}

fn open_devnull_no_cloexec() -> Result<(fs::File, RawFd)> {
// Rust std by default sets cloexec, so we undo it
let devnull = fs::File::open("/dev/null")?;
let devnull_fd = devnull.as_raw_fd();
let flags = nix::fcntl::fcntl(devnull_fd, nix::fcntl::FcntlArg::F_GETFD)?;
let mut flags = nix::fcntl::FdFlag::from_bits_retain(flags);
flags.remove(nix::fcntl::FdFlag::FD_CLOEXEC);
nix::fcntl::fcntl(devnull_fd, nix::fcntl::FcntlArg::F_SETFD(flags))?;
Ok((devnull, devnull_fd))
}

// If not opening any other FDs, verify youki itself doesnt open anything that gets
// leaked in if passing --preserve-fds with a large number
// NOTE: this will also fail if the test harness itself starts leaking FDs
fn only_stdio_test() -> TestResult {
let spec = test_result!(create_spec());
test_inside_container(
spec,
&CreateOptions::default().with_extra_args(&["--preserve-fds".as_ref(), "100".as_ref()]),
&|bundle_path| {
fs::write(bundle_path.join("num-fds"), "0".as_bytes())?;
Ok(())
},
)
}

// If we know we have an open FD without cloexec, it should be closed if preserve-fds
// is 0 (the default)
fn closes_fd_test() -> TestResult {
// Open this before the setup function so it's kept alive for the container lifetime
let (_devnull, _devnull_fd) = match open_devnull_no_cloexec() {
Ok(v) => v,
Err(e) => return TestResult::Failed(anyhow!("failed to open dev null: {}", e)),
};

let spec = test_result!(create_spec());
test_inside_container(
spec,
&CreateOptions::default().with_extra_args(&["--preserve-fds".as_ref(), "0".as_ref()]),
&|bundle_path| {
fs::write(bundle_path.join("num-fds"), "0".as_bytes())?;
Ok(())
},
)
}

// Given an open FD, verify it can be passed down with preserve-fds
fn pass_single_fd_test() -> TestResult {
// Open this before the setup function so it's kept alive for the container lifetime
let (_devnull, devnull_fd) = match open_devnull_no_cloexec() {
Ok(v) => v,
Err(e) => return TestResult::Failed(anyhow!("failed to open dev null: {}", e)),
};

let spec = test_result!(create_spec());
test_inside_container(
spec,
&CreateOptions::default().with_extra_args(&[
"--preserve-fds".as_ref(),
(devnull_fd - 2).to_string().as_ref(), // relative to stdio
]),
&|bundle_path| {
fs::write(bundle_path.join("num-fds"), "1".as_bytes())?;
Ok(())
},
)
}

pub fn get_fd_control_test() -> TestGroup {
let mut test_group = TestGroup::new("fd_control");
test_group.set_nonparallel(); // fds are process-wide state
let test_only_stdio = ConditionalTest::new(
"only_stdio",
// runc errors if any of the N passed FDs via preserve-fd are not currently open
Box::new(|| !is_runtime_runc()),
Box::new(only_stdio_test),
);
let test_closes_fd = Test::new("closes_fd", Box::new(closes_fd_test));
let test_pass_single_fd = Test::new("pass_single_fd", Box::new(pass_single_fd_test));
// adding separately as one is conditional test and others are normal
test_group.add(vec![Box::new(test_only_stdio)]);
test_group.add(vec![
Box::new(test_closes_fd),
Box::new(test_pass_single_fd),
]);

test_group
}
4 changes: 4 additions & 0 deletions tests/contest/contest/src/tests/lifecycle/container_create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ impl TestableGroup for ContainerCreate {
"create"
}

fn parallel(&self) -> bool {
true
}

fn run_all(&self) -> Vec<(&'static str, TestResult)> {
vec![
("empty_id", self.create_empty_id()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ impl TestableGroup for ContainerLifecycle {
"lifecycle"
}

fn parallel(&self) -> bool {
true
}

fn run_all(&self) -> Vec<(&'static str, TestResult)> {
vec![
("create", self.create()),
Expand Down
1 change: 1 addition & 0 deletions tests/contest/contest/src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ pub mod cgroups;
pub mod devices;
pub mod domainname;
pub mod example;
pub mod fd_control;
pub mod hooks;
pub mod hostname;
pub mod intel_rdt;
Expand Down
31 changes: 10 additions & 21 deletions tests/contest/contest/src/tests/pidfile/pidfile_test.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
use std::fs::File;
use std::process::{Command, Stdio};

use anyhow::anyhow;
use test_framework::{Test, TestGroup, TestResult};
use uuid::Uuid;

use crate::utils::{
delete_container, generate_uuid, get_runtime_path, get_state, kill_container, prepare_bundle,
State,
create_container, delete_container, generate_uuid, get_state, kill_container, prepare_bundle,
CreateOptions, State,
};

#[inline]
Expand All @@ -17,8 +16,6 @@ fn cleanup(id: &Uuid, bundle: &tempfile::TempDir) {
delete_container(&str_id, bundle).unwrap().wait().unwrap();
}

// here we have to manually create and manage the container
// as the test_inside container does not provide a way to set the pid file argument
fn test_pidfile() -> TestResult {
// create id for the container and pidfile
let container_id = generate_uuid();
Expand All @@ -30,22 +27,14 @@ fn test_pidfile() -> TestResult {
let _ = File::create(&pidfile_path).unwrap();

// start the container
Command::new(get_runtime_path())
.stdin(Stdio::null())
.stdout(Stdio::null())
.stderr(Stdio::null())
.arg("--root")
.arg(bundle.as_ref().join("runtime"))
.arg("create")
.arg(container_id.to_string())
.arg("--bundle")
.arg(bundle.as_ref().join("bundle"))
.arg("--pid-file")
.arg(pidfile_path)
.spawn()
.unwrap()
.wait()
.unwrap();
create_container(
&container_id.to_string(),
&bundle,
&CreateOptions::default().with_extra_args(&["--pid-file".as_ref(), pidfile_path.as_ref()]),
)
.unwrap()
.wait()
.unwrap();

let (out, err) = get_state(&container_id.to_string(), &bundle).unwrap();

Expand Down
2 changes: 1 addition & 1 deletion tests/contest/contest/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ pub use support::{
};
pub use test_utils::{
create_container, delete_container, get_state, kill_container, test_inside_container,
test_outside_container, State,
test_outside_container, CreateOptions, State,
};
14 changes: 11 additions & 3 deletions tests/contest/contest/src/utils/test_utils.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! Contains utility functions for testing
//! Similar to https://github.com/opencontainers/runtime-tools/blob/master/validation/util/test.go
use std::collections::HashMap;
use std::ffi::OsStr;
use std::path::{Path, PathBuf};
use std::process::{Child, Command, ExitStatus, Stdio};
use std::thread::sleep;
Expand Down Expand Up @@ -43,11 +44,17 @@ pub struct ContainerData {
}

#[derive(Debug, Default)]
pub struct CreateOptions {
pub struct CreateOptions<'a> {
extra_args: &'a [&'a OsStr],
no_pivot: bool,
}

impl CreateOptions {
impl<'a> CreateOptions<'a> {
pub fn with_extra_args(mut self, extra_args: &'a [&'a OsStr]) -> Self {
self.extra_args = extra_args;
self
}

pub fn with_no_pivot_root(mut self) -> Self {
self.no_pivot = true;
self
Expand All @@ -64,7 +71,8 @@ fn create_container_command<P: AsRef<Path>>(id: &str, dir: P, options: &CreateOp
.arg("create")
.arg(id)
.arg("--bundle")
.arg(dir.as_ref().join("bundle"));
.arg(dir.as_ref().join("bundle"))
.args(options.extra_args);
if options.no_pivot {
command.arg("--no-pivot");
}
Expand Down
1 change: 1 addition & 0 deletions tests/contest/runtimetest/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ fn main() {
"process_rlimits" => tests::validate_process_rlimits(&spec),
"no_pivot" => tests::validate_rootfs(),
"process_oom_score_adj" => tests::validate_process_oom_score_adj(&spec),
"fd_control" => tests::validate_fd_control(&spec),
_ => eprintln!("error due to unexpected execute test name: {execute_test}"),
}
}
45 changes: 45 additions & 0 deletions tests/contest/runtimetest/src/tests.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::env;
use std::ffi::OsStr;
use std::fs::{self, read_dir};
use std::os::linux::fs::MetadataExt;
use std::os::unix::fs::{FileTypeExt, PermissionsExt};
Expand Down Expand Up @@ -775,3 +776,47 @@ pub fn validate_process_oom_score_adj(spec: &Spec) {
eprintln!("Unexpected oom_score_adj, expected: {expected_value} found: {actual_value}");
}
}

pub fn validate_fd_control(_spec: &Spec) {
// --preserve-fds does not get passed via the spec so we have to communicate information
// via the root filesystem
let expected_num_fds: usize = fs::read_to_string("/num-fds").unwrap().parse().unwrap();

let mut entries = vec![];
let stdio: &[&OsStr] = &["0".as_ref(), "1".as_ref(), "2".as_ref()];
for entry in fs::read_dir("/proc/self/fd").unwrap() {
let entry = entry.unwrap();
let name = entry.file_name();
if stdio.contains(&name.as_os_str()) {
// Ignore stdio
continue;
}
entries.push((entry.path(), fs::read_link(entry.path())))
}

// NOTE: we do this in a separate loop so we can filter out the dirfd used behind
// the scenes in 'fs::read_dir'. It is important to *not* store the full DirEntry
// type, as that keeps the dirfd open.
let mut fd_details = vec![];
let mut found_dirfd = false;
for (path, linkpath) in &entries {
println!("found fd in container {} {:?}", path.display(), linkpath);
// The difference between metadata.unwrap() and fs::metadata is that the latter
// will now try to follow the symlink
match fs::metadata(path) {
Ok(m) => fd_details.push((path, linkpath, m)),
Err(e) if e.kind() == std::io::ErrorKind::NotFound && !found_dirfd => {
// Expected for the dirfd
println!("(ignoring dirfd)");
found_dirfd = true
}
Err(e) => {
eprintln!("unexpected error reading metadata: {}", e)
}
}
}

if fd_details.len() != expected_num_fds {
eprintln!("mismatched fds inside container! {:?}", fd_details);
}
}
Loading

0 comments on commit 83d87a2

Please # to comment.