diff --git a/Cargo.lock b/Cargo.lock index 3289cc88b..376f4dfad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -43,7 +43,7 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "once_cell", "version_check", ] @@ -191,7 +191,7 @@ checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12" dependencies = [ "addr2line 0.20.0", "cc", - "cfg-if 1.0.0", + "cfg-if", "libc", "miniz_oxide", "object 0.31.1", @@ -407,12 +407,6 @@ dependencies = [ "nom", ] -[[package]] -name = "cfg-if" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" - [[package]] name = "cfg-if" version = "1.0.0" @@ -495,16 +489,6 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" -[[package]] -name = "clone3" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee4e061ea30800291ca09663878f3953840a69b08ce244b3e8b26e894d9f60f" -dependencies = [ - "bitflags 1.3.2", - "uapi", -] - [[package]] name = "cmake" version = "0.1.50" @@ -565,7 +549,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9847f90f32a50b0dcbd68bc23ff242798b13080b97b0569f6ed96a45ce4cf2cd" dependencies = [ "autocfg", - "cfg-if 1.0.0", + "cfg-if", "libc", "scopeguard", "windows-sys 0.33.0", @@ -577,7 +561,7 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eeaa953eaad386a53111e47172c2fedba671e5684c8dd601a5f474f4f118710f" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", ] [[package]] @@ -786,7 +770,7 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", ] [[package]] @@ -801,7 +785,7 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2801af0d36612ae591caa9568261fddce32ce6e08a7275ea334a06a4ad021a2c" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "crossbeam-channel", "crossbeam-deque", "crossbeam-epoch", @@ -815,7 +799,7 @@ version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "crossbeam-utils", ] @@ -825,7 +809,7 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "crossbeam-epoch", "crossbeam-utils", ] @@ -837,7 +821,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" dependencies = [ "autocfg", - "cfg-if 1.0.0", + "cfg-if", "crossbeam-utils", "memoffset 0.9.0", "scopeguard", @@ -849,7 +833,7 @@ version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "crossbeam-utils", ] @@ -859,7 +843,7 @@ version = "0.8.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", ] [[package]] @@ -947,7 +931,7 @@ version = "5.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "907076dfda823b0b36d2a1bb5f90c96660a5bbcd7729e10727f07858f22c4edc" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "hashbrown 0.12.3", "lock_api", "once_cell", @@ -1038,7 +1022,7 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "339ee130d97a610ea5a5872d2bbb130fdf68884ff09d3028b81bec8a1ac23bbc" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "dirs-sys-next", ] @@ -1097,7 +1081,7 @@ version = "0.8.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071a31f4ee85403370b58aca746f01041ede6f0da2730960ad001edc2b71b394" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", ] [[package]] @@ -1218,7 +1202,7 @@ version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b0377f1edc77dbd1118507bc7a66e4ab64d2b90c66f90726dc801e73a8c68f9" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "rustix 0.38.1", "windows-sys 0.48.0", ] @@ -1239,7 +1223,7 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5cbc844cecaee9d4443931972e1289c8ff485cb4cc2767cb03ca139ed6885153" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "libc", "redox_syscall 0.2.16", "windows-sys 0.48.0", @@ -1457,7 +1441,7 @@ version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "js-sys", "libc", "wasi", @@ -1765,7 +1749,7 @@ version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", ] [[package]] @@ -1981,7 +1965,6 @@ dependencies = [ "bitflags 2.3.3", "caps", "chrono", - "clone3", "fastrand 2.0.0", "futures", "libc", @@ -2020,7 +2003,7 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "winapi", ] @@ -2220,7 +2203,7 @@ version = "0.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c84490118f2ee2d74570d114f3d0493cbf02790df303d2707606c3e14e07c96" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "downcast", "fragile", "lazy_static", @@ -2235,7 +2218,7 @@ version = "0.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22ce75669015c4f47b289fd4d4f56e894e4c96003ffdf3ac51313126f94c6cbb" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "proc-macro2", "quote", "syn 1.0.109", @@ -2272,7 +2255,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a" dependencies = [ "bitflags 1.3.2", - "cfg-if 1.0.0", + "cfg-if", "libc", "memoffset 0.7.1", "pin-utils", @@ -2419,7 +2402,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "345df152bc43501c5eb9e4654ff05f794effb78d4efe3d53abc158baddc0703d" dependencies = [ "bitflags 1.3.2", - "cfg-if 1.0.0", + "cfg-if", "foreign-types", "libc", "once_cell", @@ -2478,7 +2461,7 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "libc", "redox_syscall 0.3.5", "smallvec", @@ -3469,7 +3452,7 @@ version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479fb9d862239e610720565ca91403019f2f00410f1864c5aa7479b950a76ed8" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "cpufeatures", "digest", ] @@ -3728,7 +3711,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "31c0432476357e58790aaa47a8efb0c5138f137343f3b5f23bd36a27e3b0a6d6" dependencies = [ "autocfg", - "cfg-if 1.0.0", + "cfg-if", "fastrand 1.9.0", "redox_syscall 0.3.5", "rustix 0.37.19", @@ -3803,7 +3786,7 @@ version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "once_cell", ] @@ -3991,7 +3974,7 @@ version = "0.1.37" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "log", "pin-project-lite", "tracing-attributes", @@ -4084,32 +4067,6 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" -[[package]] -name = "uapi" -version = "0.2.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "019450240401d342e2a5bc47f7fbaeb002a38fe18197b83788750d7ffb143274" -dependencies = [ - "cc", - "cfg-if 0.1.10", - "libc", - "uapi-proc", -] - -[[package]] -name = "uapi-proc" -version = "0.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54de46f980cea7b2ae8d8f7f9f1c35cf7062c68343e99345ef73758f8e60975a" -dependencies = [ - "lazy_static", - "libc", - "proc-macro2", - "quote", - "regex", - "syn 1.0.109", -] - [[package]] name = "unicase" version = "2.6.0" @@ -4448,7 +4405,7 @@ version = "0.2.84" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "wasm-bindgen-macro", ] @@ -4496,7 +4453,7 @@ version = "0.4.34" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f219e0d211ba40266969f6dbdd90636da12f75bee4fc9d6c23d1260dadb51454" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "js-sys", "wasm-bindgen", "web-sys", @@ -4605,7 +4562,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea790bcdfb4e6e9d1e5ddf75b4699aac62b078fcc9f27f44e1748165ceea67bf" dependencies = [ "bytes", - "cfg-if 1.0.0", + "cfg-if", "derivative", "indexmap 1.9.3", "js-sys", @@ -4633,7 +4590,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f093937725e242e5529fed27e08ff836c011a9ecc22e6819fb818c2ac6ff5f88" dependencies = [ "backtrace", - "cfg-if 1.0.0", + "cfg-if", "enum-iterator", "enumset", "lazy_static", @@ -4705,7 +4662,7 @@ checksum = "a1e000c2cbd4f9805427af5f3b3446574caf89ab3a1e66c2f3579fbde22b072b" dependencies = [ "backtrace", "cc", - "cfg-if 1.0.0", + "cfg-if", "corosensei", "dashmap", "derivative", @@ -4734,7 +4691,7 @@ dependencies = [ "async-trait", "bincode", "bytes", - "cfg-if 1.0.0", + "cfg-if", "cooked-waker", "dashmap", "derivative", @@ -4789,7 +4746,7 @@ dependencies = [ "anyhow", "bitflags 1.3.2", "byteorder", - "cfg-if 1.0.0", + "cfg-if", "num_enum", "serde", "time 0.2.27", @@ -4843,7 +4800,7 @@ dependencies = [ "async-trait", "bincode", "bumpalo", - "cfg-if 1.0.0", + "cfg-if", "encoding_rs", "fxprof-processed-profile", "indexmap 1.9.3", @@ -4877,7 +4834,7 @@ version = "10.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "284466ef356ce2d909bc0ad470b60c4d0df5df2de9084457e118131b3c779b92" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", ] [[package]] @@ -4989,7 +4946,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14309cbdf2c395258b124a24757c727403070c0465a28bcc780c4f82f4bca5ff" dependencies = [ "cc", - "cfg-if 1.0.0", + "cfg-if", "rustix 0.37.19", "wasmtime-asm-macros", "windows-sys 0.48.0", @@ -5004,7 +4961,7 @@ dependencies = [ "addr2line 0.19.0", "anyhow", "bincode", - "cfg-if 1.0.0", + "cfg-if", "cpp_demangle", "gimli 0.27.2", "ittapi", @@ -5038,7 +4995,7 @@ version = "10.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b49ceb7e2105a8ebe5614d7bbab6f6ef137a284e371633af60b34925493081f" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "libc", "windows-sys 0.48.0", ] @@ -5051,7 +5008,7 @@ checksum = "3a5de4762421b0b2b19e02111ca403632852b53e506e03b4b227ffb0fbfa63c2" dependencies = [ "anyhow", "cc", - "cfg-if 1.0.0", + "cfg-if", "encoding_rs", "indexmap 1.9.3", "libc", diff --git a/crates/libcgroups/src/common.rs b/crates/libcgroups/src/common.rs index aba574f58..84c690dd1 100644 --- a/crates/libcgroups/src/common.rs +++ b/crates/libcgroups/src/common.rs @@ -316,17 +316,22 @@ pub enum CreateCgroupSetupError { Systemd(#[from] systemd::manager::SystemdManagerError), } -pub fn create_cgroup_manager>( - cgroup_path: P, - systemd_cgroup: bool, - container_name: &str, +#[derive(Clone)] +pub struct CgroupConfig { + pub cgroup_path: PathBuf, + pub systemd_cgroup: bool, + pub container_name: String, +} + +pub fn create_cgroup_manager( + config: &CgroupConfig, ) -> Result { let cgroup_setup = get_cgroup_setup().map_err(|err| match err { GetCgroupSetupError::WrappedIo(err) => CreateCgroupSetupError::WrappedIo(err), GetCgroupSetupError::NonDefault => CreateCgroupSetupError::NonDefault, GetCgroupSetupError::FailedToDetect => CreateCgroupSetupError::FailedToDetect, })?; - let cgroup_path = cgroup_path.into(); + let cgroup_path = config.cgroup_path.as_path(); match cgroup_setup { CgroupSetup::Legacy | CgroupSetup::Hybrid => { @@ -334,17 +339,17 @@ pub fn create_cgroup_manager>( } CgroupSetup::Unified => { // ref https://github.com/opencontainers/runtime-spec/blob/main/config-linux.md#cgroups-path - if cgroup_path.is_absolute() || !systemd_cgroup { + if cgroup_path.is_absolute() || !config.systemd_cgroup { return Ok(create_v2_cgroup_manager(cgroup_path)?.any()); } - Ok(create_systemd_cgroup_manager(cgroup_path, container_name)?.any()) + Ok(create_systemd_cgroup_manager(cgroup_path, config.container_name.as_str())?.any()) } } } #[cfg(feature = "v1")] fn create_v1_cgroup_manager( - cgroup_path: PathBuf, + cgroup_path: &Path, ) -> Result { tracing::info!("cgroup manager V1 will be used"); v1::manager::Manager::new(cgroup_path) @@ -352,14 +357,14 @@ fn create_v1_cgroup_manager( #[cfg(not(feature = "v1"))] fn create_v1_cgroup_manager( - _cgroup_path: PathBuf, + _cgroup_path: &Path, ) -> Result { Err(v1::manager::V1ManagerError::NotEnabled) } #[cfg(feature = "v2")] fn create_v2_cgroup_manager( - cgroup_path: PathBuf, + cgroup_path: &Path, ) -> Result { tracing::info!("cgroup manager V2 will be used"); v2::manager::Manager::new(DEFAULT_CGROUP_ROOT.into(), cgroup_path) @@ -367,14 +372,14 @@ fn create_v2_cgroup_manager( #[cfg(not(feature = "v2"))] fn create_v2_cgroup_manager( - _cgroup_path: PathBuf, + _cgroup_path: &Path, ) -> Result { Err(v2::manager::V2ManagerError::NotEnabled) } #[cfg(feature = "systemd")] fn create_systemd_cgroup_manager( - cgroup_path: PathBuf, + cgroup_path: &Path, container_name: &str, ) -> Result { if !systemd::booted() { @@ -399,7 +404,7 @@ fn create_systemd_cgroup_manager( #[cfg(not(feature = "systemd"))] fn create_systemd_cgroup_manager( - _cgroup_path: PathBuf, + _cgroup_path: &Path, _container_name: &str, ) -> Result { Err(systemd::manager::SystemdManagerError::NotEnabled) diff --git a/crates/libcgroups/src/systemd/manager.rs b/crates/libcgroups/src/systemd/manager.rs index 21a2e60d8..b94cdcdbb 100644 --- a/crates/libcgroups/src/systemd/manager.rs +++ b/crates/libcgroups/src/systemd/manager.rs @@ -176,11 +176,11 @@ pub enum SystemdManagerError { impl Manager { pub fn new( root_path: PathBuf, - cgroups_path: PathBuf, + cgroups_path: &Path, container_name: String, use_system: bool, ) -> Result { - let mut destructured_path = cgroups_path.as_path().try_into()?; + let mut destructured_path = cgroups_path.try_into()?; ensure_parent_unit(&mut destructured_path, use_system); let client = match use_system { @@ -191,7 +191,7 @@ impl Manager { let (cgroups_path, delegation_boundary) = Self::construct_cgroups_path(&destructured_path, &client)?; let full_path = root_path.join_safely(&cgroups_path)?; - let fs_manager = FsManager::new(root_path.clone(), cgroups_path.clone())?; + let fs_manager = FsManager::new(root_path.clone(), &cgroups_path)?; Ok(Manager { root_path, diff --git a/crates/libcgroups/src/v1/manager.rs b/crates/libcgroups/src/v1/manager.rs index e1376a883..022ffbbcb 100644 --- a/crates/libcgroups/src/v1/manager.rs +++ b/crates/libcgroups/src/v1/manager.rs @@ -82,10 +82,10 @@ pub enum V1ManagerError { impl Manager { /// Constructs a new cgroup manager with cgroups_path being relative to the root of the subsystem - pub fn new(cgroup_path: PathBuf) -> Result { + pub fn new(cgroup_path: &Path) -> Result { let mut subsystems = HashMap::::new(); for subsystem in CONTROLLERS { - if let Ok(subsystem_path) = Self::get_subsystem_path(&cgroup_path, subsystem) { + if let Ok(subsystem_path) = Self::get_subsystem_path(cgroup_path, subsystem) { subsystems.insert(*subsystem, subsystem_path); } else { tracing::warn!("cgroup {} not supported on this system", subsystem); diff --git a/crates/libcgroups/src/v2/manager.rs b/crates/libcgroups/src/v2/manager.rs index 9aa5b8d56..b23d6f9e0 100644 --- a/crates/libcgroups/src/v2/manager.rs +++ b/crates/libcgroups/src/v2/manager.rs @@ -84,12 +84,12 @@ pub struct Manager { impl Manager { /// Constructs a new cgroup manager with root path being the mount point /// of a cgroup v2 fs and cgroup path being a relative path from the root - pub fn new(root_path: PathBuf, cgroup_path: PathBuf) -> Result { - let full_path = root_path.join_safely(&cgroup_path)?; + pub fn new(root_path: PathBuf, cgroup_path: &Path) -> Result { + let full_path = root_path.join_safely(cgroup_path)?; Ok(Self { root_path, - cgroup_path, + cgroup_path: cgroup_path.to_owned(), full_path, }) } diff --git a/crates/libcontainer/Cargo.toml b/crates/libcontainer/Cargo.toml index aaf1ea0e4..9d31b687a 100644 --- a/crates/libcontainer/Cargo.toml +++ b/crates/libcontainer/Cargo.toml @@ -36,7 +36,6 @@ libseccomp = { version = "0.3.0", optional=true } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" rust-criu = "0.4.0" -clone3 = "0.2.3" regex = "1.9.0" thiserror = "1.0.43" tracing = { version = "0.1.37", features = ["attributes"]} diff --git a/crates/libcontainer/src/channel.rs b/crates/libcontainer/src/channel.rs index 797dee188..a772a10d3 100644 --- a/crates/libcontainer/src/channel.rs +++ b/crates/libcontainer/src/channel.rs @@ -18,12 +18,13 @@ pub enum ChannelError { #[error("channel connection broken")] BrokenChannel, } - +#[derive(Clone)] pub struct Receiver { receiver: RawFd, phantom: PhantomData, } +#[derive(Clone)] pub struct Sender { sender: RawFd, phantom: PhantomData, diff --git a/crates/libcontainer/src/container/builder.rs b/crates/libcontainer/src/container/builder.rs index 80d0f8b94..c568146a2 100644 --- a/crates/libcontainer/src/container/builder.rs +++ b/crates/libcontainer/src/container/builder.rs @@ -1,18 +1,18 @@ use crate::error::{ErrInvalidID, LibcontainerError}; -use crate::workload::default::DefaultExecutor; -use crate::workload::{Executor, ExecutorManager}; -use crate::{syscall::Syscall, utils::PathBufExt}; +use crate::syscall::syscall::SyscallType; +use crate::utils::PathBufExt; +use crate::workload::{self, Executor}; use std::path::PathBuf; use super::{init_builder::InitContainerBuilder, tenant_builder::TenantContainerBuilder}; -pub struct ContainerBuilder<'a> { +pub struct ContainerBuilder { /// Id of the container pub(super) container_id: String, /// Root directory for container state pub(super) root_path: PathBuf, /// Interface to operating system primitives - pub(super) syscall: &'a dyn Syscall, + pub(super) syscall: SyscallType, /// File which will be used to communicate the pid of the /// container process to the higher level runtime pub(super) pid_file: Option, @@ -22,7 +22,7 @@ pub struct ContainerBuilder<'a> { pub(super) preserve_fds: i32, /// Manage the functions that actually run on the container /// Default executes the specified execution of a generic command - pub(super) executor_manager: ExecutorManager, + pub(super) executor: Executor, } /// Builder that can be used to configure the common properties of @@ -32,12 +32,11 @@ pub struct ContainerBuilder<'a> { /// /// ```no_run /// use libcontainer::container::builder::ContainerBuilder; -/// use libcontainer::syscall::syscall::create_syscall; -/// use libcontainer::workload::default::DefaultExecutor; +/// use libcontainer::syscall::syscall::SyscallType; /// /// ContainerBuilder::new( /// "74f1a4cb3801".to_owned(), -/// create_syscall().as_ref(), +/// SyscallType::default(), /// ) /// .with_root_path("/run/containers/youki").expect("invalid root path") /// .with_pid_file(Some("/var/run/docker.pid")).expect("invalid pid file") @@ -45,7 +44,7 @@ pub struct ContainerBuilder<'a> { /// .as_init("/var/run/docker/bundle") /// .build(); /// ``` -impl<'a> ContainerBuilder<'a> { +impl ContainerBuilder { /// Generates the base configuration for a container which can be /// transformed into either a init container or a tenant container /// @@ -53,15 +52,14 @@ impl<'a> ContainerBuilder<'a> { /// /// ```no_run /// use libcontainer::container::builder::ContainerBuilder; - /// use libcontainer::syscall::syscall::create_syscall; - /// use libcontainer::workload::default::DefaultExecutor; + /// use libcontainer::syscall::syscall::SyscallType; /// /// let builder = ContainerBuilder::new( /// "74f1a4cb3801".to_owned(), - /// create_syscall().as_ref(), + /// SyscallType::default(), /// ); /// ``` - pub fn new(container_id: String, syscall: &'a dyn Syscall) -> Self { + pub fn new(container_id: String, syscall: SyscallType) -> Self { let root_path = PathBuf::from("/run/youki"); Self { container_id, @@ -70,9 +68,7 @@ impl<'a> ContainerBuilder<'a> { pid_file: None, console_socket: None, preserve_fds: 0, - executor_manager: ExecutorManager { - executors: vec![Box::::default()], - }, + executor: workload::default::get_executor(), } } @@ -117,19 +113,18 @@ impl<'a> ContainerBuilder<'a> { /// /// ```no_run /// # use libcontainer::container::builder::ContainerBuilder; - /// # use libcontainer::syscall::syscall::create_syscall; - /// # use libcontainer::workload::default::DefaultExecutor; + /// # use libcontainer::syscall::syscall::SyscallType; /// /// ContainerBuilder::new( /// "74f1a4cb3801".to_owned(), - /// create_syscall().as_ref(), + /// SyscallType::default(), /// ) /// .as_tenant() /// .with_container_args(vec!["sleep".to_owned(), "9001".to_owned()]) /// .build(); /// ``` #[allow(clippy::wrong_self_convention)] - pub fn as_tenant(self) -> TenantContainerBuilder<'a> { + pub fn as_tenant(self) -> TenantContainerBuilder { TenantContainerBuilder::new(self) } @@ -138,19 +133,18 @@ impl<'a> ContainerBuilder<'a> { /// /// ```no_run /// # use libcontainer::container::builder::ContainerBuilder; - /// # use libcontainer::syscall::syscall::create_syscall; - /// # use libcontainer::workload::default::DefaultExecutor; + /// # use libcontainer::syscall::syscall::SyscallType; /// /// ContainerBuilder::new( /// "74f1a4cb3801".to_owned(), - /// create_syscall().as_ref(), + /// SyscallType::default(), /// ) /// .as_init("/var/run/docker/bundle") /// .with_systemd(false) /// .build(); /// ``` #[allow(clippy::wrong_self_convention)] - pub fn as_init>(self, bundle: P) -> InitContainerBuilder<'a> { + pub fn as_init>(self, bundle: P) -> InitContainerBuilder { InitContainerBuilder::new(self, bundle.into()) } @@ -159,12 +153,11 @@ impl<'a> ContainerBuilder<'a> { /// /// ```no_run /// # use libcontainer::container::builder::ContainerBuilder; - /// # use libcontainer::syscall::syscall::create_syscall; - /// # use libcontainer::workload::default::DefaultExecutor; + /// # use libcontainer::syscall::syscall::SyscallType; /// /// ContainerBuilder::new( /// "74f1a4cb3801".to_owned(), - /// create_syscall().as_ref(), + /// SyscallType::default(), /// ) /// .with_root_path("/run/containers/youki").expect("invalid root path"); /// ``` @@ -184,12 +177,11 @@ impl<'a> ContainerBuilder<'a> { /// /// ```no_run /// # use libcontainer::container::builder::ContainerBuilder; - /// # use libcontainer::syscall::syscall::create_syscall; - /// # use libcontainer::workload::default::DefaultExecutor; + /// # use libcontainer::syscall::syscall::SyscallType; /// /// ContainerBuilder::new( /// "74f1a4cb3801".to_owned(), - /// create_syscall().as_ref(), + /// SyscallType::default(), /// ) /// .with_pid_file(Some("/var/run/docker.pid")).expect("invalid pid file"); /// ``` @@ -214,12 +206,11 @@ impl<'a> ContainerBuilder<'a> { /// /// ```no_run /// # use libcontainer::container::builder::ContainerBuilder; - /// # use libcontainer::syscall::syscall::create_syscall; - /// # use libcontainer::workload::default::DefaultExecutor; + /// # use libcontainer::syscall::syscall::SyscallType; /// /// ContainerBuilder::new( /// "74f1a4cb3801".to_owned(), - /// create_syscall().as_ref(), + /// SyscallType::default(), /// ) /// .with_console_socket(Some("/var/run/docker/sock.tty")); /// ``` @@ -234,12 +225,11 @@ impl<'a> ContainerBuilder<'a> { /// /// ```no_run /// # use libcontainer::container::builder::ContainerBuilder; - /// # use libcontainer::syscall::syscall::create_syscall; - /// # use libcontainer::workload::default::DefaultExecutor; + /// # use libcontainer::syscall::syscall::SyscallType; /// /// ContainerBuilder::new( /// "74f1a4cb3801".to_owned(), - /// create_syscall().as_ref(), + /// SyscallType::default(), /// ) /// .with_preserved_fds(5); /// ``` @@ -253,31 +243,24 @@ impl<'a> ContainerBuilder<'a> { /// /// ```no_run /// # use libcontainer::container::builder::ContainerBuilder; - /// # use libcontainer::syscall::syscall::create_syscall; - /// # use libcontainer::workload::default::DefaultExecutor; + /// # use libcontainer::syscall::syscall::SyscallType; + /// # use libcontainer::workload::default::get_executor; /// /// ContainerBuilder::new( /// "74f1a4cb3801".to_owned(), - /// create_syscall().as_ref(), + /// SyscallType::default(), /// ) - /// .with_executor(vec![Box::::default()]); + /// .with_executor(get_executor()); /// ``` - pub fn with_executor( - mut self, - executors: Vec>, - ) -> Result { - if executors.is_empty() { - return Err(LibcontainerError::NoExecutors); - }; - self.executor_manager = ExecutorManager { executors }; - Ok(self) + pub fn with_executor(mut self, executor: Executor) -> Self { + self.executor = executor; + self } } #[cfg(test)] mod tests { - use crate::container::builder::ContainerBuilder; - use crate::syscall::syscall::create_syscall; + use crate::{container::builder::ContainerBuilder, syscall::syscall::SyscallType}; use anyhow::{Context, Result}; use std::path::PathBuf; @@ -285,42 +268,41 @@ mod tests { fn test_failable_functions() -> Result<()> { let root_path_temp_dir = tempfile::tempdir().context("failed to create temp dir")?; let pid_file_temp_dir = tempfile::tempdir().context("failed to create temp dir")?; - let syscall = create_syscall(); + let syscall = SyscallType::default(); - ContainerBuilder::new("74f1a4cb3801".to_owned(), syscall.as_ref()) + ContainerBuilder::new("74f1a4cb3801".to_owned(), syscall) .with_root_path(root_path_temp_dir.path())? .with_pid_file(Some(pid_file_temp_dir.path().join("fake.pid")))? .with_console_socket(Some("/var/run/docker/sock.tty")) .as_init("/var/run/docker/bundle"); // accept None pid file. - ContainerBuilder::new("74f1a4cb3801".to_owned(), syscall.as_ref()) - .with_pid_file::(None)?; + ContainerBuilder::new("74f1a4cb3801".to_owned(), syscall).with_pid_file::(None)?; // accept absolute root path which does not exist let abs_root_path = PathBuf::from("/not/existing/path"); - let path_builder = ContainerBuilder::new("74f1a4cb3801".to_owned(), syscall.as_ref()) + let path_builder = ContainerBuilder::new("74f1a4cb3801".to_owned(), syscall) .with_root_path(&abs_root_path) .context("build container")?; assert_eq!(path_builder.root_path, abs_root_path); // accept relative root path which does not exist let cwd = std::env::current_dir().context("get current dir")?; - let path_builder = ContainerBuilder::new("74f1a4cb3801".to_owned(), syscall.as_ref()) + let path_builder = ContainerBuilder::new("74f1a4cb3801".to_owned(), syscall) .with_root_path("./not/existing/path") .context("build container")?; assert_eq!(path_builder.root_path, cwd.join("not/existing/path")); // accept absolute pid path which does not exist let abs_pid_path = PathBuf::from("/not/existing/path"); - let path_builder = ContainerBuilder::new("74f1a4cb3801".to_owned(), syscall.as_ref()) + let path_builder = ContainerBuilder::new("74f1a4cb3801".to_owned(), syscall) .with_pid_file(Some(&abs_pid_path)) .context("build container")?; assert_eq!(path_builder.pid_file, Some(abs_pid_path)); // accept relative pid path which does not exist let cwd = std::env::current_dir().context("get current dir")?; - let path_builder = ContainerBuilder::new("74f1a4cb3801".to_owned(), syscall.as_ref()) + let path_builder = ContainerBuilder::new("74f1a4cb3801".to_owned(), syscall) .with_pid_file(Some("./not/existing/path")) .context("build container")?; assert_eq!(path_builder.pid_file, Some(cwd.join("not/existing/path"))); @@ -330,22 +312,21 @@ mod tests { #[test] fn test_validate_id() -> Result<()> { - let syscall = create_syscall(); + let syscall = SyscallType::default(); // validate container_id - let result = ContainerBuilder::new("$#".to_owned(), syscall.as_ref()).validate_id(); + let result = ContainerBuilder::new("$#".to_owned(), syscall).validate_id(); assert!(result.is_err()); - let result = ContainerBuilder::new(".".to_owned(), syscall.as_ref()).validate_id(); + let result = ContainerBuilder::new(".".to_owned(), syscall).validate_id(); assert!(result.is_err()); - let result = ContainerBuilder::new("..".to_owned(), syscall.as_ref()).validate_id(); + let result = ContainerBuilder::new("..".to_owned(), syscall).validate_id(); assert!(result.is_err()); - let result = ContainerBuilder::new("...".to_owned(), syscall.as_ref()).validate_id(); + let result = ContainerBuilder::new("...".to_owned(), syscall).validate_id(); assert!(result.is_ok()); - let result = - ContainerBuilder::new("74f1a4cb3801".to_owned(), syscall.as_ref()).validate_id(); + let result = ContainerBuilder::new("74f1a4cb3801".to_owned(), syscall).validate_id(); assert!(result.is_ok()); Ok(()) } diff --git a/crates/libcontainer/src/container/builder_impl.rs b/crates/libcontainer/src/container/builder_impl.rs index 59fcc5f99..787b1cdf0 100644 --- a/crates/libcontainer/src/container/builder_impl.rs +++ b/crates/libcontainer/src/container/builder_impl.rs @@ -2,33 +2,32 @@ use super::{Container, ContainerStatus}; use crate::{ error::{LibcontainerError, MissingSpecError}, hooks, - notify_socket::NotifyListener, process::{ self, args::{ContainerArgs, ContainerType}, intel_rdt::delete_resctrl_subdirectory, }, rootless::Rootless, - syscall::Syscall, + syscall::syscall::SyscallType, utils, - workload::ExecutorManager, + workload::Executor, }; use libcgroups::common::CgroupManager; use nix::unistd::Pid; use oci_spec::runtime::Spec; use std::{fs, io::Write, os::unix::prelude::RawFd, path::PathBuf}; -pub(super) struct ContainerBuilderImpl<'a> { +pub(super) struct ContainerBuilderImpl { /// Flag indicating if an init or a tenant container should be created pub container_type: ContainerType, /// Interface to operating system primitives - pub syscall: &'a dyn Syscall, + pub syscall: SyscallType, /// Flag indicating if systemd should be used for cgroup management pub use_systemd: bool, /// Id of the container pub container_id: String, /// OCI compliant runtime spec - pub spec: &'a Spec, + pub spec: Spec, /// Root filesystem of the container pub rootfs: PathBuf, /// File which will be used to communicate the pid of the @@ -37,7 +36,7 @@ pub(super) struct ContainerBuilderImpl<'a> { /// Socket to communicate the file descriptor of the ptty pub console_socket: Option, /// Options for rootless containers - pub rootless: Option>, + pub rootless: Option, /// Path to the Unix Domain Socket to communicate container start pub notify_path: PathBuf, /// Container state @@ -47,10 +46,10 @@ pub(super) struct ContainerBuilderImpl<'a> { /// If the container is to be run in detached mode pub detached: bool, /// Default executes the specified execution of a generic command - pub executor_manager: ExecutorManager, + pub executor: Executor, } -impl<'a> ContainerBuilderImpl<'a> { +impl ContainerBuilderImpl { pub(super) fn create(&mut self) -> Result { match self.run_container() { Ok(pid) => Ok(pid), @@ -73,11 +72,11 @@ impl<'a> ContainerBuilderImpl<'a> { &self.container_id, self.rootless.is_some(), ); - let cmanager = libcgroups::common::create_cgroup_manager( - cgroups_path, - self.use_systemd || self.rootless.is_some(), - &self.container_id, - )?; + let cgroup_config = libcgroups::common::CgroupConfig { + cgroup_path: cgroups_path, + systemd_cgroup: self.use_systemd || self.rootless.is_some(), + container_name: self.container_id.to_owned(), + }; let process = self .spec .process() @@ -90,12 +89,6 @@ impl<'a> ContainerBuilderImpl<'a> { } } - // Need to create the notify socket before we pivot root, since the unix - // domain socket used here is outside of the rootfs of container. During - // exec, need to create the socket before we enter into existing mount - // namespace. - let notify_socket: NotifyListener = NotifyListener::new(&self.notify_path)?; - // If Out-of-memory score adjustment is set in specification. set the score // value for the current process check // https://dev.to/rrampage/surviving-the-linux-oom-killer-2ki9 for some more @@ -130,22 +123,31 @@ impl<'a> ContainerBuilderImpl<'a> { prctl::set_dumpable(false).unwrap(); } + // Need to create the notify socket before we pivot root, since the unix + // domain socket used here is outside of the rootfs of container. During + // exec, need to create the socket before we enter into existing mount + // namespace. We also need to create to socket before entering into the + // user namespace in the case that the path is located in paths only + // root can access. + let notify_listener = + crate::notify_socket::NotifyListener::new(self.notify_path.as_path())?; + // This container_args will be passed to the container processes, // therefore we will have to move all the variable by value. Since self // is a shared reference, we have to clone these variables here. let container_args = ContainerArgs { container_type: self.container_type, syscall: self.syscall, - spec: self.spec, - rootfs: &self.rootfs, + spec: self.spec.to_owned(), + rootfs: self.rootfs.to_owned(), console_socket: self.console_socket, - notify_socket, + notify_listener, preserve_fds: self.preserve_fds, - container: &self.container, - rootless: &self.rootless, - cgroup_manager: cmanager, + container: self.container.to_owned(), + rootless: self.rootless.to_owned(), + cgroup_config, detached: self.detached, - executor_manager: &self.executor_manager, + executor: self.executor.clone(), }; let (init_pid, need_to_clean_up_intel_rdt_dir) = @@ -184,11 +186,12 @@ impl<'a> ContainerBuilderImpl<'a> { &self.container_id, self.rootless.is_some(), ); - let cmanager = libcgroups::common::create_cgroup_manager( - cgroups_path, - self.use_systemd || self.rootless.is_some(), - &self.container_id, - )?; + let cmanager = + libcgroups::common::create_cgroup_manager(&libcgroups::common::CgroupConfig { + cgroup_path: cgroups_path, + systemd_cgroup: self.use_systemd || self.rootless.is_some(), + container_name: self.container_id.to_string(), + })?; let mut errors = Vec::new(); diff --git a/crates/libcontainer/src/container/container_delete.rs b/crates/libcontainer/src/container/container_delete.rs index 99a3cce3a..322a47f7d 100644 --- a/crates/libcontainer/src/container/container_delete.rs +++ b/crates/libcontainer/src/container/container_delete.rs @@ -13,13 +13,12 @@ impl Container { /// /// ```no_run /// use libcontainer::container::builder::ContainerBuilder; - /// use libcontainer::syscall::syscall::create_syscall; - /// use libcontainer::workload::default::DefaultExecutor; + /// use libcontainer::syscall::syscall::SyscallType; /// /// # fn main() -> anyhow::Result<()> { /// let mut container = ContainerBuilder::new( /// "74f1a4cb3801".to_owned(), - /// create_syscall().as_ref(), + /// SyscallType::default(), /// ) /// .as_init("/var/run/docker/bundle") /// .build()?; @@ -82,11 +81,12 @@ impl Container { // remove the cgroup created for the container // check https://man7.org/linux/man-pages/man7/cgroups.7.html // creating and removing cgroups section for more information on cgroups - let use_systemd = self.systemd(); let cmanager = libcgroups::common::create_cgroup_manager( - &config.cgroup_path, - use_systemd, - self.id(), + &libcgroups::common::CgroupConfig { + cgroup_path: config.cgroup_path.to_owned(), + systemd_cgroup: self.systemd(), + container_name: self.id().to_string(), + }, )?; cmanager.remove().map_err(|err| { tracing::error!(cgroup_path = ?config.cgroup_path, "failed to remove cgroup due to: {err:?}"); diff --git a/crates/libcontainer/src/container/container_events.rs b/crates/libcontainer/src/container/container_events.rs index 4d45bf889..6fcfec320 100644 --- a/crates/libcontainer/src/container/container_events.rs +++ b/crates/libcontainer/src/container/container_events.rs @@ -12,13 +12,12 @@ impl Container { /// /// ```no_run /// use libcontainer::container::builder::ContainerBuilder; - /// use libcontainer::syscall::syscall::create_syscall; - /// use libcontainer::workload::default::DefaultExecutor; + /// use libcontainer::syscall::syscall::SyscallType; /// /// # fn main() -> anyhow::Result<()> { /// let mut container = ContainerBuilder::new( /// "74f1a4cb3801".to_owned(), - /// create_syscall().as_ref(), + /// SyscallType::default(), /// ) /// .as_init("/var/run/docker/bundle") /// .build()?; @@ -34,11 +33,12 @@ impl Container { return Err(LibcontainerError::IncorrectStatus); } - let cgroups_path = self.spec()?.cgroup_path; - let use_systemd = self.systemd(); - let cgroup_manager = - libcgroups::common::create_cgroup_manager(cgroups_path, use_systemd, self.id())?; + libcgroups::common::create_cgroup_manager(&libcgroups::common::CgroupConfig { + cgroup_path: self.spec()?.cgroup_path, + systemd_cgroup: self.systemd(), + container_name: self.id().to_string(), + })?; match stats { true => { let stats = cgroup_manager.stats()?; diff --git a/crates/libcontainer/src/container/container_kill.rs b/crates/libcontainer/src/container/container_kill.rs index 9853d7c78..8a122950b 100644 --- a/crates/libcontainer/src/container/container_kill.rs +++ b/crates/libcontainer/src/container/container_kill.rs @@ -1,6 +1,6 @@ use super::{Container, ContainerStatus}; use crate::{error::LibcontainerError, signal::Signal}; -use libcgroups::common::{create_cgroup_manager, get_cgroup_setup, CgroupManager}; +use libcgroups::common::{get_cgroup_setup, CgroupManager}; use nix::sys::signal::{self}; impl Container { @@ -10,14 +10,13 @@ impl Container { /// /// ```no_run /// use libcontainer::container::builder::ContainerBuilder; - /// use libcontainer::syscall::syscall::create_syscall; - /// use libcontainer::workload::default::DefaultExecutor; + /// use libcontainer::syscall::syscall::SyscallType; /// use nix::sys::signal::Signal; /// /// # fn main() -> anyhow::Result<()> { /// let mut container = ContainerBuilder::new( /// "74f1a4cb3801".to_owned(), - /// create_syscall().as_ref(), + /// SyscallType::default(), /// ) /// .as_init("/var/run/docker/bundle") /// .build()?; @@ -79,10 +78,14 @@ impl Container { match get_cgroup_setup()? { libcgroups::common::CgroupSetup::Legacy | libcgroups::common::CgroupSetup::Hybrid => { - let cgroups_path = self.spec()?.cgroup_path; - let use_systemd = self.systemd(); - let cmanger = create_cgroup_manager(cgroups_path, use_systemd, self.id())?; - cmanger.freeze(libcgroups::common::FreezerState::Thawed)?; + let cmanager = libcgroups::common::create_cgroup_manager( + &libcgroups::common::CgroupConfig { + cgroup_path: self.spec()?.cgroup_path, + systemd_cgroup: self.systemd(), + container_name: self.id().to_string(), + }, + )?; + cmanager.freeze(libcgroups::common::FreezerState::Thawed)?; } libcgroups::common::CgroupSetup::Unified => {} } @@ -92,11 +95,14 @@ impl Container { fn kill_all_processes>(&self, signal: S) -> Result<(), LibcontainerError> { let signal = signal.into().into_raw(); - let cgroups_path = self.spec()?.cgroup_path; - let use_systemd = self.systemd(); - let cmanger = create_cgroup_manager(cgroups_path, use_systemd, self.id())?; + let cmanager = + libcgroups::common::create_cgroup_manager(&libcgroups::common::CgroupConfig { + cgroup_path: self.spec()?.cgroup_path, + systemd_cgroup: self.systemd(), + container_name: self.id().to_string(), + })?; - if let Err(e) = cmanger.freeze(libcgroups::common::FreezerState::Frozen) { + if let Err(e) = cmanager.freeze(libcgroups::common::FreezerState::Frozen) { tracing::warn!( err = ?e, id = ?self.id(), @@ -104,7 +110,7 @@ impl Container { ); } - let pids = cmanger.get_all_pids()?; + let pids = cmanager.get_all_pids()?; pids.iter() .try_for_each(|&pid| { tracing::debug!("kill signal {} to {}", signal, pid); @@ -118,7 +124,7 @@ impl Container { } }) .map_err(LibcontainerError::OtherSyscall)?; - if let Err(err) = cmanger.freeze(libcgroups::common::FreezerState::Thawed) { + if let Err(err) = cmanager.freeze(libcgroups::common::FreezerState::Thawed) { tracing::warn!( err = ?err, id = ?self.id(), diff --git a/crates/libcontainer/src/container/container_pause.rs b/crates/libcontainer/src/container/container_pause.rs index aa2074656..c35302b66 100644 --- a/crates/libcontainer/src/container/container_pause.rs +++ b/crates/libcontainer/src/container/container_pause.rs @@ -10,13 +10,12 @@ impl Container { /// /// ```no_run /// use libcontainer::container::builder::ContainerBuilder; - /// use libcontainer::syscall::syscall::create_syscall; - /// use libcontainer::workload::default::DefaultExecutor; + /// use libcontainer::syscall::syscall::SyscallType; /// /// # fn main() -> anyhow::Result<()> { /// let mut container = ContainerBuilder::new( /// "74f1a4cb3801".to_owned(), - /// create_syscall().as_ref(), + /// SyscallType::default(), /// ) /// .as_init("/var/run/docker/bundle") /// .build()?; @@ -33,10 +32,12 @@ impl Container { return Err(LibcontainerError::IncorrectStatus); } - let cgroups_path = self.spec()?.cgroup_path; - let use_systemd = self.systemd(); let cmanager = - libcgroups::common::create_cgroup_manager(cgroups_path, use_systemd, self.id())?; + libcgroups::common::create_cgroup_manager(&libcgroups::common::CgroupConfig { + cgroup_path: self.spec()?.cgroup_path, + systemd_cgroup: self.systemd(), + container_name: self.id().to_string(), + })?; cmanager.freeze(FreezerState::Frozen)?; tracing::debug!("saving paused status"); diff --git a/crates/libcontainer/src/container/container_resume.rs b/crates/libcontainer/src/container/container_resume.rs index 0275f2bfc..d2165ec80 100644 --- a/crates/libcontainer/src/container/container_resume.rs +++ b/crates/libcontainer/src/container/container_resume.rs @@ -11,13 +11,12 @@ impl Container { /// /// ```no_run /// use libcontainer::container::builder::ContainerBuilder; - /// use libcontainer::syscall::syscall::create_syscall; - /// use libcontainer::workload::default::DefaultExecutor; + /// use libcontainer::syscall::syscall::SyscallType; /// /// # fn main() -> anyhow::Result<()> { /// let mut container = ContainerBuilder::new( /// "74f1a4cb3801".to_owned(), - /// create_syscall().as_ref(), + /// SyscallType::default(), /// ) /// .as_init("/var/run/docker/bundle") /// .build()?; @@ -35,10 +34,12 @@ impl Container { return Err(LibcontainerError::IncorrectStatus); } - let cgroups_path = self.spec()?.cgroup_path; - let use_systemd = self.systemd(); let cmanager = - libcgroups::common::create_cgroup_manager(cgroups_path, use_systemd, self.id())?; + libcgroups::common::create_cgroup_manager(&libcgroups::common::CgroupConfig { + cgroup_path: self.spec()?.cgroup_path, + systemd_cgroup: self.systemd(), + container_name: self.id().to_string(), + })?; // resume the frozen container cmanager.freeze(FreezerState::Thawed)?; diff --git a/crates/libcontainer/src/container/container_start.rs b/crates/libcontainer/src/container/container_start.rs index 5ce9ac4a6..c6805511c 100644 --- a/crates/libcontainer/src/container/container_start.rs +++ b/crates/libcontainer/src/container/container_start.rs @@ -15,13 +15,12 @@ impl Container { /// /// ```no_run /// use libcontainer::container::builder::ContainerBuilder; - /// use libcontainer::syscall::syscall::create_syscall; - /// use libcontainer::workload::default::DefaultExecutor; + /// use libcontainer::syscall::syscall::SyscallType; /// /// # fn main() -> anyhow::Result<()> { /// let mut container = ContainerBuilder::new( /// "74f1a4cb3801".to_owned(), - /// create_syscall().as_ref(), + /// SyscallType::default(), /// ) /// .as_init("/var/run/docker/bundle") /// .build()?; diff --git a/crates/libcontainer/src/container/init_builder.rs b/crates/libcontainer/src/container/init_builder.rs index b18a0b0ad..d30213053 100644 --- a/crates/libcontainer/src/container/init_builder.rs +++ b/crates/libcontainer/src/container/init_builder.rs @@ -20,17 +20,17 @@ use super::{ }; // Builder that can be used to configure the properties of a new container -pub struct InitContainerBuilder<'a> { - base: ContainerBuilder<'a>, +pub struct InitContainerBuilder { + base: ContainerBuilder, bundle: PathBuf, use_systemd: bool, detached: bool, } -impl<'a> InitContainerBuilder<'a> { +impl InitContainerBuilder { /// Generates the base configuration for a new container from which /// configuration methods can be chained - pub(super) fn new(builder: ContainerBuilder<'a>, bundle: PathBuf) -> Self { + pub(super) fn new(builder: ContainerBuilder, bundle: PathBuf) -> Self { Self { base: builder, bundle, @@ -99,14 +99,14 @@ impl<'a> InitContainerBuilder<'a> { pid_file: self.base.pid_file, console_socket: csocketfd, use_systemd: self.use_systemd, - spec: &spec, + spec, rootfs, rootless, notify_path, container: Some(container.clone()), preserve_fds: self.base.preserve_fds, detached: self.detached, - executor_manager: self.base.executor_manager, + executor: self.base.executor, }; builder_impl.create()?; diff --git a/crates/libcontainer/src/container/tenant_builder.rs b/crates/libcontainer/src/container/tenant_builder.rs index ebdff13ea..ae32cc583 100644 --- a/crates/libcontainer/src/container/tenant_builder.rs +++ b/crates/libcontainer/src/container/tenant_builder.rs @@ -32,8 +32,8 @@ const TENANT_TTY: &str = "tenant-tty-"; /// Builder that can be used to configure the properties of a process /// that will join an existing container sandbox -pub struct TenantContainerBuilder<'a> { - base: ContainerBuilder<'a>, +pub struct TenantContainerBuilder { + base: ContainerBuilder, env: HashMap, cwd: Option, args: Vec, @@ -43,11 +43,11 @@ pub struct TenantContainerBuilder<'a> { detached: bool, } -impl<'a> TenantContainerBuilder<'a> { +impl TenantContainerBuilder { /// Generates the base configuration for a process that will join /// an existing container sandbox from which configuration methods /// can be chained - pub(super) fn new(builder: ContainerBuilder<'a>) -> Self { + pub(super) fn new(builder: ContainerBuilder) -> Self { Self { base: builder, env: HashMap::new(), @@ -132,14 +132,14 @@ impl<'a> TenantContainerBuilder<'a> { pid_file: self.base.pid_file, console_socket: csocketfd, use_systemd, - spec: &spec, + spec, rootfs, rootless, notify_path: notify_path.clone(), container: None, preserve_fds: self.base.preserve_fds, detached: self.detached, - executor_manager: self.base.executor_manager, + executor: self.base.executor, }; let pid = builder_impl.create()?; diff --git a/crates/libcontainer/src/notify_socket.rs b/crates/libcontainer/src/notify_socket.rs index 9daf16b89..004027532 100644 --- a/crates/libcontainer/src/notify_socket.rs +++ b/crates/libcontainer/src/notify_socket.rs @@ -1,6 +1,7 @@ use nix::unistd::{self, close}; use std::env; use std::io::prelude::*; +use std::os::fd::FromRawFd; use std::os::unix::io::AsRawFd; use std::os::unix::net::{UnixListener, UnixStream}; use std::path::{Path, PathBuf}; @@ -9,7 +10,7 @@ pub const NOTIFY_FILE: &str = "notify.sock"; #[derive(Debug, thiserror::Error)] pub enum NotifyListenerError { - #[error("failed to chdir while creating notify socket")] + #[error("failed to chdir {path} while creating notify socket: {source}")] Chdir { source: nix::Error, path: PathBuf }, #[error("invalid path: {0}")] InvalidPath(PathBuf), @@ -43,6 +44,7 @@ pub struct NotifyListener { impl NotifyListener { pub fn new(socket_path: &Path) -> Result { + tracing::debug!(?socket_path, "create notify listener"); // Unix domain socket has a maximum length of 108, different from // normal path length of 255. Due to how docker create the path name // to the container working directory, there is a high chance that @@ -56,6 +58,7 @@ impl NotifyListener { .file_name() .ok_or_else(|| NotifyListenerError::InvalidPath(socket_path.to_owned()))?; let cwd = env::current_dir().map_err(NotifyListenerError::GetCwd)?; + tracing::debug!(?cwd, "the cwd to create the notify socket"); unistd::chdir(workdir).map_err(|e| NotifyListenerError::Chdir { source: e, path: workdir.to_owned(), @@ -93,6 +96,15 @@ impl NotifyListener { } } +impl Clone for NotifyListener { + fn clone(&self) -> Self { + let fd = self.socket.as_raw_fd(); + // This is safe because we just duplicate a valid fd. + let socket = unsafe { UnixListener::from_raw_fd(fd) }; + Self { socket } + } +} + pub struct NotifySocket { path: PathBuf, } diff --git a/crates/libcontainer/src/process/args.rs b/crates/libcontainer/src/process/args.rs index 44258975e..51530d2cd 100644 --- a/crates/libcontainer/src/process/args.rs +++ b/crates/libcontainer/src/process/args.rs @@ -1,11 +1,13 @@ -use libcgroups::common::AnyCgroupManager; +use libcgroups::common::CgroupConfig; use oci_spec::runtime::Spec; use std::os::unix::prelude::RawFd; use std::path::PathBuf; +use crate::container::Container; +use crate::notify_socket::NotifyListener; use crate::rootless::Rootless; -use crate::workload::ExecutorManager; -use crate::{container::Container, notify_socket::NotifyListener, syscall::Syscall}; +use crate::syscall::syscall::SyscallType; +use crate::workload::Executor; #[derive(Debug, Copy, Clone)] pub enum ContainerType { @@ -13,29 +15,30 @@ pub enum ContainerType { TenantContainer { exec_notify_fd: RawFd }, } -pub struct ContainerArgs<'a> { +#[derive(Clone)] +pub struct ContainerArgs { /// Indicates if an init or a tenant container should be created pub container_type: ContainerType, /// Interface to operating system primitives - pub syscall: &'a dyn Syscall, + pub syscall: SyscallType, /// OCI compliant runtime spec - pub spec: &'a Spec, + pub spec: Spec, /// Root filesystem of the container - pub rootfs: &'a PathBuf, + pub rootfs: PathBuf, /// Socket to communicate the file descriptor of the ptty pub console_socket: Option, /// The Unix Domain Socket to communicate container start - pub notify_socket: NotifyListener, + pub notify_listener: NotifyListener, /// File descriptors preserved/passed to the container init process. pub preserve_fds: i32, /// Container state - pub container: &'a Option, + pub container: Option, /// Options for rootless containers - pub rootless: &'a Option>, - /// Cgroup Manager - pub cgroup_manager: AnyCgroupManager, + pub rootless: Option, + /// Cgroup Manager Config + pub cgroup_config: CgroupConfig, /// If the container is to be run in detached mode pub detached: bool, /// Manage the functions that actually run on the container - pub executor_manager: &'a ExecutorManager, + pub executor: Executor, } diff --git a/crates/libcontainer/src/process/channel.rs b/crates/libcontainer/src/process/channel.rs index f05bf205d..5a03c4840 100644 --- a/crates/libcontainer/src/process/channel.rs +++ b/crates/libcontainer/src/process/channel.rs @@ -40,6 +40,7 @@ pub fn main_channel() -> Result<(MainSender, MainReceiver), ChannelError> { Ok((MainSender { sender }, MainReceiver { receiver })) } +#[derive(Clone)] pub struct MainSender { sender: Sender, } @@ -87,6 +88,7 @@ impl MainSender { } } +#[derive(Clone)] pub struct MainReceiver { receiver: Receiver, } @@ -193,6 +195,7 @@ pub fn intermediate_channel() -> Result<(IntermediateSender, IntermediateReceive )) } +#[derive(Clone)] pub struct IntermediateSender { sender: Sender, } @@ -212,6 +215,7 @@ impl IntermediateSender { } } +#[derive(Clone)] pub struct IntermediateReceiver { receiver: Receiver, } @@ -248,6 +252,7 @@ pub fn init_channel() -> Result<(InitSender, InitReceiver), ChannelError> { Ok((InitSender { sender }, InitReceiver { receiver })) } +#[derive(Clone)] pub struct InitSender { sender: Sender, } @@ -266,6 +271,7 @@ impl InitSender { } } +#[derive(Clone)] pub struct InitReceiver { receiver: Receiver, } diff --git a/crates/libcontainer/src/process/container_init_process.rs b/crates/libcontainer/src/process/container_init_process.rs index 3bdbb6a32..51086097c 100644 --- a/crates/libcontainer/src/process/container_init_process.rs +++ b/crates/libcontainer/src/process/container_init_process.rs @@ -69,7 +69,7 @@ pub enum InitProcessError { #[error(transparent)] NotifyListener(#[from] notify_socket::NotifyListenerError), #[error(transparent)] - Workload(#[from] workload::ExecutorManagerError), + Workload(#[from] workload::ExecutorError), } type Result = std::result::Result; @@ -332,15 +332,16 @@ pub fn container_init_process( main_sender: &mut channel::MainSender, init_receiver: &mut channel::InitReceiver, ) -> Result<()> { - let syscall = args.syscall; - let spec = args.spec; + let syscall = args.syscall.create_syscall(); + let spec = &args.spec; let linux = spec.linux().as_ref().ok_or(MissingSpecError::Linux)?; let proc = spec.process().as_ref().ok_or(MissingSpecError::Process)?; let mut envs: Vec = proc.env().as_ref().unwrap_or(&vec![]).clone(); - let rootfs_path = args.rootfs; + let rootfs_path = &args.rootfs; let hooks = spec.hooks().as_ref(); let container = args.container.as_ref(); let namespaces = Namespaces::try_from(linux.namespaces().as_ref())?; + let notify_listener = &args.notify_listener; setsid().map_err(|err| { tracing::error!(?err, "failed to setsid to create a session"); @@ -354,7 +355,7 @@ pub fn container_init_process( })?; } - apply_rest_namespaces(&namespaces, spec, syscall)?; + apply_rest_namespaces(&namespaces, spec, syscall.as_ref())?; if let Some(true) = proc.no_new_privileges() { let _ = prctl::set_no_new_privileges(true); @@ -452,7 +453,7 @@ pub fn container_init_process( if let Some(paths) = linux.readonly_paths() { // mount readonly path for path in paths { - readonly_path(Path::new(path), syscall).map_err(|err| { + readonly_path(Path::new(path), syscall.as_ref()).map_err(|err| { tracing::error!(?err, ?path, "failed to set readonly path"); err })?; @@ -462,7 +463,7 @@ pub fn container_init_process( if let Some(paths) = linux.masked_paths() { // mount masked path for path in paths { - masked_path(Path::new(path), linux.mount_label(), syscall).map_err(|err| { + masked_path(Path::new(path), linux.mount_label(), syscall.as_ref()).map_err(|err| { tracing::error!(?err, ?path, "failed to set masked path"); err })?; @@ -486,7 +487,7 @@ pub fn container_init_process( } }; - set_supplementary_gids(proc.user(), args.rootless, syscall).map_err(|err| { + set_supplementary_gids(proc.user(), &args.rootless, syscall.as_ref()).map_err(|err| { tracing::error!(?err, "failed to set supplementary gids"); err })?; @@ -576,12 +577,12 @@ pub fn container_init_process( tracing::warn!("seccomp not available, unable to enforce no_new_privileges!") } - capabilities::reset_effective(syscall).map_err(|err| { + capabilities::reset_effective(syscall.as_ref()).map_err(|err| { tracing::error!(?err, "failed to reset effective capabilities"); InitProcessError::SyscallOther(err) })?; if let Some(caps) = proc.capabilities() { - capabilities::drop_privileges(caps, syscall).map_err(|err| { + capabilities::drop_privileges(caps, syscall.as_ref()).map_err(|err| { tracing::error!(?err, "failed to drop capabilities"); InitProcessError::SyscallOther(err) })?; @@ -652,13 +653,11 @@ pub fn container_init_process( })?; // listing on the notify socket for container start command - args.notify_socket - .wait_for_container_start() - .map_err(|err| { - tracing::error!(?err, "failed to wait for container start"); - err - })?; - args.notify_socket.close().map_err(|err| { + notify_listener.wait_for_container_start().map_err(|err| { + tracing::error!(?err, "failed to wait for container start"); + err + })?; + notify_listener.close().map_err(|err| { tracing::error!(?err, "failed to close notify socket"); err })?; @@ -675,7 +674,7 @@ pub fn container_init_process( } if proc.args().is_some() { - args.executor_manager.exec(spec).map_err(|err| { + (args.executor)(spec).map_err(|err| { tracing::error!(?err, "failed to execute payload"); err })?; diff --git a/crates/libcontainer/src/process/container_intermediate_process.rs b/crates/libcontainer/src/process/container_intermediate_process.rs index 5a9706f02..e515f2520 100644 --- a/crates/libcontainer/src/process/container_intermediate_process.rs +++ b/crates/libcontainer/src/process/container_intermediate_process.rs @@ -8,6 +8,7 @@ use procfs::process::Process; use super::args::{ContainerArgs, ContainerType}; use super::container_init_process::container_init_process; +use super::fork::CloneCb; #[derive(Debug, thiserror::Error)] pub enum IntermediateProcessError { @@ -36,13 +37,14 @@ pub fn container_intermediate_process( intermediate_chan: &mut (channel::IntermediateSender, channel::IntermediateReceiver), init_chan: &mut (channel::InitSender, channel::InitReceiver), main_sender: &mut channel::MainSender, -) -> Result { +) -> Result<()> { let (inter_sender, inter_receiver) = intermediate_chan; let (init_sender, init_receiver) = init_chan; - let command = &args.syscall; + let command = args.syscall.create_syscall(); let spec = &args.spec; let linux = spec.linux().as_ref().ok_or(MissingSpecError::Linux)?; let namespaces = Namespaces::try_from(linux.namespaces().as_ref())?; + let cgroup_manager = libcgroups::common::create_cgroup_manager(&args.cgroup_config).unwrap(); // this needs to be done before we create the init process, so that the init // process will already be captured by the cgroup. It also needs to be done @@ -55,7 +57,7 @@ pub fn container_intermediate_process( // the cgroup of the process will form the root of the cgroup hierarchy in // the cgroup namespace. apply_cgroups( - &args.cgroup_manager, + &cgroup_manager, linux.resources().as_ref(), matches!(args.container_type, ContainerType::InitContainer), )?; @@ -107,6 +109,51 @@ pub fn container_intermediate_process( namespaces.unshare_or_setns(pid_namespace)?; } + let cb: CloneCb = { + let args = args.clone(); + let init_sender = init_sender.clone(); + let inter_sender = inter_sender.clone(); + let mut main_sender = main_sender.clone(); + let mut init_receiver = init_receiver.clone(); + Box::new(move || { + if let Err(ret) = prctl::set_name("youki:[2:INIT]") { + tracing::error!(?ret, "failed to set name for child process"); + return ret; + } + + // We are inside the forked process here. The first thing we have to do + // is to close any unused senders, since fork will make a dup for all + // the socket. + if let Err(err) = init_sender.close() { + tracing::error!(?err, "failed to close receiver in init process"); + return -1; + } + if let Err(err) = inter_sender.close() { + tracing::error!(?err, "failed to close sender in the intermediate process"); + return -1; + } + match container_init_process(&args, &mut main_sender, &mut init_receiver) { + Ok(_) => 0, + Err(e) => { + if let ContainerType::TenantContainer { exec_notify_fd } = args.container_type { + let buf = format!("{e}"); + if let Err(err) = write(exec_notify_fd, buf.as_bytes()) { + tracing::error!(?err, "failed to write to exec notify fd"); + return -1; + } + if let Err(err) = close(exec_notify_fd) { + tracing::error!(?err, "failed to close exec notify fd"); + return -1; + } + } + + tracing::error!("failed to initialize container process: {e}"); + -1 + } + } + }) + }; + // We have to record the pid of the init process. The init process will be // inside the pid namespace, so we can't rely on the init process to send us // the correct pid. We also want to clone the init process as a sibling @@ -115,41 +162,7 @@ pub fn container_intermediate_process( // configuration. The youki main process can decide what to do with the init // process and the intermediate process can just exit safely after the job // is done. - let pid = fork::container_clone_sibling("youki:[2:INIT]", || { - // We are inside the forked process here. The first thing we have to do - // is to close any unused senders, since fork will make a dup for all - // the socket. - init_sender.close().map_err(|err| { - tracing::error!("failed to close receiver in init process: {}", err); - IntermediateProcessError::Channel(err) - })?; - inter_sender.close().map_err(|err| { - tracing::error!( - "failed to close sender in the intermediate process: {}", - err - ); - IntermediateProcessError::Channel(err) - })?; - match container_init_process(args, main_sender, init_receiver) { - Ok(_) => Ok(0), - Err(e) => { - if let ContainerType::TenantContainer { exec_notify_fd } = args.container_type { - let buf = format!("{e}"); - write(exec_notify_fd, buf.as_bytes()).map_err(|err| { - tracing::error!("failed to write to exec notify fd: {}", err); - IntermediateProcessError::ExecNotify(err) - })?; - close(exec_notify_fd).map_err(|err| { - tracing::error!("failed to close exec notify fd: {}", err); - IntermediateProcessError::ExecNotify(err) - })?; - } - tracing::error!("failed to initialize container process: {e}"); - Err(e.into()) - } - } - }) - .map_err(|err| { + let pid = fork::container_clone_sibling(cb).map_err(|err| { tracing::error!("failed to fork init process: {}", err); IntermediateProcessError::InitProcess(err) })?; @@ -183,7 +196,8 @@ pub fn container_intermediate_process( tracing::error!("failed to close unused init sender: {}", err); err })?; - Ok(pid) + + Ok(()) } fn apply_cgroups< diff --git a/crates/libcontainer/src/process/container_main_process.rs b/crates/libcontainer/src/process/container_main_process.rs index 29d8f5584..38f7c0d60 100644 --- a/crates/libcontainer/src/process/container_main_process.rs +++ b/crates/libcontainer/src/process/container_main_process.rs @@ -1,6 +1,8 @@ use crate::{ process::{ - args::ContainerArgs, channel, container_intermediate_process, fork, + args::ContainerArgs, + channel, container_intermediate_process, + fork::{self, CloneCb}, intel_rdt::setup_intel_rdt, }, rootless::Rootless, @@ -37,33 +39,52 @@ pub fn container_main_process(container_args: &ContainerArgs) -> Result<(Pid, bo // cloned process, we have to be deligent about closing any unused channel. // At minimum, we have to close down any unused senders. The corresponding // receivers will be cleaned up once the senders are closed down. - let (main_sender, main_receiver) = &mut channel::main_channel()?; - let inter_chan = &mut channel::intermediate_channel()?; - let init_chan = &mut channel::init_channel()?; + let (main_sender, mut main_receiver) = channel::main_channel()?; + let inter_chan = channel::intermediate_channel()?; + let init_chan = channel::init_channel()?; - let intermediate_pid = fork::container_fork("youki:[1:INTER]", || { - container_intermediate_process::container_intermediate_process( - container_args, - inter_chan, - init_chan, - main_sender, - )?; + let cb: CloneCb = { + let container_args = container_args.clone(); + let mut main_sender = main_sender.clone(); + let mut inter_chan = inter_chan.clone(); + let mut init_chan = init_chan.clone(); + Box::new(move || { + if let Err(ret) = prctl::set_name("youki:[1:INTER]") { + tracing::error!(?ret, "failed to set name for child process"); + return ret; + } + + match container_intermediate_process::container_intermediate_process( + &container_args, + &mut inter_chan, + &mut init_chan, + &mut main_sender, + ) { + Ok(_) => 0, + Err(err) => { + tracing::error!(?err, "failed to run intermediate process"); + -1 + } + } + }) + }; - Ok(0) - }) - .map_err(|err| { + let intermediate_pid = fork::container_clone(cb).map_err(|err| { tracing::error!("failed to fork intermediate process: {}", err); ProcessError::IntermediateProcessFailed(err) })?; // Close down unused fds. The corresponding fds are duplicated to the - // child process during fork. + // child process during clone. main_sender.close().map_err(|err| { tracing::error!("failed to close unused sender: {}", err); err })?; - let (inter_sender, inter_receiver) = inter_chan; + let (mut inter_sender, inter_receiver) = inter_chan; + #[cfg(feature = "libseccomp")] + let (mut init_sender, init_receiver) = init_chan; + #[cfg(not(feature = "libseccomp"))] let (init_sender, init_receiver) = init_chan; // If creating a rootless container, the intermediate process will ask @@ -106,8 +127,8 @@ pub fn container_main_process(container_args: &ContainerArgs) -> Result<(Pid, bo crate::process::seccomp_listener::sync_seccomp( seccomp, &state, - init_sender, - main_receiver, + &mut init_sender, + &mut main_receiver, )?; } @@ -222,7 +243,7 @@ mod tests { let tmp = tempfile::tempdir()?; let id_mapper = RootlessIDMapper::new_test(tmp.path().to_path_buf()); let rootless = Rootless { - uid_mappings: Some(&uid_mappings), + uid_mappings: Some(uid_mappings), privileged: true, rootless_id_mapper: id_mapper.clone(), ..Default::default() @@ -277,7 +298,7 @@ mod tests { let tmp = tempfile::tempdir()?; let id_mapper = RootlessIDMapper::new_test(tmp.path().to_path_buf()); let rootless = Rootless { - gid_mappings: Some(&gid_mappings), + gid_mappings: Some(gid_mappings), rootless_id_mapper: id_mapper.clone(), ..Default::default() }; diff --git a/crates/libcontainer/src/process/fork.rs b/crates/libcontainer/src/process/fork.rs index a3557a531..114a22440 100644 --- a/crates/libcontainer/src/process/fork.rs +++ b/crates/libcontainer/src/process/fork.rs @@ -1,98 +1,242 @@ +use std::{ffi::c_int, num::NonZeroUsize}; + use libc::SIGCHLD; -use nix::unistd::Pid; -use prctl; +use nix::{ + sys::{mman, resource}, + unistd::Pid, +}; #[derive(Debug, thiserror::Error)] pub enum CloneError { - #[error("failed to clone process using clone3")] + #[error("failed to clone process")] Clone(#[source] nix::Error), + #[error("failed to get system memory page size")] + PageSize(#[source] nix::Error), + #[error("failed to get resource limit")] + ResourceLimit(#[source] nix::Error), + #[error("the stack size is zero")] + ZeroStackSize, + #[error("failed to allocate stack")] + StackAllocation(#[source] nix::Error), + #[error("failed to create stack guard page")] + GuardPage(#[source] nix::Error), + #[error("unknown error code {0}")] + UnknownErrno(i32), } -type Result = std::result::Result; - -#[derive(Debug, thiserror::Error)] -pub enum CallbackError { - #[error(transparent)] - IntermediateProcess( - #[from] crate::process::container_intermediate_process::IntermediateProcessError, - ), - #[error(transparent)] - InitProcess(#[from] crate::process::container_init_process::InitProcessError), - // Need a fake error for testing - #[error("unknown")] - #[cfg(test)] - Test, -} - -type CallbackResult = std::result::Result; +/// The callback function used in clone system call. The return value is i32 +/// which is consistent with C functions return code. The trait has to be +/// `FnMut` because we need to be able to call the closure multiple times, once +/// in clone3 and once in clone if fallback is required. The closure is boxed +/// because we need to store the closure on heap, not stack in the case of +/// `clone`. Unlike `fork` or `clone3`, the `clone` glibc wrapper requires us to +/// pass in a child stack, which is empty. By storing the closure in heap, we +/// can then in the new process to re-box the heap memory back to a closure +/// correctly. +pub type CloneCb = Box i32>; -// Fork/Clone a sibling process that shares the same parent as the calling +// Clone a sibling process that shares the same parent as the calling // process. This is used to launch the container init process so the parent // process of the calling process can receive ownership of the process. If we // clone a child process as the init process, the calling process (likely the // youki main process) will exit and the init process will be re-parented to the // process 1 (system init process), which is not the right behavior of what we // look for. -pub fn container_clone_sibling CallbackResult>( - child_name: &str, - cb: F, -) -> Result { - let mut clone = clone3::Clone3::default(); +pub fn container_clone_sibling(cb: CloneCb) -> Result { // Note: normally, an exit signal is required, but when using // `CLONE_PARENT`, the `clone3` will return EINVAL if an exit signal is set. // The older `clone` will not return EINVAL in this case. Instead it ignores - // the exit signal bits in the glibc wrapper. - clone.flag_parent(); + // the exit signal bits in the glibc wrapper. Therefore, we explicitly set + // the exit_signal to None here, so this works for both version of clone. + clone_internal(cb, libc::CLONE_PARENT as u64, None) +} - container_clone(child_name, cb, clone) +// Clone a child process and execute the callback. +pub fn container_clone(cb: CloneCb) -> Result { + clone_internal(cb, 0, Some(SIGCHLD as u64)) +} + +// An internal wrapper to manage the clone3 vs clone fallback logic. +fn clone_internal( + mut cb: CloneCb, + flags: u64, + exit_signal: Option, +) -> Result { + match clone3(&mut cb, flags, exit_signal) { + Ok(pid) => Ok(pid), + // For now, we decide to only fallback on ENOSYS + Err(CloneError::Clone(nix::Error::ENOSYS)) => { + tracing::debug!("clone3 is not supported, fallback to clone"); + let pid = clone(cb, flags, exit_signal)?; + + Ok(pid) + } + Err(err) => Err(err), + } } -// A simple clone wrapper to clone3 so we can share this logic in different -// fork/clone situations. We decided to minimally support kernel version >= 5.4, -// and `clone3` requires only kernel version >= 5.3. Therefore, we don't need to -// fall back to `clone` or `fork`. -fn container_clone CallbackResult>( - child_name: &str, - cb: F, - mut clone_cmd: clone3::Clone3, -) -> Result { - // Return the child's pid in case of parent/calling process, and for the - // cloned process, run the callback function, and exit with the same exit - // code returned by the callback. If there was any error when trying to run - // callback, exit with -1 - match unsafe { - clone_cmd - .call() - .map_err(|err| CloneError::Clone(nix::errno::from_i32(err.0)))? - } { +// Unlike the clone call, clone3 is currently using the kernel syscall, mimicking +// the interface of fork. There is not need to explicitly manage the memory, so +// we can safely passing the callback closure as reference. +fn clone3(cb: &mut CloneCb, flags: u64, exit_signal: Option) -> Result { + #[repr(C)] + struct clone3_args { + flags: u64, + pidfd: u64, + child_tid: u64, + parent_tid: u64, + exit_signal: u64, + stack: u64, + stack_size: u64, + tls: u64, + set_tid: u64, + set_tid_size: u64, + cgroup: u64, + } + let mut args = clone3_args { + flags, + pidfd: 0, + child_tid: 0, + parent_tid: 0, + exit_signal: exit_signal.unwrap_or(0), + stack: 0, + stack_size: 0, + tls: 0, + set_tid: 0, + set_tid_size: 0, + cgroup: 0, + }; + let args_ptr = &mut args as *mut clone3_args; + let args_size = std::mem::size_of::(); + // For now, we can only use clone3 as a kernel syscall. Libc wrapper is not + // available yet. This can have undefined behavior because libc authors do + // not like people calling kernel syscall to directly create processes. Libc + // does perform additional bookkeeping when calling clone or fork. So far, + // we have not observed any issues with calling clone3 directly, but we + // should keep an eye on it. + match unsafe { libc::syscall(libc::SYS_clone3, args_ptr, args_size) } { + -1 => Err(CloneError::Clone(nix::Error::last())), 0 => { - prctl::set_name(child_name).expect("failed to set name"); - // Inside the cloned process - let ret = match cb() { - Err(error) => { - tracing::debug!("failed to run child process in clone: {:?}", error); - -1 - } - Ok(exit_code) => exit_code, - }; - std::process::exit(ret); + // Inside the cloned process, we execute the callback and exit with + // the return code. + std::process::exit(cb()); } - pid => Ok(Pid::from_raw(pid)), + ret if ret >= 0 => Ok(Pid::from_raw(ret as i32)), + ret => Err(CloneError::UnknownErrno(ret as i32)), } } -// Execute the cb in another process. Make the fork works more like thread_spawn -// or clone, so it is easier to reason. Compared to clone call, fork is easier -// to use since fork will magically take care of all the variable copying. If -// using clone, we would have to manually make sure all the variables are -// correctly send to the new process, especially Rust borrow checker will be a -// lot of hassel to deal with every details. -pub fn container_fork CallbackResult>(child_name: &str, cb: F) -> Result { - // Using `clone3` to mimic the effect of `fork`. - let mut clone = clone3::Clone3::default(); - clone.exit_signal(SIGCHLD as u64); - - container_clone(child_name, cb, clone) +fn clone(cb: CloneCb, flags: u64, exit_signal: Option) -> Result { + const DEFAULT_STACK_SIZE: usize = 8 * 1024 * 1024; // 8M + const DEFAULT_PAGE_SIZE: usize = 4 * 1024; // 4K + + // Use sysconf to find the page size. If there is an error, we assume + // the default 4K page size. + let page_size = nix::unistd::sysconf(nix::unistd::SysconfVar::PAGE_SIZE) + .map_err(CloneError::PageSize)? + .map(|size| size as usize) + .unwrap_or(DEFAULT_PAGE_SIZE); + + // Find out the default stack max size through getrlimit. + let (rlim_cur, _) = + resource::getrlimit(resource::Resource::RLIMIT_STACK).map_err(CloneError::ResourceLimit)?; + // mmap will return ENOMEM if stack size is unlimited when we create the + // child stack, so we need to set a reasonable default stack size. + let default_stack_size = if rlim_cur != u64::MAX { + rlim_cur as usize + } else { + tracing::debug!( + "stack size returned by getrlimit() is unlimited, use DEFAULT_STACK_SIZE(8MB)" + ); + DEFAULT_STACK_SIZE + }; + + // Using the clone syscall requires us to create the stack space for the + // child process instead of taken cared for us like fork call. We use mmap + // here to create the stack. Instead of guessing how much space the child + // process needs, we allocate through mmap to the system default limit, + // which is 8MB on most of the linux system today. This is OK since mmap + // will only reserve the address space upfront, instead of allocating + // physical memory upfront. The stack will grow as needed, up to the size + // reserved, so no wasted memory here. Lastly, the child stack only needs + // to support the container init process set up code in Youki. When Youki + // calls exec into the container payload, exec will reset the stack. Note, + // do not use MAP_GROWSDOWN since it is not well supported. + // Ref: https://man7.org/linux/man-pages/man2/mmap.2.html + let child_stack = unsafe { + mman::mmap( + None, + NonZeroUsize::new(default_stack_size).ok_or(CloneError::ZeroStackSize)?, + mman::ProtFlags::PROT_READ | mman::ProtFlags::PROT_WRITE, + mman::MapFlags::MAP_PRIVATE | mman::MapFlags::MAP_ANONYMOUS | mman::MapFlags::MAP_STACK, + -1, + 0, + ) + .map_err(CloneError::StackAllocation)? + }; + unsafe { + // Consistent with how pthread_create sets up the stack, we create a + // guard page of 1 page, to protect the child stack collision. Note, for + // clone call, the child stack will grow downward, so the bottom of the + // child stack is in the beginning. + mman::mprotect(child_stack, page_size, mman::ProtFlags::PROT_NONE) + .map_err(CloneError::GuardPage)?; + }; + + // Since the child stack for clone grows downward, we need to pass in + // the top of the stack address. + let child_stack_top = unsafe { child_stack.add(default_stack_size) }; + + // Combine the clone flags with exit signals. + let combined_flags = (flags | exit_signal.unwrap_or(0)) as c_int; + + // We are passing the boxed closure "cb" into the clone function as the a + // function pointer in C. The box closure in Rust is both a function pointer + // and a struct. However, when casting the box closure into libc::c_void, + // the function pointer will be lost. Therefore, to work around the issue, + // we double box the closure. This is consistent with how std::unix::thread + // handles the closure. + // Ref: https://github.com/rust-lang/rust/blob/master/library/std/src/sys/unix/thread.rs + let data = Box::into_raw(Box::new(cb)); + // The main is a wrapper function passed into clone call below. The "data" + // arg is actually a raw pointer to the Box closure. so here, we re-box the + // pointer back into a box closure so the main takes ownership of the + // memory. Then we can call the closure. + extern "C" fn main(data: *mut libc::c_void) -> libc::c_int { + unsafe { Box::from_raw(data as *mut CloneCb)() } + } + + // The nix::sched::clone wrapper doesn't provide the right interface. Using + // the clone syscall is one of the rare cases where we don't want rust to + // manage the child stack memory. Instead, we want to use c_void directly + // here. Therefore, here we are using libc::clone syscall directly for + // better control. The child stack will be cleaned when exec is called or + // the child process terminates. The nix wrapper also does not treat the + // closure memory correctly. The wrapper implementation fails to pass the + // right ownership to the new child process. + // Ref: https://github.com/nix-rust/nix/issues/919 + // Ref: https://github.com/nix-rust/nix/pull/920 + let ret = unsafe { + libc::clone( + main, + child_stack_top, + combined_flags, + data as *mut libc::c_void, + ) + }; + + // After the clone returns, the heap memory associated with the Box closure + // is duplicated in the cloned process. Therefore, we can safely re-box the + // closure from the raw pointer and let rust to continue managing the + // memory. We call drop here explicitly to avoid the warning that the + // closure is not used. This is correct since the closure is called in the + // cloned process, not the parent process. + unsafe { drop(Box::from_raw(data)) }; + match ret { + -1 => Err(CloneError::Clone(nix::Error::last())), + pid if ret > 0 => Ok(Pid::from_raw(pid)), + _ => unreachable!("clone returned a negative pid {ret}"), + } } #[cfg(test)] @@ -106,7 +250,7 @@ mod test { #[test] fn test_container_fork() -> Result<()> { - let pid = container_fork("test:child", || Ok(0))?; + let pid = container_clone(Box::new(|| 0))?; match waitpid(pid, None).expect("wait pid failed.") { WaitStatus::Exited(p, status) => { assert_eq!(pid, p); @@ -119,7 +263,7 @@ mod test { #[test] fn test_container_err_fork() -> Result<()> { - let pid = container_fork("test:child", || Err(CallbackError::Test))?; + let pid = container_clone(Box::new(|| -1))?; match waitpid(pid, None).expect("wait pid failed.") { WaitStatus::Exited(p, status) => { assert_eq!(pid, p); @@ -171,7 +315,7 @@ mod test { unistd::ForkResult::Child => { // Inside the forked process. We call `container_clone` and pass // the pid to the parent process. - let pid = container_clone_sibling("test:child", || Ok(0))?; + let pid = container_clone_sibling(Box::new(|| 0))?; sender.send(pid.as_raw())?; sender.close()?; std::process::exit(0); @@ -180,4 +324,68 @@ mod test { Ok(()) } + + // This test depends on libseccomp to work. + #[cfg(feature = "libseccomp")] + #[test] + fn test_clone_fallback() -> Result<()> { + use crate::test_utils::TestCallbackError; + use oci_spec::runtime::{ + Arch, LinuxSeccompAction, LinuxSeccompBuilder, LinuxSyscallBuilder, + }; + + fn has_clone3() -> bool { + // We use the probe syscall to check if the kernel supports clone3 or + // seccomp has successfully blocked clone3. + let res = unsafe { libc::syscall(libc::SYS_clone3, 0, 0) }; + let err = (res == -1) + .then(std::io::Error::last_os_error) + .expect("probe syscall should not succeed"); + err.raw_os_error() != Some(libc::ENOSYS) + } + + // To test the fallback behavior, we will create a seccomp rule that + // blocks `clone3` as ENOSYS. + let syscall = LinuxSyscallBuilder::default() + .names(vec![String::from("clone3")]) + .action(LinuxSeccompAction::ScmpActErrno) + .errno_ret(libc::ENOSYS as u32) + .build()?; + let seccomp_profile = LinuxSeccompBuilder::default() + .default_action(LinuxSeccompAction::ScmpActAllow) + .architectures(vec![Arch::ScmpArchNative]) + .syscalls(vec![syscall]) + .build()?; + + crate::test_utils::test_in_child_process(|| { + // We use seccomp to block `clone3` + let _ = prctl::set_no_new_privileges(true); + crate::seccomp::initialize_seccomp(&seccomp_profile) + .expect("failed to initialize seccomp"); + + if has_clone3() { + return Err(TestCallbackError::Custom( + "clone3 is not blocked by seccomp".into(), + )); + } + + let pid = container_clone(Box::new(|| 0)).map_err(|err| err.to_string())?; + match waitpid(pid, None).expect("wait pid failed.") { + WaitStatus::Exited(p, status) => { + assert_eq!(pid, p); + assert_eq!(status, 0); + } + status => { + return Err(TestCallbackError::Custom(format!( + "failed to wait on child process: {:?}", + status + ))); + } + }; + + Ok(()) + })?; + + Ok(()) + } } diff --git a/crates/libcontainer/src/process/message.rs b/crates/libcontainer/src/process/message.rs index 8de9de7c8..9b04723f4 100644 --- a/crates/libcontainer/src/process/message.rs +++ b/crates/libcontainer/src/process/message.rs @@ -2,7 +2,7 @@ use core::fmt; use serde::{Deserialize, Serialize}; /// Used as a wrapper for messages to be sent between child and parent processes -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Serialize, Deserialize, Clone)] pub enum Message { IntermediateReady(i32), InitReady, diff --git a/crates/libcontainer/src/rootless.rs b/crates/libcontainer/src/rootless.rs index 31ec7b23e..87eff2495 100644 --- a/crates/libcontainer/src/rootless.rs +++ b/crates/libcontainer/src/rootless.rs @@ -122,15 +122,15 @@ pub enum MappingError { } #[derive(Debug, Clone, Default)] -pub struct Rootless<'a> { +pub struct Rootless { /// Location of the newuidmap binary pub newuidmap: Option, /// Location of the newgidmap binary pub newgidmap: Option, /// Mappings for user ids - pub(crate) uid_mappings: Option<&'a Vec>, + pub(crate) uid_mappings: Option>, /// Mappings for group ids - pub(crate) gid_mappings: Option<&'a Vec>, + pub(crate) gid_mappings: Option>, /// Info on the user namespaces pub user_namespace: Option, /// Is rootless container requested by a privileged user @@ -139,8 +139,8 @@ pub struct Rootless<'a> { pub rootless_id_mapper: RootlessIDMapper, } -impl<'a> Rootless<'a> { - pub fn new(spec: &'a Spec) -> Result>> { +impl Rootless { + pub fn new(spec: &Spec) -> Result> { let linux = spec.linux().as_ref().ok_or(MissingSpecError::Linux)?; let namespaces = Namespaces::try_from(linux.namespaces().as_ref()) .map_err(ValidateSpecError::Namespaces)?; @@ -176,7 +176,7 @@ impl<'a> Rootless<'a> { pub fn write_uid_mapping(&self, target_pid: Pid) -> Result<()> { tracing::debug!("write UID mapping for {:?}", target_pid); - if let Some(uid_mappings) = self.uid_mappings { + if let Some(uid_mappings) = self.uid_mappings.as_ref() { write_id_mapping( target_pid, self.rootless_id_mapper.get_uid_path(&target_pid).as_path(), @@ -189,7 +189,7 @@ impl<'a> Rootless<'a> { pub fn write_gid_mapping(&self, target_pid: Pid) -> Result<()> { tracing::debug!("write GID mapping for {:?}", target_pid); - if let Some(gid_mappings) = self.gid_mappings { + if let Some(gid_mappings) = self.gid_mappings.as_ref() { write_id_mapping( target_pid, self.rootless_id_mapper.get_gid_path(&target_pid).as_path(), @@ -205,10 +205,10 @@ impl<'a> Rootless<'a> { } } -impl<'a> TryFrom<&'a Linux> for Rootless<'a> { +impl TryFrom<&Linux> for Rootless { type Error = RootlessError; - fn try_from(linux: &'a Linux) -> Result { + fn try_from(linux: &Linux) -> Result { let namespaces = Namespaces::try_from(linux.namespaces().as_ref()) .map_err(ValidateSpecError::Namespaces)?; let user_namespace = namespaces @@ -217,8 +217,8 @@ impl<'a> TryFrom<&'a Linux> for Rootless<'a> { Ok(Self { newuidmap: None, newgidmap: None, - uid_mappings: linux.uid_mappings().as_ref(), - gid_mappings: linux.gid_mappings().as_ref(), + uid_mappings: linux.uid_mappings().to_owned(), + gid_mappings: linux.gid_mappings().to_owned(), user_namespace: user_namespace.cloned(), privileged: nix::unistd::geteuid().is_root(), rootless_id_mapper: RootlessIDMapper::new(), diff --git a/crates/libcontainer/src/syscall/syscall.rs b/crates/libcontainer/src/syscall/syscall.rs index 587963839..106bde0df 100644 --- a/crates/libcontainer/src/syscall/syscall.rs +++ b/crates/libcontainer/src/syscall/syscall.rs @@ -56,10 +56,31 @@ pub trait Syscall { ) -> Result<()>; } -pub fn create_syscall() -> Box { - if cfg!(test) { - Box::::default() - } else { - Box::new(LinuxSyscall) +#[derive(Clone, Copy)] +pub enum SyscallType { + Linux, + Test, +} + +impl Default for SyscallType { + fn default() -> Self { + if cfg!(test) { + SyscallType::Test + } else { + SyscallType::Linux + } } } + +impl SyscallType { + pub fn create_syscall(&self) -> Box { + match self { + SyscallType::Linux => Box::new(LinuxSyscall), + SyscallType::Test => Box::::default(), + } + } +} + +pub fn create_syscall() -> Box { + SyscallType::default().create_syscall() +} diff --git a/crates/libcontainer/src/workload/default.rs b/crates/libcontainer/src/workload/default.rs index 923c96dae..b59a3e6b5 100644 --- a/crates/libcontainer/src/workload/default.rs +++ b/crates/libcontainer/src/workload/default.rs @@ -5,13 +5,8 @@ use oci_spec::runtime::Spec; use super::{Executor, ExecutorError, EMPTY}; -const EXECUTOR_NAME: &str = "default"; - -#[derive(Default)] -pub struct DefaultExecutor {} - -impl Executor for DefaultExecutor { - fn exec(&self, spec: &Spec) -> Result<(), ExecutorError> { +pub fn get_executor() -> Executor { + Box::new(|spec: &Spec| -> Result<(), ExecutorError> { tracing::debug!("executing workload with default handler"); let args = spec .process() @@ -41,13 +36,5 @@ impl Executor for DefaultExecutor { // After execvp is called, the process is replaced with the container // payload through execvp, so it should never reach here. unreachable!(); - } - - fn can_handle(&self, _: &Spec) -> bool { - true - } - - fn name(&self) -> &'static str { - EXECUTOR_NAME - } + }) } diff --git a/crates/libcontainer/src/workload/mod.rs b/crates/libcontainer/src/workload/mod.rs index bf06909c4..bae41e04d 100644 --- a/crates/libcontainer/src/workload/mod.rs +++ b/crates/libcontainer/src/workload/mod.rs @@ -12,52 +12,8 @@ pub enum ExecutorError { Execution(#[from] Box), #[error("{0}")] Other(String), + #[error("{0} executor can't handle spec")] + CantHandle(&'static str), } -pub trait Executor { - /// Executes the workload - fn exec(&self, spec: &Spec) -> Result<(), ExecutorError>; - - /// Checks if the handler is able to handle the workload - fn can_handle(&self, spec: &Spec) -> bool; - - /// The name of the handler - fn name(&self) -> &'static str; -} - -#[derive(Debug, thiserror::Error)] -pub enum ExecutorManagerError { - #[error("failed executor {name}")] - ExecutionFailed { - source: Box, - name: String, - }, - #[error("failed to find an executor that satisfies all requirements")] - NoExecutorFound, -} - -/// Manage the functions that actually run on the container -pub struct ExecutorManager { - pub executors: Vec>, -} - -impl ExecutorManager { - pub fn exec(&self, spec: &Spec) -> Result<(), ExecutorManagerError> { - if self.executors.is_empty() { - return Err(ExecutorManagerError::NoExecutorFound); - }; - - for executor in self.executors.iter() { - if executor.can_handle(spec) { - return executor.exec(spec).map_err(|e| { - tracing::error!(err = ?e, name = ?executor.name(), "failed to execute workload"); - ExecutorManagerError::ExecutionFailed { - source: e.into(), - name: executor.name().to_string(), - } - }); - } - } - Err(ExecutorManagerError::NoExecutorFound) - } -} +pub type Executor = Box Result<(), ExecutorError>>; diff --git a/crates/youki/src/commands/create.rs b/crates/youki/src/commands/create.rs index 7aea7c45f..009a7854d 100644 --- a/crates/youki/src/commands/create.rs +++ b/crates/youki/src/commands/create.rs @@ -2,10 +2,10 @@ use anyhow::Result; use std::path::PathBuf; -use libcontainer::{container::builder::ContainerBuilder, syscall::syscall::create_syscall}; +use libcontainer::{container::builder::ContainerBuilder, syscall::syscall::SyscallType}; use liboci_cli::Create; -use crate::workload::executor::default_executors; +use crate::workload::executor::default_executor; // One thing to note is that in the end, container is just another process in Linux // it has specific/different control group, namespace, using which program executing in it @@ -13,9 +13,8 @@ use crate::workload::executor::default_executors; // it is running, it is just another process, and has attributes such as pid, file descriptors, etc. // associated with it like any other process. pub fn create(args: Create, root_path: PathBuf, systemd_cgroup: bool) -> Result<()> { - let syscall = create_syscall(); - ContainerBuilder::new(args.container_id.clone(), syscall.as_ref()) - .with_executor(default_executors())? + ContainerBuilder::new(args.container_id.clone(), SyscallType::default()) + .with_executor(default_executor()) .with_pid_file(args.pid_file.as_ref())? .with_console_socket(args.console_socket.as_ref()) .with_root_path(root_path)? diff --git a/crates/youki/src/commands/exec.rs b/crates/youki/src/commands/exec.rs index f3ac65fa9..859e88a76 100644 --- a/crates/youki/src/commands/exec.rs +++ b/crates/youki/src/commands/exec.rs @@ -2,15 +2,14 @@ use anyhow::Result; use nix::sys::wait::{waitpid, WaitStatus}; use std::path::PathBuf; -use libcontainer::{container::builder::ContainerBuilder, syscall::syscall::create_syscall}; +use libcontainer::{container::builder::ContainerBuilder, syscall::syscall::SyscallType}; use liboci_cli::Exec; -use crate::workload::executor::default_executors; +use crate::workload::executor::default_executor; pub fn exec(args: Exec, root_path: PathBuf) -> Result { - let syscall = create_syscall(); - let pid = ContainerBuilder::new(args.container_id.clone(), syscall.as_ref()) - .with_executor(default_executors())? + let pid = ContainerBuilder::new(args.container_id.clone(), SyscallType::default()) + .with_executor(default_executor()) .with_root_path(root_path)? .with_console_socket(args.console_socket.as_ref()) .with_pid_file(args.pid_file.as_ref())? diff --git a/crates/youki/src/commands/mod.rs b/crates/youki/src/commands/mod.rs index 7cd3669ac..520e004cb 100644 --- a/crates/youki/src/commands/mod.rs +++ b/crates/youki/src/commands/mod.rs @@ -58,12 +58,11 @@ fn create_cgroup_manager>( container_id: &str, ) -> Result { let container = load_container(root_path, container_id)?; - let cgroups_path = container.spec()?.cgroup_path; - let systemd_cgroup = container.systemd(); + let cgroup_config = libcgroups::common::CgroupConfig { + cgroup_path: container.spec()?.cgroup_path, + systemd_cgroup: container.systemd(), + container_name: container.id().to_string(), + }; - Ok(libcgroups::common::create_cgroup_manager( - cgroups_path, - systemd_cgroup, - container.id(), - )?) + Ok(libcgroups::common::create_cgroup_manager(&cgroup_config)?) } diff --git a/crates/youki/src/commands/run.rs b/crates/youki/src/commands/run.rs index 3a7c83d97..5f0e33124 100644 --- a/crates/youki/src/commands/run.rs +++ b/crates/youki/src/commands/run.rs @@ -1,7 +1,7 @@ use std::path::PathBuf; use anyhow::{Context, Result}; -use libcontainer::{container::builder::ContainerBuilder, syscall::syscall::create_syscall}; +use libcontainer::{container::builder::ContainerBuilder, syscall::syscall::SyscallType}; use liboci_cli::Run; use nix::{ sys::{ @@ -12,12 +12,11 @@ use nix::{ unistd::Pid, }; -use crate::workload::executor::default_executors; +use crate::workload::executor::default_executor; pub fn run(args: Run, root_path: PathBuf, systemd_cgroup: bool) -> Result { - let syscall = create_syscall(); - let mut container = ContainerBuilder::new(args.container_id.clone(), syscall.as_ref()) - .with_executor(default_executors())? + let mut container = ContainerBuilder::new(args.container_id.clone(), SyscallType::default()) + .with_executor(default_executor()) .with_pid_file(args.pid_file.as_ref())? .with_console_socket(args.console_socket.as_ref()) .with_root_path(root_path)? diff --git a/crates/youki/src/workload/executor.rs b/crates/youki/src/workload/executor.rs index e38e33640..9565c16b5 100644 --- a/crates/youki/src/workload/executor.rs +++ b/crates/youki/src/workload/executor.rs @@ -1,13 +1,29 @@ -use libcontainer::workload::{default::DefaultExecutor, Executor}; +use libcontainer::oci_spec::runtime::Spec; +use libcontainer::workload::{Executor, ExecutorError}; -pub fn default_executors() -> Vec> { - vec![ +pub fn default_executor() -> Executor { + Box::new(|spec: &Spec| -> Result<(), ExecutorError> { #[cfg(feature = "wasm-wasmer")] - Box::::default(), + match super::wasmer::get_executor()(spec) { + Ok(_) => return Ok(()), + Err(ExecutorError::CantHandle(_)) => (), + Err(err) => return Err(err), + } #[cfg(feature = "wasm-wasmedge")] - Box::::default(), + match super::wasmedge::get_executor()(spec) { + Ok(_) => return Ok(()), + Err(ExecutorError::CantHandle(_)) => (), + Err(err) => return Err(err), + } #[cfg(feature = "wasm-wasmtime")] - Box::::default(), - Box::::default(), - ] + match super::wasmtime::get_executor()(spec) { + Ok(_) => return Ok(()), + Err(ExecutorError::CantHandle(_)) => (), + Err(err) => return Err(err), + } + + // Leave the default executor as the last option, which executes normal + // container workloads. + libcontainer::workload::default::get_executor()(spec) + }) } diff --git a/crates/youki/src/workload/wasmedge.rs b/crates/youki/src/workload/wasmedge.rs index 9ab50a34f..eb813e3a6 100644 --- a/crates/youki/src/workload/wasmedge.rs +++ b/crates/youki/src/workload/wasmedge.rs @@ -8,11 +8,14 @@ use libcontainer::workload::{Executor, ExecutorError}; const EXECUTOR_NAME: &str = "wasmedge"; -#[derive(Default)] -pub struct WasmEdgeExecutor {} +pub fn get_executor() -> Executor { + Box::new(|spec: &Spec| -> Result<(), ExecutorError> { + if !can_handle(spec) { + return Err(ExecutorError::CantHandle(EXECUTOR_NAME)); + } + + tracing::debug!("executing workload with wasmedge handler"); -impl Executor for WasmEdgeExecutor { - fn exec(&self, spec: &Spec) -> Result<(), ExecutorError> { // parse wasi parameters let args = get_args(spec); let mut cmd = args[0].clone(); @@ -55,25 +58,21 @@ impl Executor for WasmEdgeExecutor { .map_err(|err| ExecutorError::Execution(err))?; Ok(()) - } - - fn can_handle(&self, spec: &Spec) -> bool { - if let Some(annotations) = spec.annotations() { - if let Some(handler) = annotations.get("run.oci.handler") { - return handler == "wasm"; - } + }) +} - if let Some(variant) = annotations.get("module.wasm.image/variant") { - return variant == "compat"; - } +fn can_handle(spec: &Spec) -> bool { + if let Some(annotations) = spec.annotations() { + if let Some(handler) = annotations.get("run.oci.handler") { + return handler == "wasm"; } - false + if let Some(variant) = annotations.get("module.wasm.image/variant") { + return variant == "compat"; + } } - fn name(&self) -> &'static str { - EXECUTOR_NAME - } + false } fn get_args(spec: &Spec) -> &[String] { diff --git a/crates/youki/src/workload/wasmer.rs b/crates/youki/src/workload/wasmer.rs index 1fd5d8ce0..1f0a4b824 100644 --- a/crates/youki/src/workload/wasmer.rs +++ b/crates/youki/src/workload/wasmer.rs @@ -6,11 +6,12 @@ use libcontainer::workload::{Executor, ExecutorError, EMPTY}; const EXECUTOR_NAME: &str = "wasmer"; -#[derive(Default)] -pub struct WasmerExecutor {} +pub fn get_executor() -> Executor { + Box::new(|spec: &Spec| -> Result<(), ExecutorError> { + if !can_handle(spec) { + return Err(ExecutorError::CantHandle(EXECUTOR_NAME)); + } -impl Executor for WasmerExecutor { - fn exec(&self, spec: &Spec) -> Result<(), ExecutorError> { tracing::debug!("executing workload with wasmer handler"); let process = spec.process().as_ref(); @@ -75,25 +76,21 @@ impl Executor for WasmerExecutor { wasi_env.cleanup(&mut store, None); Ok(()) - } - - fn can_handle(&self, spec: &Spec) -> bool { - if let Some(annotations) = spec.annotations() { - if let Some(handler) = annotations.get("run.oci.handler") { - return handler == "wasm"; - } + }) +} - if let Some(variant) = annotations.get("module.wasm.image/variant") { - return variant == "compat"; - } +fn can_handle(spec: &Spec) -> bool { + if let Some(annotations) = spec.annotations() { + if let Some(handler) = annotations.get("run.oci.handler") { + return handler == "wasm"; } - false + if let Some(variant) = annotations.get("module.wasm.image/variant") { + return variant == "compat"; + } } - fn name(&self) -> &'static str { - EXECUTOR_NAME - } + false } #[cfg(test)] @@ -112,7 +109,7 @@ mod tests { .build() .context("build spec")?; - assert!(WasmerExecutor::default().can_handle(&spec)); + assert!(can_handle(&spec)); Ok(()) } @@ -126,7 +123,7 @@ mod tests { .build() .context("build spec")?; - assert!(WasmerExecutor::default().can_handle(&spec)); + assert!(can_handle(&spec)); Ok(()) } @@ -135,7 +132,7 @@ mod tests { fn test_can_handle_no_execute() -> Result<()> { let spec = SpecBuilder::default().build().context("build spec")?; - assert!(!WasmerExecutor::default().can_handle(&spec)); + assert!(!can_handle(&spec)); Ok(()) } diff --git a/crates/youki/src/workload/wasmtime.rs b/crates/youki/src/workload/wasmtime.rs index afa0e4e9d..6834ea617 100644 --- a/crates/youki/src/workload/wasmtime.rs +++ b/crates/youki/src/workload/wasmtime.rs @@ -6,12 +6,13 @@ use libcontainer::workload::{Executor, ExecutorError, EMPTY}; const EXECUTOR_NAME: &str = "wasmtime"; -#[derive(Default)] -pub struct WasmtimeExecutor {} +pub fn get_executor() -> Executor { + Box::new(|spec: &Spec| -> Result<(), ExecutorError> { + if !can_handle(spec) { + return Err(ExecutorError::CantHandle(EXECUTOR_NAME)); + } -impl Executor for WasmtimeExecutor { - fn exec(&self, spec: &Spec) -> Result<(), ExecutorError> { - tracing::info!("Executing workload with wasmtime handler"); + tracing::debug!("executing workload with wasmtime handler"); let process = spec.process().as_ref(); let args = spec @@ -85,23 +86,19 @@ impl Executor for WasmtimeExecutor { start .call(&mut store, &[], &mut []) .map_err(|err| ExecutorError::Execution(err.into())) - } - - fn can_handle(&self, spec: &Spec) -> bool { - if let Some(annotations) = spec.annotations() { - if let Some(handler) = annotations.get("run.oci.handler") { - return handler == "wasm"; - } + }) +} - if let Some(variant) = annotations.get("module.wasm.image/variant") { - return variant == "compat"; - } +fn can_handle(spec: &Spec) -> bool { + if let Some(annotations) = spec.annotations() { + if let Some(handler) = annotations.get("run.oci.handler") { + return handler == "wasm"; } - false + if let Some(variant) = annotations.get("module.wasm.image/variant") { + return variant == "compat"; + } } - fn name(&self) -> &'static str { - EXECUTOR_NAME - } + false } diff --git a/scripts/musl_test.sh b/scripts/musl_test.sh index 7e1dcfdd4..3007b097c 100755 --- a/scripts/musl_test.sh +++ b/scripts/musl_test.sh @@ -6,13 +6,13 @@ test_musl() { cargo +nightly build \ -Zbuild-std \ --target $(uname -m)-unknown-linux-musl \ - --package libcontainer \ - --no-default-features -F v2 + --package "$1" \ + --no-default-features -F "$2" cargo +nightly test \ -Zbuild-std \ --target $(uname -m)-unknown-linux-musl \ - --package libcontainer \ - --no-default-features -F v2 + --package "$1" \ + --no-default-features -F "$2" } test_musl "libcontainer" "v1"