From 501f74f994db8472b0bc55cda335fbefc7bb996f Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Fri, 25 Apr 2025 17:24:20 -0400 Subject: [PATCH 01/10] [sled-agent] Integrate config-reconciler --- clients/sled-agent-client/src/lib.rs | 1 - dev-tools/omdb/src/bin/omdb/sled_agent.rs | 52 - nexus-sled-agent-shared/src/inventory.rs | 15 +- .../execution/src/omicron_sled_config.rs | 79 +- openapi/sled-agent.json | 313 +-- sled-agent/Cargo.toml | 1 + sled-agent/api/src/lib.rs | 31 +- sled-agent/src/artifact_store.rs | 81 +- sled-agent/src/bootstrap/bootstore_setup.rs | 23 +- sled-agent/src/bootstrap/http_entrypoints.rs | 6 +- sled-agent/src/bootstrap/pre_server.rs | 15 +- sled-agent/src/bootstrap/rack_ops.rs | 12 +- sled-agent/src/bootstrap/rss_handle.rs | 6 +- sled-agent/src/bootstrap/server.rs | 47 +- sled-agent/src/hardware_monitor.rs | 55 +- sled-agent/src/http_entrypoints.rs | 58 +- sled-agent/src/instance.rs | 114 +- sled-agent/src/instance_manager.rs | 94 +- sled-agent/src/lib.rs | 1 - sled-agent/src/long_running_tasks.rs | 111 +- sled-agent/src/probe_manager.rs | 158 +- sled-agent/src/rack_setup/service.rs | 138 +- sled-agent/src/services.rs | 1949 +---------------- sled-agent/src/sim/http_entrypoints.rs | 29 +- sled-agent/src/sim/sled_agent.rs | 24 +- sled-agent/src/sled_agent.rs | 604 ++--- sled-agent/src/storage_monitor.rs | 116 - sled-agent/src/support_bundle/logs.rs | 20 +- sled-agent/src/support_bundle/storage.rs | 146 +- sled-agent/src/zone_bundle.rs | 57 +- 30 files changed, 782 insertions(+), 3574 deletions(-) delete mode 100644 sled-agent/src/storage_monitor.rs diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index b27f3048439..5eb8c3ecafe 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -67,7 +67,6 @@ progenitor::generate_api!( OmicronPhysicalDiskConfig = omicron_common::disk::OmicronPhysicalDiskConfig, OmicronPhysicalDisksConfig = omicron_common::disk::OmicronPhysicalDisksConfig, OmicronSledConfig = nexus_sled_agent_shared::inventory::OmicronSledConfig, - OmicronSledConfigResult = nexus_sled_agent_shared::inventory::OmicronSledConfigResult, OmicronZoneConfig = nexus_sled_agent_shared::inventory::OmicronZoneConfig, OmicronZoneDataset = nexus_sled_agent_shared::inventory::OmicronZoneDataset, OmicronZoneImageSource = nexus_sled_agent_shared::inventory::OmicronZoneImageSource, diff --git a/dev-tools/omdb/src/bin/omdb/sled_agent.rs b/dev-tools/omdb/src/bin/omdb/sled_agent.rs index d02203e052b..fbd6260bd8f 100644 --- a/dev-tools/omdb/src/bin/omdb/sled_agent.rs +++ b/dev-tools/omdb/src/bin/omdb/sled_agent.rs @@ -34,14 +34,6 @@ enum SledAgentCommands { #[clap(subcommand)] Zones(ZoneCommands), - /// print information about zpools - #[clap(subcommand)] - Zpools(ZpoolCommands), - - /// print information about datasets - #[clap(subcommand)] - Datasets(DatasetCommands), - /// print information about the local bootstore node #[clap(subcommand)] Bootstore(BootstoreCommands), @@ -97,12 +89,6 @@ impl SledAgentArgs { SledAgentCommands::Zones(ZoneCommands::List) => { cmd_zones_list(&client).await } - SledAgentCommands::Zpools(ZpoolCommands::List) => { - cmd_zpools_list(&client).await - } - SledAgentCommands::Datasets(DatasetCommands::List) => { - cmd_datasets_list(&client).await - } SledAgentCommands::Bootstore(BootstoreCommands::Status) => { cmd_bootstore_status(&client).await } @@ -129,44 +115,6 @@ async fn cmd_zones_list( Ok(()) } -/// Runs `omdb sled-agent zpools list` -async fn cmd_zpools_list( - 
client: &sled_agent_client::Client, -) -> Result<(), anyhow::Error> { - let response = client.zpools_get().await.context("listing zpools")?; - let zpools = response.into_inner(); - - println!("zpools:"); - if zpools.is_empty() { - println!(" "); - } - for zpool in &zpools { - println!(" {:?}", zpool); - } - - Ok(()) -} - -/// Runs `omdb sled-agent datasets list` -async fn cmd_datasets_list( - client: &sled_agent_client::Client, -) -> Result<(), anyhow::Error> { - let response = client.datasets_get().await.context("listing datasets")?; - let response = response.into_inner(); - - println!("dataset configuration @ generation {}:", response.generation); - let datasets = response.datasets; - - if datasets.is_empty() { - println!(" "); - } - for dataset in &datasets { - println!(" {:?}", dataset); - } - - Ok(()) -} - /// Runs `omdb sled-agent bootstore status` async fn cmd_bootstore_status( client: &sled_agent_client::Client, diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs index 4b1b525e0e6..8495c774c49 100644 --- a/nexus-sled-agent-shared/src/inventory.rs +++ b/nexus-sled-agent-shared/src/inventory.rs @@ -18,10 +18,7 @@ use omicron_common::{ external::{ByteCount, Generation}, internal::shared::{NetworkInterface, SourceNatConfig}, }, - disk::{ - DatasetConfig, DatasetManagementStatus, DiskManagementStatus, - DiskVariant, OmicronPhysicalDiskConfig, - }, + disk::{DatasetConfig, DiskVariant, OmicronPhysicalDiskConfig}, zpool_name::ZpoolName, }; use omicron_uuid_kinds::{DatasetUuid, OmicronZoneUuid}; @@ -186,8 +183,6 @@ pub enum SledRole { } /// Describes the set of Reconfigurator-managed configuration elements of a sled -// TODO this struct should have a generation number; at the moment, each of -// the fields has a separete one internally. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq)] pub struct OmicronSledConfig { pub generation: Generation, @@ -222,14 +217,6 @@ impl Ledgerable for OmicronSledConfig { } } -/// Result of the currently-synchronous `omicron_config_put` endpoint. 
-#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -#[must_use = "this `DatasetManagementResult` may contain errors, which should be handled"] -pub struct OmicronSledConfigResult { - pub disks: Vec, - pub datasets: Vec, -} - /// Describes the set of Omicron-managed zones running on a sled #[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, diff --git a/nexus/reconfigurator/execution/src/omicron_sled_config.rs b/nexus/reconfigurator/execution/src/omicron_sled_config.rs index bd08e96a859..70ca8493a4a 100644 --- a/nexus/reconfigurator/execution/src/omicron_sled_config.rs +++ b/nexus/reconfigurator/execution/src/omicron_sled_config.rs @@ -10,15 +10,13 @@ use anyhow::anyhow; use futures::StreamExt; use futures::stream; use nexus_db_queries::context::OpContext; -use nexus_sled_agent_shared::inventory::OmicronSledConfigResult; use nexus_types::deployment::BlueprintSledConfig; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::SledUuid; -use slog::Logger; use slog::info; use slog::warn; +use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; -use update_engine::merge_anyhow_list; /// Idempotently ensure that the specified Omicron sled configs are deployed to /// the corresponding sleds @@ -63,13 +61,14 @@ pub(crate) async fn deploy_sled_configs( format!("Failed to put {config:#?} to sled {sled_id}") }); match result { + Ok(_) => None, Err(error) => { - warn!(log, "{error:#}"); + warn!( + log, "failed to put sled config"; + InlineErrorChain::new(error.as_ref()), + ); Some(error) } - Ok(result) => { - parse_config_result(result.into_inner(), &log).err() - } } }) .collect() @@ -78,69 +77,6 @@ pub(crate) async fn deploy_sled_configs( if errors.is_empty() { Ok(()) } else { Err(errors) } } -fn parse_config_result( - result: OmicronSledConfigResult, - log: &Logger, -) -> anyhow::Result<()> { - let (disk_errs, disk_successes): (Vec<_>, Vec<_>) = - result.disks.into_iter().partition(|status| status.err.is_some()); - - if !disk_errs.is_empty() { - warn!( - log, - "Failed to deploy disks for sled agent"; - "successfully configured disks" => disk_successes.len(), - "failed disk configurations" => disk_errs.len(), - ); - for err in &disk_errs { - warn!(log, "{err:?}"); - } - return Err(merge_anyhow_list(disk_errs.into_iter().map(|status| { - anyhow!( - "failed to deploy disk {:?}: {:#}", - status.identity, - // `disk_errs` was partitioned by `status.err.is_some()`, so - // this is safe to unwrap. - status.err.unwrap(), - ) - }))); - } - - let (dataset_errs, dataset_successes): (Vec<_>, Vec<_>) = - result.datasets.into_iter().partition(|status| status.err.is_some()); - - if !dataset_errs.is_empty() { - warn!( - log, - "Failed to deploy datasets for sled agent"; - "successfully configured datasets" => dataset_successes.len(), - "failed dataset configurations" => dataset_errs.len(), - ); - for err in &dataset_errs { - warn!(log, "{err:?}"); - } - return Err(merge_anyhow_list(dataset_errs.into_iter().map( - |status| { - anyhow!( - "failed to deploy dataset {}: {:#}", - status.dataset_name.full_name(), - // `dataset_errs` was partitioned by `status.err.is_some()`, - // so this is safe to unwrap. 
- status.err.unwrap(), - ) - }, - ))); - } - - info!( - log, - "Successfully deployed config to sled agent"; - "successfully configured disks" => disk_successes.len(), - "successfully configured datasets" => dataset_successes.len(), - ); - Ok(()) -} - #[cfg(test)] mod tests { use super::*; @@ -327,6 +263,9 @@ mod tests { // Observe the latest configuration stored on the simulated sled agent, // and verify that this output matches the input. + // + // TODO-cleanup Simulated sled-agent should report a unified + // `OmicronSledConfig`. let observed_disks = sim_sled_agent.omicron_physical_disks_list().unwrap(); let observed_datasets = sim_sled_agent.datasets_config_list().unwrap(); diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 4279de8605d..6f7c0b62dcd 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -334,30 +334,6 @@ } } }, - "/datasets": { - "get": { - "summary": "Lists the datasets that this sled is configured to use", - "operationId": "datasets_get", - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/DatasetsConfig" - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - } - }, "/disks/{disk_id}": { "put": { "operationId": "disk_put", @@ -516,38 +492,8 @@ "required": true }, "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OmicronSledConfigResult" - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - } - }, - "/omicron-physical-disks": { - "get": { - "operationId": "omicron_physical_disks_get", - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OmicronPhysicalDisksConfig" - } - } - } + "204": { + "description": "resource updated" }, "4XX": { "$ref": "#/components/responses/Error" @@ -2181,33 +2127,6 @@ } } } - }, - "/zpools": { - "get": { - "operationId": "zpools_get", - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "title": "Array_of_Zpool", - "type": "array", - "items": { - "$ref": "#/components/schemas/Zpool" - } - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - } } }, "components": { @@ -3815,22 +3734,6 @@ "description": "The kind of dataset. 
See the `DatasetKind` enum in omicron-common for possible values.", "type": "string" }, - "DatasetManagementStatus": { - "description": "Identifies how a single dataset management operation may have succeeded or failed.", - "type": "object", - "properties": { - "dataset_name": { - "$ref": "#/components/schemas/DatasetName" - }, - "err": { - "nullable": true, - "type": "string" - } - }, - "required": [ - "dataset_name" - ] - }, "DatasetName": { "type": "object", "properties": { @@ -3846,29 +3749,6 @@ "pool_name" ] }, - "DatasetsConfig": { - "type": "object", - "properties": { - "datasets": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/DatasetConfig" - } - }, - "generation": { - "description": "generation number of this configuration\n\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). It should not be bumped within Sled Agent.\n\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.\n\nNote that \"Generation::new()\", AKA, the first generation number, is reserved for \"no datasets\". This is the default configuration for a sled before any requests have been made.", - "allOf": [ - { - "$ref": "#/components/schemas/Generation" - } - ] - } - }, - "required": [ - "datasets", - "generation" - ] - }, "DhcpConfig": { "description": "DHCP configuration for a port\n\nNot present here: Hostname (DHCPv4 option 12; used in DHCPv6 option 39); we use `InstanceRuntimeState::hostname` for this value.", "type": "object", @@ -3945,128 +3825,6 @@ "vendor" ] }, - "DiskManagementError": { - "oneOf": [ - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "not_found" - ] - } - }, - "required": [ - "type" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "internal_disk_control_plane_request" - ] - }, - "value": { - "$ref": "#/components/schemas/TypedUuidForPhysicalDiskKind" - } - }, - "required": [ - "type", - "value" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "zpool_uuid_mismatch" - ] - }, - "value": { - "type": "object", - "properties": { - "expected": { - "$ref": "#/components/schemas/TypedUuidForZpoolKind" - }, - "observed": { - "$ref": "#/components/schemas/TypedUuidForZpoolKind" - } - }, - "required": [ - "expected", - "observed" - ] - } - }, - "required": [ - "type", - "value" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "key_manager" - ] - }, - "value": { - "type": "string" - } - }, - "required": [ - "type", - "value" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "other" - ] - }, - "value": { - "type": "string" - } - }, - "required": [ - "type", - "value" - ] - } - ] - }, - "DiskManagementStatus": { - "description": "Identifies how a single disk management operation may have succeeded or failed.", - "type": "object", - "properties": { - "err": { - "nullable": true, - "allOf": [ - { - "$ref": "#/components/schemas/DiskManagementError" - } - ] - }, - "identity": { - "$ref": "#/components/schemas/DiskIdentity" - } - }, - "required": [ - "identity" - ] - }, "DiskRuntimeState": { "description": "Runtime state of the Disk, which includes its attach state and some minimal metadata", "type": "object", @@ -4365,13 +4123,6 @@ } ] }, - "DiskType": { - "type": "string", - "enum": [ - "U2", - "M2" - ] - }, 
"DiskVariant": { "type": "string", "enum": [ @@ -5629,29 +5380,6 @@ "pool_id" ] }, - "OmicronPhysicalDisksConfig": { - "type": "object", - "properties": { - "disks": { - "type": "array", - "items": { - "$ref": "#/components/schemas/OmicronPhysicalDiskConfig" - } - }, - "generation": { - "description": "generation number of this configuration\n\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). It should not be bumped within Sled Agent.\n\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.", - "allOf": [ - { - "$ref": "#/components/schemas/Generation" - } - ] - } - }, - "required": [ - "disks", - "generation" - ] - }, "OmicronSledConfig": { "description": "Describes the set of Reconfigurator-managed configuration elements of a sled", "type": "object", @@ -5684,28 +5412,6 @@ "zones" ] }, - "OmicronSledConfigResult": { - "description": "Result of the currently-synchronous `omicron_config_put` endpoint.", - "type": "object", - "properties": { - "datasets": { - "type": "array", - "items": { - "$ref": "#/components/schemas/DatasetManagementStatus" - } - }, - "disks": { - "type": "array", - "items": { - "$ref": "#/components/schemas/DiskManagementStatus" - } - } - }, - "required": [ - "datasets", - "disks" - ] - }, "OmicronZoneConfig": { "description": "Describes one Omicron-managed zone running on a sled", "type": "object", @@ -7762,21 +7468,6 @@ "version" ] }, - "Zpool": { - "type": "object", - "properties": { - "disk_type": { - "$ref": "#/components/schemas/DiskType" - }, - "id": { - "$ref": "#/components/schemas/TypedUuidForZpoolKind" - } - }, - "required": [ - "disk_type", - "id" - ] - }, "ZpoolName": { "title": "The name of a Zpool", "description": "Zpool names are of the format ox{i,p}_. 
They are either Internal or External, and should be unique", diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 1b3fb78698b..acacb4cc1d6 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -138,6 +138,7 @@ tokio-stream.workspace = true tokio-util.workspace = true illumos-utils = { workspace = true, features = ["testing"] } +sled-agent-config-reconciler = { workspace = true, features = ["testing"] } sled-storage = { workspace = true, features = ["testing"] } # diff --git a/sled-agent/api/src/lib.rs b/sled-agent/api/src/lib.rs index 7bdba01e1bc..bd4242a347f 100644 --- a/sled-agent/api/src/lib.rs +++ b/sled-agent/api/src/lib.rs @@ -13,7 +13,7 @@ use dropshot::{ TypedBody, }; use nexus_sled_agent_shared::inventory::{ - Inventory, OmicronSledConfig, OmicronSledConfigResult, SledRole, + Inventory, OmicronSledConfig, SledRole, }; use omicron_common::{ api::external::Generation, @@ -24,7 +24,7 @@ use omicron_common::{ SledIdentifiers, SwitchPorts, VirtualNetworkInterfaceHost, }, }, - disk::{DatasetsConfig, DiskVariant, OmicronPhysicalDisksConfig}, + disk::DiskVariant, ledger::Ledgerable, }; use omicron_uuid_kinds::{ @@ -260,32 +260,7 @@ pub trait SledAgentApi { async fn omicron_config_put( rqctx: RequestContext, body: TypedBody, - ) -> Result, HttpError>; - - /// Lists the datasets that this sled is configured to use - #[endpoint { - method = GET, - path = "/datasets", - }] - async fn datasets_get( - rqctx: RequestContext, - ) -> Result, HttpError>; - - #[endpoint { - method = GET, - path = "/omicron-physical-disks", - }] - async fn omicron_physical_disks_get( - rqctx: RequestContext, - ) -> Result, HttpError>; - - #[endpoint { - method = GET, - path = "/zpools", - }] - async fn zpools_get( - rqctx: RequestContext, - ) -> Result>, HttpError>; + ) -> Result; #[endpoint { method = GET, diff --git a/sled-agent/src/artifact_store.rs b/sled-agent/src/artifact_store.rs index c2b6965a32a..e73461020d5 100644 --- a/sled-agent/src/artifact_store.rs +++ b/sled-agent/src/artifact_store.rs @@ -17,11 +17,11 @@ //! Operations that list or modify artifacts or the configuration are called by //! Nexus and handled by the Sled Agent API. -use std::collections::BTreeMap; use std::future::Future; use std::io::{ErrorKind, Write}; use std::net::SocketAddrV6; use std::str::FromStr; +use std::sync::Arc; use std::time::Duration; use atomicwrites::{AtomicFile, OverwriteBehavior}; @@ -40,8 +40,8 @@ use sha2::{Digest, Sha256}; use sled_agent_api::{ ArtifactConfig, ArtifactListResponse, ArtifactPutResponse, }; -use sled_storage::dataset::M2_ARTIFACT_DATASET; -use sled_storage::manager::StorageHandle; +use sled_agent_config_reconciler::ConfigReconcilerHandle; +use sled_agent_config_reconciler::InternalDisksReceiver; use slog::{Logger, error, info}; use slog_error_chain::{InlineErrorChain, SlogInlineError}; use tokio::fs::File; @@ -49,8 +49,6 @@ use tokio::sync::{OwnedSemaphorePermit, mpsc, oneshot, watch}; use tokio::task::JoinSet; use tufaceous_artifact::ArtifactHash; -use crate::services::ServiceManager; - // These paths are defined under the artifact storage dataset. They // cannot conflict with any artifact paths because all artifact paths are // hexadecimal-encoded SHA-256 checksums. 
@@ -93,7 +91,7 @@ impl ArtifactStore { pub(crate) async fn new( log: &Logger, storage: T, - services: Option, + config_reconciler: Option>, ) -> ArtifactStore { let log = log.new(slog::o!("component" => "ArtifactStore")); @@ -133,7 +131,7 @@ impl ArtifactStore { tokio::task::spawn(ledger_manager( log.clone(), ledger_paths, - services, + config_reconciler, ledger_rx, config_tx, )); @@ -164,13 +162,15 @@ impl ArtifactStore { } } -impl ArtifactStore { +impl ArtifactStore { pub(crate) async fn start( - self, + self: Arc, sled_address: SocketAddrV6, dropshot_config: &ConfigDropshot, - ) -> Result>, StartError> - { + ) -> Result< + dropshot::HttpServer>>, + StartError, + > { let mut depot_address = sled_address; depot_address.set_port(REPO_DEPOT_PORT); @@ -259,7 +259,7 @@ impl ArtifactStore { /// Open an artifact file by hash from a storage handle. /// /// This is the same as [ArtifactStore::get], but can be called with only - /// a [StorageHandle]. + /// a storage implementation. pub(crate) async fn get_from_storage( storage: &T, log: &Logger, @@ -452,11 +452,11 @@ type LedgerManagerRequest = async fn ledger_manager( log: Logger, ledger_paths: Vec, - services: Option, + config_reconciler: Option>, mut rx: mpsc::Receiver, config_channel: watch::Sender>, ) { - let services = services.as_ref(); + let config_reconciler = config_reconciler.as_ref(); let handle_request = async |new_config: ArtifactConfig| { if ledger_paths.is_empty() { return Err(Error::NoUpdateDataset); @@ -466,21 +466,11 @@ async fn ledger_manager( { if new_config.generation > ledger.data().generation { // New config generation. First check that the configuration - // contains all artifacts that are presently in use. - let mut missing = BTreeMap::new(); - // Check artifacts from the current zone configuration. - if let Some(services) = services { - for zone in services.omicron_zones_list().await.zones { - if let Some(hash) = zone.image_source.artifact_hash() { - if !new_config.artifacts.contains(&hash) { - missing - .insert(hash, "current zone configuration"); - } - } - } - } - if !missing.is_empty() { - return Err(Error::InUseArtifactsMissing(missing)); + // is valid against the current ledgered sled config. + if let Some(config_reconciler) = config_reconciler { + config_reconciler + .validate_artifact_config(new_config.clone()) + .await??; } // Everything looks okay; update the ledger. @@ -614,14 +604,11 @@ pub trait DatasetsManager: Clone + Send + Sync + 'static { } } -impl DatasetsManager for StorageHandle { +impl DatasetsManager for InternalDisksReceiver { async fn artifact_storage_paths( &self, ) -> impl Iterator + '_ { - self.get_latest_disks() - .await - .all_m2_mountpoints(M2_ARTIFACT_DATASET) - .into_iter() + self.current().all_artifact_datasets().collect::>().into_iter() } } @@ -766,11 +753,11 @@ impl ArtifactWriter { } /// Implementation of the Repo Depot API backed by an -/// `ArtifactStore`. +/// `ArtifactStore`. 
enum RepoDepotImpl {} impl RepoDepotApi for RepoDepotImpl { - type Context = ArtifactStore; + type Context = Arc>; async fn artifact_get_by_sha256( rqctx: RequestContext, @@ -831,8 +818,15 @@ pub enum Error { #[error("Digest mismatch: expected {expected}, actual {actual}")] HashMismatch { expected: ArtifactHash, actual: ArtifactHash }, - #[error("Artifacts in use are not present in new config: {0:?}")] - InUseArtifactsMissing(BTreeMap), + #[error("New config is invalid per current sled config")] + InvalidPerSledConfig( + #[from] sled_agent_config_reconciler::LedgerArtifactConfigError, + ), + + #[error("Cannot validate incoming config against sled config")] + CannotValidateAgainstSledConfig( + #[from] sled_agent_config_reconciler::LedgerTaskError, + ), #[error("Blocking task failed")] Join(#[source] tokio::task::JoinError), @@ -863,7 +857,7 @@ impl From for HttpError { match err { // 4xx errors Error::HashMismatch { .. } - | Error::InUseArtifactsMissing { .. } + | Error::InvalidPerSledConfig { .. } | Error::NoConfig | Error::NotInConfig { .. } => { HttpError::for_bad_request(None, err.to_string()) @@ -894,9 +888,12 @@ impl From for HttpError { | Error::File { .. } | Error::Join(_) | Error::LedgerCommit(_) - | Error::LedgerChannel => HttpError::for_internal_error( - InlineErrorChain::new(&err).to_string(), - ), + | Error::LedgerChannel + | Error::CannotValidateAgainstSledConfig(_) => { + HttpError::for_internal_error( + InlineErrorChain::new(&err).to_string(), + ) + } } } } diff --git a/sled-agent/src/bootstrap/bootstore_setup.rs b/sled-agent/src/bootstrap/bootstore_setup.rs index 1b148290104..c9e3cefbd8e 100644 --- a/sled-agent/src/bootstrap/bootstore_setup.rs +++ b/sled-agent/src/bootstrap/bootstore_setup.rs @@ -15,7 +15,6 @@ use omicron_ddm_admin_client::Client as DdmAdminClient; use sled_hardware_types::Baseboard; use sled_hardware_types::underlay::BootstrapInterface; use sled_storage::dataset::CLUSTER_DATASET; -use sled_storage::resources::AllDisks; use slog::Logger; use std::collections::BTreeSet; use std::net::Ipv6Addr; @@ -26,7 +25,7 @@ const BOOTSTORE_FSM_STATE_FILE: &str = "bootstore-fsm-state.json"; const BOOTSTORE_NETWORK_CONFIG_FILE: &str = "bootstore-network-config.json"; pub fn new_bootstore_config( - all_disks: &AllDisks, + cluster_dataset_paths: &[Utf8PathBuf], baseboard: Baseboard, global_zone_bootstrap_ip: Ipv6Addr, ) -> Result { @@ -37,19 +36,20 @@ pub fn new_bootstore_config( learn_timeout: Duration::from_secs(5), rack_init_timeout: Duration::from_secs(300), rack_secret_request_timeout: Duration::from_secs(5), - fsm_state_ledger_paths: bootstore_fsm_state_paths(&all_disks)?, + fsm_state_ledger_paths: bootstore_fsm_state_paths( + cluster_dataset_paths, + )?, network_config_ledger_paths: bootstore_network_config_paths( - &all_disks, + cluster_dataset_paths, )?, }) } fn bootstore_fsm_state_paths( - all_disks: &AllDisks, + cluster_dataset_paths: &[Utf8PathBuf], ) -> Result, StartError> { - let paths: Vec<_> = all_disks - .all_m2_mountpoints(CLUSTER_DATASET) - .into_iter() + let paths: Vec<_> = cluster_dataset_paths + .iter() .map(|p| p.join(BOOTSTORE_FSM_STATE_FILE)) .collect(); @@ -60,11 +60,10 @@ fn bootstore_fsm_state_paths( } fn bootstore_network_config_paths( - all_disks: &AllDisks, + cluster_dataset_paths: &[Utf8PathBuf], ) -> Result, StartError> { - let paths: Vec<_> = all_disks - .all_m2_mountpoints(CLUSTER_DATASET) - .into_iter() + let paths: Vec<_> = cluster_dataset_paths + .iter() .map(|p| p.join(BOOTSTORE_NETWORK_CONFIG_FILE)) .collect(); diff --git 
a/sled-agent/src/bootstrap/http_entrypoints.rs b/sled-agent/src/bootstrap/http_entrypoints.rs index 44bd09387ff..12a986cf2fb 100644 --- a/sled-agent/src/bootstrap/http_entrypoints.rs +++ b/sled-agent/src/bootstrap/http_entrypoints.rs @@ -23,10 +23,10 @@ use dropshot::{ use omicron_common::api::external::Error; use omicron_uuid_kinds::RackInitUuid; use omicron_uuid_kinds::RackResetUuid; +use sled_agent_config_reconciler::InternalDisksReceiver; use sled_agent_types::rack_init::RackInitializeRequest; use sled_agent_types::rack_ops::RackOperationStatus; use sled_hardware_types::Baseboard; -use sled_storage::manager::StorageHandle; use slog::Logger; use slog_error_chain::InlineErrorChain; use sprockets_tls::keys::SprocketsConfig; @@ -37,7 +37,7 @@ use tokio::sync::{mpsc, oneshot}; pub(crate) struct BootstrapServerContext { pub(crate) base_log: Logger, pub(crate) global_zone_bootstrap_ip: Ipv6Addr, - pub(crate) storage_manager: StorageHandle, + pub(crate) internal_disks_rx: InternalDisksReceiver, pub(crate) bootstore_node_handle: bootstore::NodeHandle, pub(crate) baseboard: Baseboard, pub(crate) rss_access: RssAccess, @@ -56,7 +56,7 @@ impl BootstrapServerContext { &self.base_log, self.sprockets.clone(), self.global_zone_bootstrap_ip, - &self.storage_manager, + &self.internal_disks_rx, &self.bootstore_node_handle, request, ) diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index d5b7553d54b..546a523e3e0 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -20,7 +20,6 @@ use crate::long_running_tasks::{ LongRunningTaskHandles, spawn_all_longrunning_tasks, }; use crate::services::ServiceManager; -use crate::services::TimeSyncConfig; use crate::sled_agent::SledAgent; use camino::Utf8PathBuf; use cancel_safe_futures::TryStreamExt; @@ -134,23 +133,17 @@ impl BootstrapAgentStartup { let global_zone_bootstrap_ip = startup_networking.global_zone_bootstrap_ip; - let time_sync = if let Some(true) = config.skip_timesync { - TimeSyncConfig::Skip - } else { - TimeSyncConfig::Normal - }; - let service_manager = ServiceManager::new( &base_log, ddm_reconciler, startup_networking, sled_mode, - time_sync, config.sidecar_revision.clone(), config.switch_zone_maghemite_links.clone(), - long_running_task_handles.storage_manager.clone(), - long_running_task_handles.zone_bundler.clone(), - long_running_task_handles.zone_image_resolver.clone(), + long_running_task_handles + .config_reconciler + .internal_disks_rx() + .clone(), ); // Inform the hardware monitor that the service manager is ready diff --git a/sled-agent/src/bootstrap/rack_ops.rs b/sled-agent/src/bootstrap/rack_ops.rs index 2be59fd5880..db2e79d3ec9 100644 --- a/sled-agent/src/bootstrap/rack_ops.rs +++ b/sled-agent/src/bootstrap/rack_ops.rs @@ -9,9 +9,9 @@ use crate::rack_setup::service::SetupServiceError; use bootstore::schemes::v0 as bootstore; use omicron_uuid_kinds::RackInitUuid; use omicron_uuid_kinds::RackResetUuid; +use sled_agent_config_reconciler::InternalDisksReceiver; use sled_agent_types::rack_init::RackInitializeRequest; use sled_agent_types::rack_ops::{RackOperationStatus, RssStep}; -use sled_storage::manager::StorageHandle; use slog::Logger; use sprockets_tls::keys::SprocketsConfig; use std::mem; @@ -146,7 +146,7 @@ impl RssAccess { parent_log: &Logger, sprockets: SprocketsConfig, global_zone_bootstrap_ip: Ipv6Addr, - storage_manager: &StorageHandle, + internal_disks_rx: &InternalDisksReceiver, bootstore_node_handle: &bootstore::NodeHandle, request: 
RackInitializeRequest, ) -> Result { @@ -182,7 +182,7 @@ impl RssAccess { *status = RssStatus::Initializing { id, completion, step_rx }; mem::drop(status); let parent_log = parent_log.clone(); - let storage_manager = storage_manager.clone(); + let internal_disks_rx = internal_disks_rx.clone(); let bootstore_node_handle = bootstore_node_handle.clone(); let status = Arc::clone(&self.status); tokio::spawn(async move { @@ -190,7 +190,7 @@ impl RssAccess { &parent_log, sprockets, global_zone_bootstrap_ip, - storage_manager, + internal_disks_rx, bootstore_node_handle, request, step_tx, @@ -328,7 +328,7 @@ async fn rack_initialize( parent_log: &Logger, sprockets: SprocketsConfig, global_zone_bootstrap_ip: Ipv6Addr, - storage_manager: StorageHandle, + internal_disks_rx: InternalDisksReceiver, bootstore_node_handle: bootstore::NodeHandle, request: RackInitializeRequest, step_tx: watch::Sender, @@ -338,7 +338,7 @@ async fn rack_initialize( sprockets, request, global_zone_bootstrap_ip, - storage_manager, + internal_disks_rx, bootstore_node_handle, step_tx, ) diff --git a/sled-agent/src/bootstrap/rss_handle.rs b/sled-agent/src/bootstrap/rss_handle.rs index efddfa2aa25..f3872b90feb 100644 --- a/sled-agent/src/bootstrap/rss_handle.rs +++ b/sled-agent/src/bootstrap/rss_handle.rs @@ -14,10 +14,10 @@ use futures::stream::FuturesUnordered; use omicron_common::backoff::BackoffError; use omicron_common::backoff::retry_notify; use omicron_common::backoff::retry_policy_local; +use sled_agent_config_reconciler::InternalDisksReceiver; use sled_agent_types::rack_init::RackInitializeRequest; use sled_agent_types::rack_ops::RssStep; use sled_agent_types::sled::StartSledAgentRequest; -use sled_storage::manager::StorageHandle; use slog::Logger; use sprockets_tls::keys::SprocketsConfig; use std::net::Ipv6Addr; @@ -50,7 +50,7 @@ impl RssHandle { sprockets: SprocketsConfig, config: RackInitializeRequest, our_bootstrap_address: Ipv6Addr, - storage_manager: StorageHandle, + internal_disks_rx: InternalDisksReceiver, bootstore: bootstore::NodeHandle, step_tx: watch::Sender, ) -> Result<(), SetupServiceError> { @@ -59,7 +59,7 @@ impl RssHandle { let rss = RackSetupService::new( log.new(o!("component" => "RSS")), config, - storage_manager, + internal_disks_rx, tx, bootstore, step_tx, diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 288f35d2a33..9788bb943ad 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -39,11 +39,11 @@ use omicron_ddm_admin_client::DdmError; use omicron_ddm_admin_client::types::EnableStatsRequest; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::RackInitUuid; +use sled_agent_config_reconciler::InternalDisksReceiver; use sled_agent_types::rack_init::RackInitializeRequest; use sled_agent_types::sled::StartSledAgentRequest; use sled_hardware::underlay; use sled_storage::dataset::CONFIG_DATASET; -use sled_storage::manager::StorageHandle; use slog::Logger; use std::io; use std::net::SocketAddr; @@ -183,9 +183,11 @@ impl Server { } = BootstrapAgentStartup::run(config).await?; // Do we have a StartSledAgentRequest stored in the ledger? 
- let paths = - sled_config_paths(&long_running_task_handles.storage_manager) - .await?; + let internal_disks_rx = long_running_task_handles + .config_reconciler + .internal_disks_rx() + .clone(); + let paths = sled_config_paths(&internal_disks_rx).await?; let maybe_ledger = Ledger::::new(&startup_log, paths).await; @@ -204,7 +206,7 @@ impl Server { let bootstrap_context = BootstrapServerContext { base_log: base_log.clone(), global_zone_bootstrap_ip, - storage_manager: long_running_task_handles.storage_manager.clone(), + internal_disks_rx, bootstore_node_handle: long_running_task_handles.bootstore.clone(), baseboard: long_running_task_handles.hardware_manager.baseboard(), rss_access, @@ -257,11 +259,6 @@ impl Server { .map_err(|_| ()) .expect("Failed to send to StorageMonitor"); - // For cold boot specifically, we now need to load the services - // we're responsible for, while continuing to handle hardware - // notifications. This cannot fail: we retry indefinitely until - // we're done loading services. - sled_agent.load_services().await; SledAgentState::ServerStarted(sled_agent_server) } else { SledAgentState::Bootstrapping(Some(sled_agent_started_tx)) @@ -386,9 +383,6 @@ async fn start_sled_agent( } } - // Inform the storage service that the key manager is available - long_running_task_handles.storage_manager.key_manager_ready().await; - // Inform our DDM reconciler of our underlay subnet and the information it // needs for maghemite to enable Oximeter stats. let ddm_reconciler = service_manager.ddm_reconciler(); @@ -413,8 +407,10 @@ async fn start_sled_agent( // Record this request so the sled agent can be automatically // initialized on the next boot. - let paths = - sled_config_paths(&long_running_task_handles.storage_manager).await?; + let paths = sled_config_paths( + long_running_task_handles.config_reconciler.internal_disks_rx(), + ) + .await?; let mut ledger = Ledger::new_with(&log, paths, request); ledger.commit().await?; @@ -468,12 +464,11 @@ impl From for SledAgentServerStartError { } async fn sled_config_paths( - storage: &StorageHandle, + internal_disks_rx: &InternalDisksReceiver, ) -> Result, MissingM2Paths> { - let resources = storage.get_latest_disks().await; - let paths: Vec<_> = resources - .all_m2_mountpoints(CONFIG_DATASET) - .into_iter() + let paths: Vec<_> = internal_disks_rx + .current() + .all_config_datasets() .map(|p| p.join(SLED_AGENT_REQUEST_FILE)) .collect(); @@ -620,15 +615,13 @@ impl Inner { } async fn uninstall_sled_local_config(&self) -> Result<(), BootstrapError> { - let config_dirs = self + let internal_disks = self .long_running_task_handles - .storage_manager - .get_latest_disks() - .await - .all_m2_mountpoints(CONFIG_DATASET) - .into_iter(); + .config_reconciler + .internal_disks_rx() + .current(); - for dir in config_dirs { + for dir in internal_disks.all_config_datasets() { for entry in dir.read_dir_utf8().map_err(|err| { BootstrapError::Io { message: format!("Deleting {dir}"), err } })? 
{ diff --git a/sled-agent/src/hardware_monitor.rs b/sled-agent/src/hardware_monitor.rs index 9508a11bfba..0e8cd00463d 100644 --- a/sled-agent/src/hardware_monitor.rs +++ b/sled-agent/src/hardware_monitor.rs @@ -8,10 +8,10 @@ use crate::services::ServiceManager; use crate::sled_agent::SledAgent; +use sled_agent_config_reconciler::RawDisksSender; use sled_hardware::{HardwareManager, HardwareUpdate}; use sled_hardware_types::Baseboard; use sled_storage::disk::RawDisk; -use sled_storage::manager::StorageHandle; use slog::Logger; use tokio::sync::broadcast::error::RecvError; use tokio::sync::{broadcast, oneshot}; @@ -68,8 +68,8 @@ pub struct HardwareMonitor { // A reference to the hardware manager hardware_manager: HardwareManager, - // A handle to [`sled_hardware::manager::StorageManger`] - storage_manager: StorageHandle, + // A handle to send raw disk updates to the config-reconciler system. + raw_disks_tx: RawDisksSender, // A handle to the sled-agent // @@ -91,7 +91,7 @@ impl HardwareMonitor { pub fn new( log: &Logger, hardware_manager: &HardwareManager, - storage_manager: &StorageHandle, + raw_disks_tx: RawDisksSender, ) -> ( HardwareMonitor, oneshot::Sender, @@ -112,7 +112,7 @@ impl HardwareMonitor { service_manager_ready_rx, hardware_rx, hardware_manager: hardware_manager.clone(), - storage_manager: storage_manager.clone(), + raw_disks_tx, sled_agent: None, tofino_manager, }, @@ -177,36 +177,16 @@ impl HardwareMonitor { } } HardwareUpdate::DiskAdded(disk) => { - // We notify the storage manager of the hardware, but do not need to - // wait for the result to be fully processed. - // - // Here and below, we're "dropping a future" rather than - // awaiting it. That's intentional - the hardware monitor - // doesn't care when this work is finished, just when it's - // enqueued. - #[allow(clippy::let_underscore_future)] - let _ = self - .storage_manager - .detected_raw_disk(disk.into()) - .await; + self.raw_disks_tx + .add_or_update_raw_disk(disk.into(), &self.log); } HardwareUpdate::DiskRemoved(disk) => { - // We notify the storage manager of the hardware, but do not need to - // wait for the result to be fully processed. - #[allow(clippy::let_underscore_future)] - let _ = self - .storage_manager - .detected_raw_disk_removal(disk.into()) - .await; + self.raw_disks_tx + .remove_raw_disk(disk.identity(), &self.log); } HardwareUpdate::DiskUpdated(disk) => { - // We notify the storage manager of the hardware, but do not need to - // wait for the result to be fully processed. - #[allow(clippy::let_underscore_future)] - let _ = self - .storage_manager - .detected_raw_disk_update(disk.into()) - .await; + self.raw_disks_tx + .add_or_update_raw_disk(disk.into(), &self.log); } }, Err(broadcast::error::RecvError::Lagged(count)) => { @@ -280,14 +260,9 @@ impl HardwareMonitor { self.deactivate_switch().await; } - // We notify the storage manager of the hardware, but do not need to - // wait for the result to be fully processed. 
- #[allow(clippy::let_underscore_future)] - let _ = self - .storage_manager - .ensure_using_exactly_these_disks( - self.hardware_manager.disks().into_values().map(RawDisk::from), - ) - .await; + self.raw_disks_tx.set_raw_disks( + self.hardware_manager.disks().into_values().map(RawDisk::from), + &self.log, + ); } } diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 0056056eb33..f96802e9732 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -18,7 +18,7 @@ use dropshot::{ Query, RequestContext, StreamingBody, TypedBody, }; use nexus_sled_agent_shared::inventory::{ - Inventory, OmicronSledConfig, OmicronSledConfigResult, SledRole, + Inventory, OmicronSledConfig, SledRole, }; use omicron_common::api::external::Error; use omicron_common::api::internal::nexus::{DiskRuntimeState, SledVmmState}; @@ -26,9 +26,6 @@ use omicron_common::api::internal::shared::{ ExternalIpGatewayMap, ResolvedVpcRouteSet, ResolvedVpcRouteState, SledIdentifiers, SwitchPorts, VirtualNetworkInterfaceHost, }; -use omicron_common::disk::{ - DatasetsConfig, DiskVariant, M2Slot, OmicronPhysicalDisksConfig, -}; use range_requests::RequestContextEx; use sled_agent_api::*; use sled_agent_types::boot_disk::{ @@ -400,13 +397,6 @@ impl SledAgentApi for SledAgentImpl { Ok(HttpResponseDeleted()) } - async fn datasets_get( - rqctx: RequestContext, - ) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.datasets_config_list().await?)) - } - async fn zone_bundle_cleanup( rqctx: RequestContext, ) -> Result>, HttpError> @@ -428,27 +418,11 @@ impl SledAgentApi for SledAgentImpl { async fn omicron_config_put( rqctx: RequestContext, body: TypedBody, - ) -> Result, HttpError> { + ) -> Result { let sa = rqctx.context(); let body_args = body.into_inner(); - sa.set_omicron_config(body_args) - .await - .map(HttpResponseOk) - .map_err(HttpError::from) - } - - async fn omicron_physical_disks_get( - rqctx: RequestContext, - ) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.omicron_physical_disks_list().await?)) - } - - async fn zpools_get( - rqctx: RequestContext, - ) -> Result>, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.zpools_get().await)) + sa.set_omicron_config(body_args).await??; + Ok(HttpResponseUpdatedNoContent()) } async fn sled_role_get( @@ -792,29 +766,7 @@ impl SledAgentApi for SledAgentImpl { let boot_disk = path_params.into_inner().boot_disk; // Find our corresponding disk. - let maybe_disk_path = - sa.storage().get_latest_disks().await.iter_managed().find_map( - |(_identity, disk)| { - // Synthetic disks panic if asked for their `slot()`, so filter - // them out first; additionally, filter out any non-M2 disks. - if disk.is_synthetic() || disk.variant() != DiskVariant::M2 - { - return None; - } - - // Convert this M2 disk's slot to an M2Slot, and skip any that - // don't match the requested boot_disk. 
- let Ok(slot) = M2Slot::try_from(disk.slot()) else { - return None; - }; - if slot != boot_disk { - return None; - } - - let raw_devs_path = true; - Some(disk.boot_image_devfs_path(raw_devs_path)) - }, - ); + let maybe_disk_path = sa.boot_image_raw_devfs_path(boot_disk); let disk_path = match maybe_disk_path { Some(Ok(path)) => path, diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 5481672b17d..573fc717811 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -36,10 +36,9 @@ use propolis_client::Client as PropolisClient; use propolis_client::instance_spec::{ComponentV0, SpecKey}; use rand::SeedableRng; use rand::prelude::IteratorRandom; +use sled_agent_config_reconciler::AvailableDatasetsReceiver; use sled_agent_types::instance::*; use sled_agent_types::zone_bundle::ZoneBundleCause; -use sled_storage::dataset::ZONE_DATASET; -use sled_storage::manager::StorageHandle; use slog::Logger; use std::net::IpAddr; use std::net::SocketAddr; @@ -530,8 +529,8 @@ struct InstanceRunner { // Connection to Nexus nexus_client: NexusClient, - // Storage resources - storage: StorageHandle, + // Available datasets for choosing zone roots + available_datasets_rx: AvailableDatasetsReceiver, // Used to create propolis zones zone_builder_factory: ZoneBuilderFactory, @@ -1556,10 +1555,10 @@ impl Instance { nexus_client, vnic_allocator, port_manager, - storage, zone_bundler, zone_builder_factory, metrics_queue, + available_datasets_rx, } = services; let mut dhcp_config = DhcpCfg { @@ -1645,7 +1644,7 @@ impl Instance { state: InstanceStates::new(vmm_runtime, migration_id), running_state: None, nexus_client, - storage, + available_datasets_rx, zone_builder_factory, zone_bundler, metrics_queue, @@ -1991,13 +1990,10 @@ impl InstanceRunner { // configured VNICs. 
let zname = propolis_zone_name(&self.propolis_id); let mut rng = rand::rngs::StdRng::from_entropy(); - let latest_disks = self - .storage - .get_latest_disks() - .await - .all_u2_mountpoints(ZONE_DATASET); - let root = latest_disks + let root = self + .available_datasets_rx + .all_mounted_zone_root_datasets() .into_iter() .choose(&mut rng) .ok_or_else(|| Error::U2NotFound)?; @@ -2275,11 +2271,16 @@ mod tests { use omicron_common::api::external::{Generation, Hostname}; use omicron_common::api::internal::nexus::VmmState; use omicron_common::api::internal::shared::{DhcpConfig, SledIdentifiers}; + use omicron_common::disk::DiskIdentity; + use omicron_uuid_kinds::ZpoolUuid; use propolis_client::types::{ InstanceMigrateStatusResponse, InstanceStateMonitorResponse, }; + use sled_agent_config_reconciler::{ + CurrentlyManagedZpoolsReceiver, InternalDisksReceiver, + }; use sled_agent_types::zone_bundle::CleanupContext; - use sled_storage::manager_test_harness::StorageManagerTestHarness; + use sled_storage::config::MountConfig; use std::net::SocketAddrV6; use std::net::{Ipv4Addr, Ipv6Addr, SocketAddrV4}; use std::str::FromStr; @@ -2409,25 +2410,11 @@ mod tests { .expect("single-stepping mock server failed unexpectedly"); } - async fn setup_storage_manager(log: &Logger) -> StorageManagerTestHarness { - let mut harness = StorageManagerTestHarness::new(log).await; - let raw_disks = - harness.add_vdevs(&["u2_under_test.vdev", "m2_helping.vdev"]).await; - harness.handle().key_manager_ready().await; - let config = harness.make_config(1, &raw_disks); - let _ = harness - .handle() - .omicron_physical_disks_ensure(config.clone()) - .await - .expect("Ensuring disks should work after key manager is ready"); - harness - } - async fn instance_struct( log: &Logger, propolis_addr: SocketAddr, nexus_client: NexusClient, - storage_handle: StorageHandle, + available_datasets_rx: AvailableDatasetsReceiver, temp_dir: &str, ) -> (Instance, MetricsRx) { let id = InstanceUuid::new_v4(); @@ -2439,7 +2426,7 @@ mod tests { let (services, rx) = fake_instance_manager_services( log, - storage_handle, + available_datasets_rx, nexus_client, temp_dir, ) @@ -2523,7 +2510,7 @@ mod tests { async fn fake_instance_manager_services( log: &Logger, - storage_handle: StorageHandle, + available_datasets_rx: AvailableDatasetsReceiver, nexus_client: NexusClient, temp_dir: &str, ) -> (InstanceManagerServices, MetricsRx) { @@ -2540,7 +2527,19 @@ mod tests { let cleanup_context = CleanupContext::default(); let zone_bundler = ZoneBundler::new( log.new(o!("component" => "ZoneBundler")), - storage_handle.clone(), + InternalDisksReceiver::fake_static( + Arc::new(MountConfig::default()), + [( + DiskIdentity { + vendor: "test-vendor".to_string(), + model: "test-model".to_string(), + serial: "test-serial".to_string(), + }, + ZpoolName::new_external(ZpoolUuid::new_v4()), + )] + .into_iter(), + ), + available_datasets_rx.clone(), cleanup_context, ) .await; @@ -2550,7 +2549,7 @@ mod tests { nexus_client, vnic_allocator, port_manager, - storage: storage_handle, + available_datasets_rx, zone_bundler, zone_builder_factory: ZoneBuilderFactory::fake( Some(temp_dir), @@ -2565,7 +2564,6 @@ mod tests { /// interactions with other parts of the system (e.g. Nexus and metrics). 
#[allow(dead_code)] struct InstanceTestObjects { - storage_harness: StorageManagerTestHarness, nexus: FakeNexusParts, _temp_guard: Utf8TempDir, instance_manager: crate::instance_manager::InstanceManager, @@ -2574,12 +2572,13 @@ mod tests { impl InstanceTestObjects { async fn new(log: &slog::Logger) -> Self { - let storage_harness = setup_storage_manager(log).await; let nexus = FakeNexusParts::new(&log).await; let temp_guard = Utf8TempDir::new().unwrap(); let (services, metrics_rx) = fake_instance_manager_services( log, - storage_harness.handle().clone(), + AvailableDatasetsReceiver::fake_in_tempdir_for_tests( + ZpoolOrRamdisk::Ramdisk, + ), nexus.nexus_client.clone(), temp_guard.path().as_str(), ) @@ -2589,7 +2588,7 @@ mod tests { nexus_client, vnic_allocator, port_manager, - storage, + available_datasets_rx, zone_bundler, zone_builder_factory, metrics_queue, @@ -2603,7 +2602,10 @@ mod tests { nexus_client, vnic_allocator, port_manager, - storage, + CurrentlyManagedZpoolsReceiver::fake_static( + std::iter::empty(), + ), + available_datasets_rx, zone_bundler, zone_builder_factory, vmm_reservoir_manager, @@ -2612,17 +2614,12 @@ mod tests { .unwrap(); Self { - storage_harness, nexus, _temp_guard: temp_guard, instance_manager, metrics_rx, } } - - async fn cleanup(mut self) { - self.storage_harness.cleanup().await; - } } #[tokio::test] @@ -2642,9 +2639,6 @@ mod tests { _nexus_server, } = FakeNexusParts::new(&log).await; - let mut storage_harness = setup_storage_manager(&log).await; - let storage_handle = storage_harness.handle().clone(); - let temp_guard = Utf8TempDir::new().unwrap(); let (inst, mut metrics_rx) = timeout( @@ -2653,7 +2647,9 @@ mod tests { &log, propolis_addr, nexus_client, - storage_handle, + AvailableDatasetsReceiver::fake_in_tempdir_for_tests( + ZpoolOrRamdisk::Ramdisk, + ), temp_guard.path().as_str(), ), ) @@ -2708,7 +2704,6 @@ mod tests { .try_recv() .expect_err("The metrics request queue should have one message"); - storage_harness.cleanup().await; logctx.cleanup_successful(); } @@ -2727,9 +2722,6 @@ mod tests { _nexus_server, } = FakeNexusParts::new(&log).await; - let mut storage_harness = setup_storage_manager(&logctx.log).await; - let storage_handle = storage_harness.handle().clone(); - let temp_guard = Utf8TempDir::new().unwrap(); let (inst, _) = timeout( @@ -2739,7 +2731,9 @@ mod tests { // we want to test propolis not ever coming up SocketAddr::V6(SocketAddrV6::new(Ipv6Addr::LOCALHOST, 1, 0, 0)), nexus_client, - storage_handle, + AvailableDatasetsReceiver::fake_in_tempdir_for_tests( + ZpoolOrRamdisk::Ramdisk, + ), temp_guard.path().as_str(), ), ) @@ -2775,7 +2769,6 @@ mod tests { ); } - storage_harness.cleanup().await; logctx.cleanup_successful(); } @@ -2874,7 +2867,6 @@ mod tests { .try_recv() .expect_err("The metrics request queue should have one message"); - test_objects.cleanup().await; logctx.cleanup_successful(); } @@ -3020,7 +3012,6 @@ mod tests { .expect("timed out waiting for VmmState::Stopped in FakeNexus") .expect("failed to receive FakeNexus' InstanceState"); - test_objects.cleanup().await; logctx.cleanup_successful(); } @@ -3067,7 +3058,7 @@ mod tests { nexus_client, vnic_allocator, port_manager, - storage, + available_datasets_rx, zone_bundler, zone_builder_factory, metrics_queue, @@ -3102,7 +3093,7 @@ mod tests { state: InstanceStates::new(vmm_runtime, migration_id), running_state: None, nexus_client, - storage, + available_datasets_rx, zone_builder_factory, zone_bundler, metrics_queue, @@ -3125,12 +3116,10 @@ mod tests { // them directly. 
_nexus_server: HttpServer, _dns_server: TransientServer, - storage_harness: StorageManagerTestHarness, } impl TestInstanceRunner { async fn new(log: &slog::Logger) -> Self { - let storage_harness = setup_storage_manager(&log).await; let FakeNexusParts { nexus_client, _nexus_server, @@ -3141,7 +3130,9 @@ mod tests { let temp_guard = Utf8TempDir::new().unwrap(); let (services, _metrics_rx) = fake_instance_manager_services( &log, - storage_harness.handle().clone(), + AvailableDatasetsReceiver::fake_in_tempdir_for_tests( + ZpoolOrRamdisk::Ramdisk, + ), nexus_client, temp_guard.path().as_str(), ) @@ -3183,7 +3174,6 @@ mod tests { remove_rx, _nexus_server, _dns_server, - storage_harness, } } } @@ -3204,7 +3194,6 @@ mod tests { mut remove_rx, _nexus_server, _dns_server, - mut storage_harness, } = TestInstanceRunner::new(&log).await; let (resp_tx, resp_rx) = oneshot::channel(); @@ -3258,7 +3247,6 @@ mod tests { drop(terminate_tx); let _ = runner_task.await; - storage_harness.cleanup().await; logctx.cleanup_successful(); } @@ -3278,7 +3266,6 @@ mod tests { mut remove_rx, _nexus_server, _dns_server, - mut storage_harness, } = TestInstanceRunner::new(&log).await; let (resp_tx, resp_rx) = oneshot::channel(); @@ -3301,7 +3288,6 @@ mod tests { }; assert_eq!(state.vmm_state.state, VmmState::Failed); - storage_harness.cleanup().await; logctx.cleanup_successful(); } } diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index 9bd0ad76497..fa8a11c89d8 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -17,15 +17,14 @@ use illumos_utils::link::VnicAllocator; use illumos_utils::opte::PortManager; use illumos_utils::running_zone::ZoneBuilderFactory; use omicron_common::api::external::ByteCount; -use omicron_common::api::external::Generation; use omicron_common::api::internal::nexus::SledVmmState; use omicron_common::api::internal::shared::SledIdentifiers; use omicron_uuid_kinds::PropolisUuid; +use sled_agent_config_reconciler::AvailableDatasetsReceiver; +use sled_agent_config_reconciler::CurrentlyManagedZpoolsReceiver; use sled_agent_types::instance::*; -use sled_storage::manager::StorageHandle; -use sled_storage::resources::AllDisks; use slog::Logger; -use std::collections::{BTreeMap, HashSet}; +use std::collections::BTreeMap; use std::sync::Arc; use tokio::sync::{mpsc, oneshot}; use uuid::Uuid; @@ -66,10 +65,10 @@ pub(crate) struct InstanceManagerServices { pub nexus_client: NexusClient, pub vnic_allocator: VnicAllocator, pub port_manager: PortManager, - pub storage: StorageHandle, pub zone_bundler: ZoneBundler, pub zone_builder_factory: ZoneBuilderFactory, pub metrics_queue: MetricsRequestQueue, + pub available_datasets_rx: AvailableDatasetsReceiver, } // Describes the internals of the "InstanceManager", though most of the @@ -95,7 +94,8 @@ impl InstanceManager { nexus_client: NexusClient, vnic_allocator: VnicAllocator, port_manager: PortManager, - storage: StorageHandle, + currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, + available_datasets_rx: AvailableDatasetsReceiver, zone_bundler: ZoneBundler, vmm_reservoir_manager: VmmReservoirManagerHandle, metrics_queue: MetricsRequestQueue, @@ -105,7 +105,8 @@ impl InstanceManager { nexus_client, vnic_allocator, port_manager, - storage, + currently_managed_zpools_rx, + available_datasets_rx, zone_bundler, ZoneBuilderFactory::new(), vmm_reservoir_manager, @@ -123,7 +124,8 @@ impl InstanceManager { nexus_client: NexusClient, vnic_allocator: VnicAllocator, port_manager: PortManager, 
- storage: StorageHandle, + currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, + available_datasets_rx: AvailableDatasetsReceiver, zone_bundler: ZoneBundler, zone_builder_factory: ZoneBuilderFactory, vmm_reservoir_manager: VmmReservoirManagerHandle, @@ -142,8 +144,8 @@ impl InstanceManager { jobs: BTreeMap::new(), vnic_allocator, port_manager, - storage_generation: None, - storage, + currently_managed_zpools_rx, + available_datasets_rx, zone_bundler, zone_builder_factory, metrics_queue, @@ -315,23 +317,6 @@ impl InstanceManager { .map_err(|_| Error::FailedSendInstanceManagerClosed)?; rx.await? } - - /// Marks instances failed unless they're using storage from `disks`. - /// - /// This function looks for transient zone filesystem usage on expunged - /// zpools. - pub async fn use_only_these_disks( - &self, - disks: AllDisks, - ) -> Result<(), Error> { - let (tx, rx) = oneshot::channel(); - self.inner - .tx - .send(InstanceManagerRequest::OnlyUseDisks { disks, tx }) - .await - .map_err(|_| Error::FailedSendInstanceManagerClosed)?; - rx.await? - } } // Most requests that can be sent to the "InstanceManagerRunner" task. @@ -386,10 +371,6 @@ enum InstanceManagerRequest { propolis_id: PropolisUuid, tx: oneshot::Sender>, }, - OnlyUseDisks { - disks: AllDisks, - tx: oneshot::Sender>, - }, } // Requests that the instance manager stop processing information about a @@ -426,8 +407,8 @@ struct InstanceManagerRunner { vnic_allocator: VnicAllocator, port_manager: PortManager, - storage_generation: Option, - storage: StorageHandle, + currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, + available_datasets_rx: AvailableDatasetsReceiver, zone_bundler: ZoneBundler, zone_builder_factory: ZoneBuilderFactory, metrics_queue: MetricsRequestQueue, @@ -458,6 +439,24 @@ impl InstanceManagerRunner { }, } }, + // If the set of currently-managed zpools has changed, shut down + // any instances due to disks that have disappeared out from + // under them. + result = self.currently_managed_zpools_rx.changed() => { + match result { + Ok(()) => { + self.use_only_currently_managed_zpools().await; + } + Err(_) => { + warn!( + self.log, + "InstanceManager's 'current zpools' channel \ + closed; shutting down", + ); + break; + } + } + } request = self.rx.recv() => { let request_variant = request.as_ref().map(|r| r.to_string()); let result = match request { @@ -495,10 +494,6 @@ impl InstanceManagerRunner { // the state... self.get_instance_state(tx, propolis_id) }, - Some(OnlyUseDisks { disks, tx } ) => { - self.use_only_these_disks(disks).await; - tx.send(Ok(())).map_err(|_| Error::FailedSendClientClosed) - }, None => { warn!(self.log, "InstanceManager's request channel closed; shutting down"); break; @@ -607,10 +602,10 @@ impl InstanceManagerRunner { nexus_client: self.nexus_client.clone(), vnic_allocator: self.vnic_allocator.clone(), port_manager: self.port_manager.clone(), - storage: self.storage.clone(), zone_bundler: self.zone_bundler.clone(), zone_builder_factory: self.zone_builder_factory.clone(), metrics_queue: self.metrics_queue.clone(), + available_datasets_rx: self.available_datasets_rx.clone(), }; let state = crate::instance::InstanceInitialState { @@ -760,24 +755,9 @@ impl InstanceManagerRunner { Ok(()) } - async fn use_only_these_disks(&mut self, disks: AllDisks) { - // Consider the generation number on the incoming request to avoid - // applying old requests. 
- let requested_generation = *disks.generation(); - if let Some(last_gen) = self.storage_generation { - if last_gen >= requested_generation { - // This request looks old, ignore it. - info!(self.log, "use_only_these_disks: Ignoring request"; - "last_gen" => ?last_gen, "requested_gen" => ?requested_generation); - return; - } - } - self.storage_generation = Some(requested_generation); - info!(self.log, "use_only_these_disks: Processing new request"; - "gen" => ?requested_generation); - - let u2_set: HashSet<_> = disks.all_u2_zpools().into_iter().collect(); - + async fn use_only_currently_managed_zpools(&mut self) { + let current_zpools = + self.currently_managed_zpools_rx.current_and_update(); let mut to_remove = vec![]; for (id, instance) in self.jobs.iter() { // If we can read the filesystem pool, consider it. Otherwise, move @@ -792,7 +772,7 @@ impl InstanceManagerRunner { info!(self.log, "use_only_these_disks: Cannot read filesystem pool"; "instance_id" => ?id); continue; }; - if !u2_set.contains(&filesystem_pool) { + if !current_zpools.contains(&filesystem_pool) { to_remove.push(*id); } } diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index 3a63ff5c117..97d1493467d 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -35,7 +35,6 @@ pub mod rack_setup; pub mod server; pub mod services; mod sled_agent; -mod storage_monitor; mod support_bundle; mod swap_device; mod updates; diff --git a/sled-agent/src/long_running_tasks.rs b/sled-agent/src/long_running_tasks.rs index 216961072e5..37b646b8a44 100644 --- a/sled-agent/src/long_running_tasks.rs +++ b/sled-agent/src/long_running_tasks.rs @@ -20,39 +20,30 @@ use crate::config::Config; use crate::hardware_monitor::HardwareMonitor; use crate::services::ServiceManager; use crate::sled_agent::SledAgent; -use crate::storage_monitor::{StorageMonitor, StorageMonitorHandle}; use crate::zone_bundle::ZoneBundler; use bootstore::schemes::v0 as bootstore; use illumos_utils::zpool::ZpoolName; use key_manager::{KeyManager, StorageKeyRequester}; +use sled_agent_config_reconciler::{ + ConfigReconcilerHandle, RawDisksSender, TimeSyncConfig, +}; use sled_agent_types::zone_bundle::CleanupContext; use sled_agent_zone_images::{ZoneImageSourceResolver, ZoneImageZpools}; use sled_hardware::{HardwareManager, SledMode, UnparsedDisk}; use sled_storage::config::MountConfig; use sled_storage::disk::RawSyntheticDisk; -use sled_storage::manager::{StorageHandle, StorageManager}; -use sled_storage::resources::AllDisks; use slog::{Logger, info}; use std::net::Ipv6Addr; +use std::sync::Arc; use tokio::sync::oneshot; /// A mechanism for interacting with all long running tasks that can be shared /// between the bootstrap-agent and sled-agent code. #[derive(Clone)] pub struct LongRunningTaskHandles { - /// A mechanism for retrieving storage keys. This interacts with the - /// [`KeyManager`] task. In the future, there may be other handles for - /// retrieving different types of keys. Separating the handles limits the - /// access for a given key type to the code that holds the handle. - pub storage_key_requester: StorageKeyRequester, - - /// A mechanism for talking to the [`StorageManager`] which is responsible - /// for establishing zpools on disks and managing their datasets. - pub storage_manager: StorageHandle, - - /// A mechanism for talking to the [`StorageMonitor`], which reacts to disk - /// changes and updates the dump devices. 
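// Illustrative sketch only: the watch-driven cleanup above, reduced to a
// plain `tokio::sync::watch` channel of zpool names. The simplified types
// here (String pools, a `Job` placeholder, dropping instead of terminating)
// are assumptions for the sketch, not the reconciler's real API; the patch's
// actual receiver is `CurrentlyManagedZpoolsReceiver`.
use std::collections::{BTreeMap, HashSet};
use tokio::sync::watch;

struct Job; // placeholder for a running instance

async fn prune_on_zpool_changes(
    mut zpools_rx: watch::Receiver<HashSet<String>>,
    mut jobs: BTreeMap<u64, (String, Job)>, // id -> (filesystem pool, job)
) {
    // `changed()` resolves whenever a new zpool set is published and is
    // cancel-safe, so it can also live in a `select!` arm as above.
    while zpools_rx.changed().await.is_ok() {
        let current = zpools_rx.borrow_and_update().clone();
        // Keep only jobs whose filesystem pool is still managed; the real
        // code terminates the affected instances rather than dropping them.
        jobs.retain(|_id, (pool, _job)| current.contains(pool));
    }
    // Sender side closed: the managing task should shut down.
}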
- pub storage_monitor_handle: StorageMonitorHandle, + /// A handle to the set of tasks managed by the sled-agent-config-reconciler + /// system. + pub config_reconciler: Arc, /// A mechanism for interacting with the hardware device tree pub hardware_manager: HardwareManager, @@ -83,11 +74,18 @@ pub async fn spawn_all_longrunning_tasks( oneshot::Sender, ) { let storage_key_requester = spawn_key_manager(log); - let mut storage_manager = - spawn_storage_manager(log, storage_key_requester.clone()); - let storage_monitor_handle = - spawn_storage_monitor(log, storage_manager.clone()); + let time_sync_config = if let Some(true) = config.skip_timesync { + TimeSyncConfig::Skip + } else { + TimeSyncConfig::Normal + }; + let mut config_reconciler = ConfigReconcilerHandle::new( + MountConfig::default(), + storage_key_requester, + time_sync_config, + log, + ); let nongimlet_observed_disks = config.nongimlet_observed_disks.clone().unwrap_or(vec![]); @@ -95,38 +93,35 @@ pub async fn spawn_all_longrunning_tasks( let hardware_manager = spawn_hardware_manager(log, sled_mode, nongimlet_observed_disks).await; - // Start monitoring for hardware changes + // Start monitoring for hardware changes, adding some synthetic disks if + // necessary. + let raw_disks_tx = config_reconciler.raw_disks_tx(); + upsert_synthetic_disks_if_needed(&log, &raw_disks_tx, &config).await; let (sled_agent_started_tx, service_manager_ready_tx) = - spawn_hardware_monitor(log, &hardware_manager, &storage_manager); - - // Add some synthetic disks if necessary. - upsert_synthetic_disks_if_needed(&log, &storage_manager, &config).await; + spawn_hardware_monitor(log, &hardware_manager, raw_disks_tx); // Wait for the boot disk so that we can work with any ledgers, // such as those needed by the bootstore and sled-agent info!(log, "Waiting for boot disk"); - let (disk_id, boot_zpool) = storage_manager.wait_for_boot_disk().await; + let disk_id = config_reconciler.wait_for_boot_disk().await; info!(log, "Found boot disk {:?}", disk_id); let all_disks = storage_manager.get_latest_disks().await; let bootstore = spawn_bootstore_tasks( log, - &all_disks, + &config_reconciler, &hardware_manager, global_zone_bootstrap_ip, ) .await; - let zone_bundler = - spawn_zone_bundler_tasks(log, &mut storage_manager).await; + let zone_bundler = spawn_zone_bundler_tasks(log, &config_reconciler); let zone_image_resolver = make_zone_image_resolver(log, &all_disks, &boot_zpool); ( LongRunningTaskHandles { - storage_key_requester, - storage_manager, - storage_monitor_handle, + config_reconciler: Arc::new(config_reconciler), hardware_manager, bootstore, zone_bundler, @@ -146,32 +141,6 @@ fn spawn_key_manager(log: &Logger) -> StorageKeyRequester { storage_key_requester } -fn spawn_storage_manager( - log: &Logger, - key_requester: StorageKeyRequester, -) -> StorageHandle { - info!(log, "Starting StorageManager"); - let (manager, handle) = - StorageManager::new(log, MountConfig::default(), key_requester); - tokio::spawn(async move { - manager.run().await; - }); - handle -} - -fn spawn_storage_monitor( - log: &Logger, - storage_handle: StorageHandle, -) -> StorageMonitorHandle { - info!(log, "Starting StorageMonitor"); - let (storage_monitor, handle) = - StorageMonitor::new(log, MountConfig::default(), storage_handle); - tokio::spawn(async move { - storage_monitor.run().await; - }); - handle -} - async fn spawn_hardware_manager( log: &Logger, sled_mode: SledMode, @@ -197,11 +166,11 @@ async fn spawn_hardware_manager( fn spawn_hardware_monitor( log: &Logger, 
hardware_manager: &HardwareManager, - storage_handle: &StorageHandle, + raw_disks_tx: RawDisksSender, ) -> (oneshot::Sender, oneshot::Sender) { info!(log, "Starting HardwareMonitor"); let (mut monitor, sled_agent_started_tx, service_manager_ready_tx) = - HardwareMonitor::new(log, hardware_manager, storage_handle); + HardwareMonitor::new(log, hardware_manager, raw_disks_tx); tokio::spawn(async move { monitor.run().await; }); @@ -210,12 +179,16 @@ fn spawn_hardware_monitor( async fn spawn_bootstore_tasks( log: &Logger, - all_disks: &AllDisks, + config_reconciler: &ConfigReconcilerHandle, hardware_manager: &HardwareManager, global_zone_bootstrap_ip: Ipv6Addr, ) -> bootstore::NodeHandle { let config = new_bootstore_config( - all_disks, + &config_reconciler + .internal_disks_rx() + .current() + .all_cluster_datasets() + .collect::>(), hardware_manager.baseboard(), global_zone_bootstrap_ip, ) @@ -240,12 +213,16 @@ async fn spawn_bootstore_tasks( // `ZoneBundler::new` spawns a periodic cleanup task that runs indefinitely async fn spawn_zone_bundler_tasks( log: &Logger, - storage_handle: &mut StorageHandle, + config_reconciler: &ConfigReconcilerHandle, ) -> ZoneBundler { info!(log, "Starting ZoneBundler related tasks"); let log = log.new(o!("component" => "ZoneBundler")); - ZoneBundler::new(log, storage_handle.clone(), CleanupContext::default()) - .await + ZoneBundler::new( + log, + config_reconciler.internal_disks_rx().clone(), + config_reconciler.available_datasets_rx(), + CleanupContext::default(), + ) } fn make_zone_image_resolver( @@ -262,7 +239,7 @@ fn make_zone_image_resolver( async fn upsert_synthetic_disks_if_needed( log: &Logger, - storage_manager: &StorageHandle, + raw_disks_tx: &RawDisksSender, config: &Config, ) { if let Some(vdevs) = &config.vdevs { @@ -275,7 +252,7 @@ async fn upsert_synthetic_disks_if_needed( let disk = RawSyntheticDisk::load(vdev, i.try_into().unwrap()) .expect("Failed to parse synthetic disk") .into(); - storage_manager.detected_raw_disk(disk).await.await.unwrap(); + raw_disks_tx.add_or_update_raw_disk(disk, log); } } } diff --git a/sled-agent/src/probe_manager.rs b/sled-agent/src/probe_manager.rs index 75729e3a872..c4f97ed22ac 100644 --- a/sled-agent/src/probe_manager.rs +++ b/sled-agent/src/probe_manager.rs @@ -10,8 +10,8 @@ use nexus_client::types::{ BackgroundTasksActivateRequest, ProbeExternalIp, ProbeInfo, }; use omicron_common::api::external::{ - Generation, VpcFirewallRuleAction, VpcFirewallRuleDirection, - VpcFirewallRulePriority, VpcFirewallRuleStatus, + VpcFirewallRuleAction, VpcFirewallRuleDirection, VpcFirewallRulePriority, + VpcFirewallRuleStatus, }; use omicron_common::api::internal::shared::{ NetworkInterface, ResolvedVpcFirewallRule, @@ -19,9 +19,10 @@ use omicron_common::api::internal::shared::{ use omicron_uuid_kinds::{GenericUuid, OmicronZoneUuid}; use rand::SeedableRng; use rand::prelude::IteratorRandom; -use sled_storage::dataset::ZONE_DATASET; -use sled_storage::manager::StorageHandle; -use sled_storage::resources::AllDisks; +use sled_agent_config_reconciler::{ + AvailableDatasetsReceiver, CurrentlyManagedZpools, + CurrentlyManagedZpoolsReceiver, +}; use slog::{Logger, error, warn}; use std::collections::{HashMap, HashSet}; use std::hash::{Hash, Hasher}; @@ -50,7 +51,6 @@ pub(crate) struct ProbeManager { } struct RunningProbes { - storage_generation: Option, zones: HashMap, } @@ -60,10 +60,10 @@ pub(crate) struct ProbeManagerInner { log: Logger, sled_id: Uuid, vnic_allocator: VnicAllocator, - storage: StorageHandle, port_manager: 
PortManager, metrics_queue: MetricsRequestQueue, running_probes: Mutex, + available_datasets_rx: AvailableDatasetsReceiver, zones_api: Arc, } @@ -73,9 +73,9 @@ impl ProbeManager { sled_id: Uuid, nexus_client: NexusClient, etherstub: Etherstub, - storage: StorageHandle, port_manager: PortManager, metrics_queue: MetricsRequestQueue, + available_datasets_rx: AvailableDatasetsReceiver, log: Logger, ) -> Self { Self { @@ -87,69 +87,24 @@ impl ProbeManager { Arc::new(illumos_utils::dladm::Dladm::real_api()), ), running_probes: Mutex::new(RunningProbes { - storage_generation: None, zones: HashMap::new(), }), nexus_client, log, sled_id, - storage, port_manager, metrics_queue, + available_datasets_rx, zones_api: Arc::new(illumos_utils::zone::Zones::real_api()), }), } } - pub(crate) async fn run(&self) { - self.inner.run().await; - } - - /// Removes any probes using filesystem roots on zpools that are not - /// contained in the set of "disks". - pub(crate) async fn use_only_these_disks(&self, disks: &AllDisks) { - let u2_set: HashSet<_> = disks.all_u2_zpools().into_iter().collect(); - let mut probes = self.inner.running_probes.lock().await; - - // Consider the generation number on the incoming request to avoid - // applying old requests. - let requested_generation = *disks.generation(); - if let Some(last_gen) = probes.storage_generation { - if last_gen >= requested_generation { - // This request looks old, ignore it. - info!(self.inner.log, "use_only_these_disks: Ignoring request"; - "last_gen" => ?last_gen, "requested_gen" => ?requested_generation); - return; - } - } - probes.storage_generation = Some(requested_generation); - info!(self.inner.log, "use_only_these_disks: Processing new request"; - "gen" => ?requested_generation); - - let to_remove = probes - .zones - .iter() - .filter_map(|(id, probe)| { - let probe_pool = match probe.root_zpool() { - ZpoolOrRamdisk::Zpool(zpool_name) => zpool_name, - ZpoolOrRamdisk::Ramdisk => { - info!( - self.inner.log, - "use_only_these_disks: removing probe on ramdisk"; - "id" => ?id, - ); - return None; - } - }; - - if !u2_set.contains(probe_pool) { Some(*id) } else { None } - }) - .collect::>(); - - for probe_id in to_remove { - info!(self.inner.log, "use_only_these_disks: Removing probe"; "probe_id" => ?probe_id); - self.inner.remove_probe_locked(&mut probes, probe_id).await; - } + pub(crate) async fn run( + &self, + currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, + ) { + self.inner.run(currently_managed_zpools_rx).await; } } @@ -214,18 +169,55 @@ impl TryFrom for ProbeState { impl ProbeManagerInner { /// Run the probe manager. If it's already running this is a no-op. - async fn run(self: &Arc) { + async fn run( + self: &Arc, + currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, + ) { let mut join_handle = self.join_handle.lock().await; if join_handle.is_none() { - *join_handle = Some(self.clone().reconciler()) + *join_handle = + Some(self.clone().reconciler(currently_managed_zpools_rx)) } } /// Run the reconciler loop on a background thread. - fn reconciler(self: Arc) -> JoinHandle<()> { + fn reconciler( + self: Arc, + mut currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, + ) -> JoinHandle<()> { tokio::spawn(async move { loop { - sleep(RECONCILIATION_INTERVAL).await; + let sleep_fut = sleep(RECONCILIATION_INTERVAL); + tokio::pin!(sleep_fut); + + // Wait until the next reconciliation tick, but handle any + // changes to the set of disks in the meantime. + loop { + tokio::select! 
{ + _ = &mut sleep_fut => break, + + // Cancel-safe per docs on `changed()` + result = currently_managed_zpools_rx.changed() => { + match result { + Ok(()) => { + self.use_only_these_disks( + ¤tly_managed_zpools_rx + .current_and_update() + ).await; + continue; + } + Err(_) => { + warn!( + self.log, + "ProbeManager's 'current zpools' \ + channel closed; shutting down", + ); + return; + } + } + } + } + } // Collect the target and current state. Use set operations // to determine what probes need to be added, removed and/or @@ -268,6 +260,37 @@ impl ProbeManagerInner { }) } + /// Removes any probes using filesystem roots on zpools that are not + /// contained in the set of "disks". + async fn use_only_these_disks(&self, disks: &CurrentlyManagedZpools) { + let mut probes = self.running_probes.lock().await; + + let to_remove = probes + .zones + .iter() + .filter_map(|(id, probe)| { + let probe_pool = match probe.root_zpool() { + ZpoolOrRamdisk::Zpool(zpool_name) => zpool_name, + ZpoolOrRamdisk::Ramdisk => { + info!( + self.log, + "use_only_these_disks: removing probe on ramdisk"; + "id" => ?id, + ); + return None; + } + }; + + if !disks.contains(probe_pool) { Some(*id) } else { None } + }) + .collect::>(); + + for probe_id in to_remove { + info!(self.log, "use_only_these_disks: Removing probe"; "probe_id" => ?probe_id); + self.remove_probe_locked(&mut probes, probe_id).await; + } + } + /// Add a set of probes to this sled. /// /// Returns the number of inserted probes. @@ -291,12 +314,9 @@ impl ProbeManagerInner { /// boots the probe zone. async fn add_probe(self: &Arc, probe: &ProbeState) -> Result<()> { let mut rng = rand::rngs::StdRng::from_entropy(); - let current_disks = self - .storage - .get_latest_disks() - .await - .all_u2_mountpoints(ZONE_DATASET); - let zone_root_path = current_disks + let zone_root_path = self + .available_datasets_rx + .all_mounted_zone_root_datasets() .into_iter() .choose(&mut rng) .ok_or_else(|| anyhow!("u2 not found"))?; diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index ad1778501bb..436cd4503fc 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -117,6 +117,7 @@ use serde::{Deserialize, Serialize}; use sled_agent_client::{ Client as SledAgentClient, Error as SledAgentError, types as SledAgentTypes, }; +use sled_agent_config_reconciler::InternalDisksReceiver; use sled_agent_types::early_networking::{ EarlyNetworkConfig, EarlyNetworkConfigBody, }; @@ -127,8 +128,6 @@ use sled_agent_types::rack_ops::RssStep; use sled_agent_types::sled::StartSledAgentRequest; use sled_agent_types::time_sync::TimeSync; use sled_hardware_types::underlay::BootstrapInterface; -use sled_storage::dataset::CONFIG_DATASET; -use sled_storage::manager::StorageHandle; use slog::Logger; use slog_error_chain::InlineErrorChain; use std::collections::{BTreeMap, BTreeSet, btree_map}; @@ -257,15 +256,14 @@ impl RackSetupService { /// Arguments: /// - `log`: The logger. /// - `config`: The config file, which is used to setup the rack. 
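// Illustrative sketch only: the "tick, but react to disk changes while
// waiting" loop used by the probe reconciler, again with a plain
// `watch::Receiver<HashSet<String>>` standing in for the real
// `CurrentlyManagedZpoolsReceiver` (a simplifying assumption).
use std::collections::HashSet;
use std::time::Duration;
use tokio::sync::watch;
use tokio::time::sleep;

async fn reconciler_loop(mut zpools_rx: watch::Receiver<HashSet<String>>) {
    loop {
        let sleep_fut = sleep(Duration::from_secs(60));
        tokio::pin!(sleep_fut);

        // Wait out the interval, servicing zpool changes as they arrive.
        loop {
            tokio::select! {
                _ = &mut sleep_fut => break,
                // `changed()` is cancel-safe: if the timer wins this race,
                // no change notification is marked as seen and lost.
                result = zpools_rx.changed() => {
                    if result.is_err() {
                        return; // sender closed; shut down
                    }
                    let _current = zpools_rx.borrow_and_update().clone();
                    // ...remove probes whose pool is gone, then keep waiting.
                    continue;
                }
            }
        }

        // ...periodic reconciliation of target vs. current probes goes here.
    }
}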
- /// - `storage_manager`: A handle for interacting with the storage manager - /// task + /// - `internal_disks_rx`: Tells us about available internal disks /// - `local_bootstrap_agent`: Communication channel by which we can send /// commands to our local bootstrap-agent (e.g., to start sled-agents) /// - `bootstore` - A handle to call bootstore APIs pub(crate) fn new( log: Logger, config: Config, - storage_manager: StorageHandle, + internal_disks_rx: InternalDisksReceiver, local_bootstrap_agent: BootstrapAgentHandle, bootstore: bootstore::NodeHandle, step_tx: watch::Sender, @@ -275,7 +273,7 @@ impl RackSetupService { if let Err(e) = svc .run( &config, - &storage_manager, + &internal_disks_rx, local_bootstrap_agent, bootstore, step_tx, @@ -375,86 +373,9 @@ impl ServiceInner { "datasets" => ?sled_config.datasets, "zones" => ?sled_config.zones, ); - let result = client.omicron_config_put(&sled_config).await; - let error = match result { - Ok(response) => { - let response = response.into_inner(); - - // An HTTP OK may contain _partial_ success: check whether - // we got any individual disk failures, and split those out - // into transient/permanent cases based on whether they - // indicate we should retry. - let disk_errors = - response.disks.into_iter().filter_map(|status| { - status.err.map(|err| (status.identity, err)) - }); - let mut transient_errors = Vec::new(); - let mut permanent_errors = Vec::new(); - for (identity, error) in disk_errors { - if error.retryable() { - transient_errors.push(format!( - "Retryable error initializing disk \ - {} / {} / {}: {}", - identity.vendor, - identity.model, - identity.serial, - InlineErrorChain::new(&error) - )); - } else { - permanent_errors.push(format!( - "Non-retryable error initializing disk \ - {} / {} / {}: {}", - identity.vendor, - identity.model, - identity.serial, - InlineErrorChain::new(&error) - )); - } - } - if !permanent_errors.is_empty() { - return Err(BackoffError::permanent( - SetupServiceError::DiskInitializationPermanent { - permanent_errors, - }, - )); - } - if !transient_errors.is_empty() { - return Err(BackoffError::transient( - SetupServiceError::DiskInitializationTransient { - transient_errors, - }, - )); - } - - // No individual disk errors reported; all disks were - // initialized. Check for any dataset errors; these are not - // retryable. - let dataset_errors = response - .datasets - .into_iter() - .filter_map(|status| { - status.err.map(|err| { - format!( - "Error initializing dataset {}: {err}", - status.dataset_name.full_name() - ) - }) - }) - .collect::>(); - if !dataset_errors.is_empty() { - return Err(BackoffError::permanent( - SetupServiceError::DatasetInitialization { - errors: dataset_errors, - }, - )); - } - - // No individual dataset errors reported. We don't get - // status for individual zones (any failure there results in - // an HTTP-level error), so everything is good. - return Ok(()); - } - Err(error) => error, + let Err(error) = client.omicron_config_put(&sled_config).await + else { + return Ok(()); }; if let sled_agent_client::Error::ErrorResponse(response) = &error { @@ -502,6 +423,16 @@ impl ServiceInner { Ok(()) } + // Wait until the config reconciler on the target sled has successfully + // reconciled the config at `generation`. 
+ async fn wait_for_config_reconciliation_on_sled( + &self, + _sled_address: SocketAddrV6, + _generation: Generation, + ) -> Result<(), SetupServiceError> { + unimplemented!("needs updated inventory") + } + // Ensure that the desired sled configuration for a particular zone version // is deployed. // @@ -530,21 +461,27 @@ impl ServiceInner { })? .clone(); + // We bump the zone generation as we step through phases of + // RSS; use that as the overall sled config generation. + let generation = zones_config.generation; let sled_config = OmicronSledConfig { - // We bump the zone generation as we step through phases of - // RSS; use that as the overall sled config generation. - generation: zones_config.generation, + generation, disks: config .disks .iter() .map(|c| c.clone().into()) .collect(), datasets: config.datasets.values().cloned().collect(), - zones: zones_config.zones.iter().cloned().collect(), + zones: zones_config.zones.into_iter().collect(), remove_mupdate_override: None, }; self.set_config_on_sled(*sled_address, sled_config).await?; + self.wait_for_config_reconciliation_on_sled( + *sled_address, + generation, + ) + .await?; Ok::<(), SetupServiceError>(()) }), @@ -1121,7 +1058,7 @@ impl ServiceInner { async fn run( &self, config: &Config, - storage_manager: &StorageHandle, + internal_disks_rx: &InternalDisksReceiver, local_bootstrap_agent: BootstrapAgentHandle, bootstore: bootstore::NodeHandle, step_tx: watch::Sender, @@ -1134,19 +1071,18 @@ impl ServiceInner { config.az_subnet(), )?; - let started_marker_paths: Vec = storage_manager - .get_latest_disks() - .await - .all_m2_mountpoints(CONFIG_DATASET) - .into_iter() + let config_dataset_paths = internal_disks_rx + .current() + .all_config_datasets() + .collect::>(); + + let started_marker_paths: Vec = config_dataset_paths + .iter() .map(|p| p.join(RSS_STARTED_FILENAME)) .collect(); - let completed_marker_paths: Vec = storage_manager - .get_latest_disks() - .await - .all_m2_mountpoints(CONFIG_DATASET) - .into_iter() + let completed_marker_paths: Vec = config_dataset_paths + .iter() .map(|p| p.join(RSS_COMPLETED_FILENAME)) .collect(); diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 84a4e33e2ce..31f2f6a70f8 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -20,12 +20,11 @@ //! of what other services Nexus wants to have executing on the sled. //! //! To accomplish this, the following interfaces are exposed: -//! - [ServiceManager::ensure_all_omicron_zones_persistent] exposes an API to -//! request a set of Omicron zones that should persist beyond reboot. +//! - [ServiceManager::start_omicron_zone] exposes an API to start a new Omicron +//! zone. //! - [ServiceManager::activate_switch] exposes an API to specifically enable //! or disable (via [ServiceManager::deactivate_switch]) the switch zone. 
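// Hypothetical sketch only: the reconciliation wait is deliberately left
// `unimplemented!` above until inventory reports reconciliation status. One
// plausible shape is a simple poll loop; `fetch_reconciled_gen` is an assumed
// callback (not an existing sled-agent-client call), and `u64` stands in for
// `Generation`.
use std::time::Duration;
use tokio::time::sleep;

async fn wait_for_reconciliation<F, Fut, E>(
    target_generation: u64,
    mut fetch_reconciled_gen: F,
) -> Result<(), E>
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = Result<u64, E>>,
{
    loop {
        // Ask the target sled which config generation it last reconciled.
        let reconciled = fetch_reconciled_gen().await?;
        if reconciled >= target_generation {
            return Ok(());
        }
        // Not caught up yet; back off briefly and poll again.
        sleep(Duration::from_secs(1)).await;
    }
}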
-use crate::artifact_store::ArtifactStore; use crate::bootstrap::BootstrapNetworking; use crate::bootstrap::early_networking::{ EarlyNetworkSetup, EarlyNetworkSetupError, @@ -35,7 +34,6 @@ use crate::ddm_reconciler::DdmReconciler; use crate::metrics::MetricsRequestQueue; use crate::params::{DendriteAsic, OmicronZoneTypeExt}; use crate::profile::*; -use crate::zone_bundle::ZoneBundler; use anyhow::anyhow; use camino::{Utf8Path, Utf8PathBuf}; use clickhouse_admin_types::CLICKHOUSE_KEEPER_CONFIG_DIR; @@ -94,41 +92,33 @@ use omicron_common::backoff::{ BackoffError, retry_notify, retry_policy_internal_service_aggressive, }; use omicron_common::disk::{DatasetKind, DatasetName}; -use omicron_common::ledger::{self, Ledger, Ledgerable}; +use omicron_common::ledger::Ledgerable; use omicron_ddm_admin_client::DdmError; use omicron_uuid_kinds::OmicronZoneUuid; use rand::prelude::SliceRandom; -use sled_agent_types::{ - sled::SWITCH_ZONE_BASEBOARD_FILE, time_sync::TimeSync, - zone_bundle::ZoneBundleCause, -}; use sled_agent_zone_images::{ZoneImageSourceResolver, ZoneImageZpools}; +use sled_agent_config_reconciler::InternalDisksReceiver; +use sled_agent_types::sled::SWITCH_ZONE_BASEBOARD_FILE; use sled_hardware::SledMode; use sled_hardware::is_gimlet; use sled_hardware::underlay; use sled_hardware_types::Baseboard; use sled_storage::config::MountConfig; -use sled_storage::dataset::{CONFIG_DATASET, ZONE_DATASET}; -use sled_storage::manager::StorageHandle; +use sled_storage::dataset::{INSTALL_DATASET, ZONE_DATASET}; use slog::Logger; use slog_error_chain::InlineErrorChain; -use std::collections::BTreeMap; -use std::collections::HashSet; use std::net::{IpAddr, Ipv6Addr, SocketAddr}; -use std::str::FromStr; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, OnceLock}; use tokio::io::AsyncWriteExt; use tokio::sync::Mutex; -use tokio::sync::{MutexGuard, oneshot}; +use tokio::sync::oneshot; use tokio::task::JoinHandle; use tufaceous_artifact::ArtifactHash; use uuid::Uuid; use illumos_utils::zone::Zones; -const IPV6_UNSPECIFIED: IpAddr = IpAddr::V6(Ipv6Addr::UNSPECIFIED); - // These are all the same binary. They just reside at different paths. const CLICKHOUSE_SERVER_BINARY: &str = "/opt/oxide/clickhouse_server/clickhouse"; @@ -157,9 +147,6 @@ pub enum Error { #[error("Failed to find device {device}")] MissingDevice { device: String }, - #[error("Failed to access ledger: {0}")] - Ledger(#[from] ledger::Error), - #[error("Sled Agent not initialized yet")] SledAgentNotReady, @@ -387,14 +374,6 @@ impl From for omicron_common::api::external::Error { } } -/// Result of [ServiceManager::load_services] -pub enum LoadServicesResult { - /// We didn't load anything, there wasn't anything to load - NoServicesToLoad, - /// We successfully loaded the zones from our ledger. - ServicesLoaded, -} - fn display_zone_init_errors(errors: &[(String, Box)]) -> String { if errors.len() == 1 { return format!( @@ -513,9 +492,6 @@ impl RealSystemApi { impl SystemApi for RealSystemApi {} -// The filename of the ledger, within the provided directory. -const ZONES_LEDGER_FILENAME: &str = "omicron-zones.json"; - /// Combines the Nexus-provided `OmicronZonesConfig` (which describes what Nexus /// wants for all of its zones) with the locally-determined configuration for /// these zones. @@ -729,57 +705,23 @@ enum SwitchZoneState { }, } -// The return type for `start_omicron_zones`. -// -// When multiple zones are started concurrently, some can fail while others -// succeed. 
This structure allows the function to return this nuanced -// information. -#[must_use] -struct StartZonesResult { - // The set of zones which have successfully started. - new_zones: Vec, - - // The set of (zone name, error) of zones that failed to start. - errors: Vec<(String, Error)>, -} - -// A running zone and the configuration which started it. -#[derive(Debug)] -struct OmicronZone { - runtime: RunningZone, - config: OmicronZoneConfigLocal, -} - -impl OmicronZone { - fn name(&self) -> &str { - self.runtime.name() - } -} - -type ZoneMap = BTreeMap; - /// Manages miscellaneous Sled-local services. pub struct ServiceManagerInner { log: Logger, global_zone_bootstrap_link_local_address: Ipv6Addr, switch_zone: Mutex, sled_mode: SledMode, - time_sync_config: TimeSyncConfig, time_synced: AtomicBool, switch_zone_maghemite_links: Vec, sidecar_revision: SidecarRevision, - // Zones representing running services - zones: Mutex, underlay_vnic_allocator: VnicAllocator, underlay_vnic: EtherstubVnic, bootstrap_vnic_allocator: VnicAllocator, ddm_reconciler: DdmReconciler, sled_info: OnceLock, switch_zone_bootstrap_address: Ipv6Addr, - storage: StorageHandle, - zone_bundler: ZoneBundler, zone_image_resolver: ZoneImageSourceResolver, - ledger_directory_override: OnceLock, + internal_disks_rx: InternalDisksReceiver, system_api: Box, } @@ -795,16 +737,6 @@ struct SledAgentInfo { metrics_queue: MetricsRequestQueue, } -pub(crate) enum TimeSyncConfig { - // Waits for NTP to confirm that time has been synchronized. - Normal, - // Skips timesync unconditionally. - Skip, - // Fails timesync unconditionally. - #[cfg(all(test, target_os = "illumos"))] - Fail, -} - #[derive(Clone)] pub struct ServiceManager { inner: Arc, @@ -909,39 +841,33 @@ impl ServiceManager { /// /// Args: /// - `log`: The logger - /// - `ddm_client`: Client pointed to our localhost ddmd + /// - `ddm_reconciler`: Handle for configuring our localhost ddmd /// - `bootstrap_networking`: Collection of etherstubs/VNICs set up when /// bootstrap agent begins /// - `sled_mode`: The sled's mode of operation (Gimlet vs Scrimlet). - /// - `time_sync_config`: Describes how the sled awaits synced time. /// - `sidecar_revision`: Rev of attached sidecar, if present. /// - `switch_zone_maghemite_links`: List of physical links on which /// maghemite should listen. - /// - `storage`: Shared handle to get the current state of disks/zpools. 
- #[allow(clippy::too_many_arguments)] + /// - `internal_disks_rx`: watch channel for changes to internal disks pub(crate) fn new( log: &Logger, ddm_reconciler: DdmReconciler, bootstrap_networking: BootstrapNetworking, sled_mode: SledMode, - time_sync_config: TimeSyncConfig, sidecar_revision: SidecarRevision, switch_zone_maghemite_links: Vec, - storage: StorageHandle, - zone_bundler: ZoneBundler, zone_image_resolver: ZoneImageSourceResolver, + internal_disks_rx: InternalDisksReceiver, ) -> Self { Self::new_inner( log, ddm_reconciler, bootstrap_networking, sled_mode, - time_sync_config, sidecar_revision, switch_zone_maghemite_links, - storage, - zone_bundler, zone_image_resolver, + internal_disks_rx, RealSystemApi::new(), ) } @@ -952,12 +878,10 @@ impl ServiceManager { ddm_reconciler: DdmReconciler, bootstrap_networking: BootstrapNetworking, sled_mode: SledMode, - time_sync_config: TimeSyncConfig, sidecar_revision: SidecarRevision, switch_zone_maghemite_links: Vec, - storage: StorageHandle, - zone_bundler: ZoneBundler, zone_image_resolver: ZoneImageSourceResolver, + internal_disks_rx: InternalDisksReceiver, system_api: Box, ) -> Self { let log = log.new(o!("component" => "ServiceManager")); @@ -971,11 +895,9 @@ impl ServiceManager { // Load the switch zone if it already exists? switch_zone: Mutex::new(SwitchZoneState::Disabled), sled_mode, - time_sync_config, time_synced: AtomicBool::new(false), sidecar_revision, switch_zone_maghemite_links, - zones: Mutex::new(BTreeMap::new()), underlay_vnic_allocator: VnicAllocator::new( "Service", bootstrap_networking.underlay_etherstub, @@ -991,25 +913,19 @@ impl ServiceManager { sled_info: OnceLock::new(), switch_zone_bootstrap_address: bootstrap_networking .switch_zone_bootstrap_ip, - storage, - zone_bundler, zone_image_resolver, - ledger_directory_override: OnceLock::new(), + internal_disks_rx, system_api, }), } } - #[cfg(all(test, target_os = "illumos"))] - fn override_ledger_directory(&self, path: Utf8PathBuf) { - self.inner.ledger_directory_override.set(path).unwrap(); - } + // TODO-john still needed? #[cfg(all(test, target_os = "illumos"))] fn override_image_directory(&self, path: Utf8PathBuf) { self.inner.zone_image_resolver.override_image_directory(path); } - pub(crate) fn ddm_reconciler(&self) -> &DdmReconciler { &self.inner.ddm_reconciler } @@ -1018,126 +934,6 @@ impl ServiceManager { self.inner.switch_zone_bootstrap_address } - // TODO: This function refers to an old, deprecated format for storing - // service information. It is not deprecated for cleanup purposes, but - // should otherwise not be called in new code. - async fn all_service_ledgers(&self) -> Vec { - pub const SERVICES_LEDGER_FILENAME: &str = "services.json"; - if let Some(dir) = self.inner.ledger_directory_override.get() { - return vec![dir.join(SERVICES_LEDGER_FILENAME)]; - } - let resources = self.inner.storage.get_latest_disks().await; - resources - .all_m2_mountpoints(CONFIG_DATASET) - .into_iter() - .map(|p| p.join(SERVICES_LEDGER_FILENAME)) - .collect() - } - - async fn all_omicron_zone_ledgers(&self) -> Vec { - if let Some(dir) = self.inner.ledger_directory_override.get() { - return vec![dir.join(ZONES_LEDGER_FILENAME)]; - } - let resources = self.inner.storage.get_latest_disks().await; - resources - .all_m2_mountpoints(CONFIG_DATASET) - .into_iter() - .map(|p| p.join(ZONES_LEDGER_FILENAME)) - .collect() - } - - // Loads persistent configuration about any Omicron-managed zones that we're - // supposed to be running. 
- async fn load_ledgered_zones( - &self, - // This argument attempts to ensure that the caller holds the right - // lock. - _map: &MutexGuard<'_, ZoneMap>, - ) -> Result>, Error> { - let log = &self.inner.log; - - // NOTE: This is a function where we used to access zones by "service - // ledgers". This format has since been deprecated, and these files, - // if they exist, should not be used. - // - // We try to clean them up at this spot. Deleting this "removal" code - // in the future should be a safe operation; this is a non-load-bearing - // cleanup. - for path in self.all_service_ledgers().await { - match tokio::fs::remove_file(&path).await { - Ok(_) => (), - Err(ref e) if e.kind() == std::io::ErrorKind::NotFound => (), - Err(e) => { - warn!( - log, - "Failed to delete old service ledger"; - "err" => ?e, - "path" => ?path, - ); - } - } - } - - // Try to load the current software's zone ledger - let ledger_paths = self.all_omicron_zone_ledgers().await; - info!(log, "Loading Omicron zones from: {ledger_paths:?}"); - let maybe_ledger = - Ledger::::new(log, ledger_paths.clone()) - .await; - - let Some(ledger) = maybe_ledger else { - info!(log, "Loading Omicron zones - No zones detected"); - return Ok(None); - }; - - info!( - log, - "Loaded Omicron zones"; - "zones_config" => ?ledger.data() - ); - Ok(Some(ledger)) - } - - // TODO(https://github.com/oxidecomputer/omicron/issues/2973): - // - // The sled agent retries this function indefinitely at the call-site, but - // we could be smarter. - // - // - If we know that disks are missing, we could wait for them - // - We could permanently fail if we are able to distinguish other errors - // more clearly. - pub async fn load_services(&self) -> Result { - let log = &self.inner.log; - let mut existing_zones = self.inner.zones.lock().await; - let Some(mut ledger) = - self.load_ledgered_zones(&existing_zones).await? - else { - // Nothing found -- nothing to do. - info!( - log, - "Loading Omicron zones - \ - no zones nor old-format services found" - ); - return Ok(LoadServicesResult::NoServicesToLoad); - }; - - let zones_config = ledger.data_mut(); - info!( - log, - "Loaded Omicron zones"; - "zones_config" => ?zones_config - ); - let omicron_zones_config = - zones_config.clone().to_omicron_zones_config(); - - self.ensure_all_omicron_zones( - &mut existing_zones, - omicron_zones_config, - ) - .await?; - Ok(LoadServicesResult::ServicesLoaded) - } - /// Sets up "Sled Agent" information, including underlay info. /// /// Any subsequent calls after the first invocation return an error. @@ -1738,6 +1534,33 @@ impl ServiceManager { let zpools = ZoneImageZpools { root: &all_disks.mount_config().root, all_m2_zpools: all_disks.all_m2_zpools(), +/* TODO-john fix this + let zone_image_file_name = match image_source { + OmicronZoneImageSource::InstallDataset => None, + OmicronZoneImageSource::Artifact { hash } => Some(hash.to_string()), + }; + let internal_disks = self.inner.internal_disks_rx.current(); + let zone_image_paths = match image_source { + OmicronZoneImageSource::InstallDataset => { + // Look for the image in the ramdisk first + let mut zone_image_paths = + vec![Utf8PathBuf::from("/opt/oxide")]; + + // If the boot disk exists, look for the image in the "install" + // dataset there too. + if let Some(boot_zpool) = internal_disks.boot_disk_zpool() { + zone_image_paths.push(boot_zpool.dataset_mountpoint( + &internal_disks.mount_config().root, + INSTALL_DATASET, + )); + } + + zone_image_paths + } + OmicronZoneImageSource::Artifact { .. 
} => { + internal_disks.all_artifact_datasets().collect() + } +*/ }; let boot_zpool = all_disks.boot_disk().map(|(_, boot_zpool)| boot_zpool); @@ -3492,13 +3315,13 @@ impl ServiceManager { // - `all_u2_pools` provides a snapshot into durable storage on this sled, // which gives the storage manager an opportunity to validate the zone's // storage configuration against the reality of the current sled. - async fn start_omicron_zone( + pub(crate) async fn start_omicron_zone( &self, mount_config: &MountConfig, zone: &OmicronZoneConfig, time_is_synchronized: bool, - all_u2_pools: &Vec, - ) -> Result { + all_u2_pools: &[ZpoolName], + ) -> Result { // Ensure the zone has been fully removed before we try to boot it. // // This ensures that old "partially booted/stopped" zones do not @@ -3535,343 +3358,7 @@ impl ServiceManager { ) .await?; - Ok(OmicronZone { runtime, config }) - } - - // Concurrently attempts to start all zones identified by requests. - // - // This method is NOT idempotent. - // - // If we try to start ANY zones concurrently, the result is contained - // in the `StartZonesResult` value. This will contain the set of zones which - // were initialized successfully, as well as the set of zones which failed - // to start. - async fn start_omicron_zones( - &self, - mount_config: &MountConfig, - requests: impl Iterator + Clone, - time_is_synchronized: bool, - all_u2_pools: &Vec, - ) -> Result { - if let Some(name) = - requests.clone().map(|zone| zone.zone_name()).duplicates().next() - { - return Err(Error::BadServiceRequest { - service: name, - message: "Should not initialize zone twice".to_string(), - }); - } - - let futures = requests.map(|zone| async move { - self.start_omicron_zone( - mount_config, - &zone, - time_is_synchronized, - all_u2_pools, - ) - .await - .map_err(|err| (zone.zone_name(), err)) - }); - - let results = futures::future::join_all(futures).await; - - let mut new_zones = Vec::new(); - let mut errors = Vec::new(); - for result in results { - match result { - Ok(zone) => { - info!(self.inner.log, "Zone started"; "zone" => zone.name()); - new_zones.push(zone); - } - Err((name, error)) => { - warn!(self.inner.log, "Zone failed to start"; "zone" => &name); - errors.push((name, error)) - } - } - } - Ok(StartZonesResult { new_zones, errors }) - } - - /// Returns the current Omicron zone configuration - pub async fn omicron_zones_list(&self) -> OmicronZonesConfig { - let log = &self.inner.log; - - // We need to take the lock in order for the information in the ledger - // to be up-to-date. - let _existing_zones = self.inner.zones.lock().await; - - // Read the existing set of services from the ledger. - let zone_ledger_paths = self.all_omicron_zone_ledgers().await; - let ledger_data = match Ledger::::new( - log, - zone_ledger_paths.clone(), - ) - .await - { - Some(ledger) => ledger.data().clone(), - None => OmicronZonesConfigLocal::initial(), - }; - - ledger_data.to_omicron_zones_config() - } - - /// Ensures that particular Omicron zones are running - /// - /// These services will be instantiated by this function, and will be - /// recorded to a local file to ensure they start automatically on next - /// boot. - pub async fn ensure_all_omicron_zones_persistent( - &self, - mut request: OmicronZonesConfig, - ) -> Result<(), Error> { - let log = &self.inner.log; - - let mut existing_zones = self.inner.zones.lock().await; - - // Ensure that any zone images from the artifact store are present. 
- for zone in &request.zones { - if let Some(hash) = zone.image_source.artifact_hash() { - if let Err(err) = ArtifactStore::get_from_storage( - &self.inner.storage, - &self.inner.log, - hash, - ) - .await - { - return Err(Error::ZoneArtifactNotFound { - hash, - zone_kind: zone.zone_type.kind().report_str(), - id: zone.id, - err, - }); - } - } - } - - // Read the existing set of services from the ledger. - let zone_ledger_paths = self.all_omicron_zone_ledgers().await; - let mut ledger = match Ledger::::new( - log, - zone_ledger_paths.clone(), - ) - .await - { - Some(ledger) => ledger, - None => Ledger::::new_with( - log, - zone_ledger_paths.clone(), - OmicronZonesConfigLocal::initial(), - ), - }; - - let ledger_zone_config = ledger.data_mut(); - debug!(log, "ensure_all_omicron_zones_persistent"; - "request_generation" => request.generation.to_string(), - "ledger_generation" => - ledger_zone_config.omicron_generation.to_string(), - ); - - // Absolutely refuse to downgrade the configuration. - if ledger_zone_config.omicron_generation > request.generation { - return Err(Error::RequestedZoneConfigOutdated { - requested: request.generation, - current: ledger_zone_config.omicron_generation, - }); - } - - // If the generation is the same as what we're running, but the contents - // aren't, that's a problem, too. - if ledger_zone_config.omicron_generation == request.generation { - // Nexus should send us consistent zone orderings; however, we may - // reorder the zone list inside `ensure_all_omicron_zones`. To avoid - // equality checks failing only because the two lists are ordered - // differently, sort them both here before comparing. - let mut ledger_zones = - ledger_zone_config.clone().to_omicron_zones_config().zones; - - // We sort by ID because we assume no two zones have the same ID. If - // that assumption is wrong, we may return an error here where the - // conflict is soley the list orders, but in such a case that's the - // least of our problems. - ledger_zones.sort_by_key(|z| z.id); - request.zones.sort_by_key(|z| z.id); - - if ledger_zones != request.zones { - return Err(Error::RequestedConfigConflicts( - request.generation, - )); - } - } - - let omicron_generation = request.generation; - let ledger_generation = ledger_zone_config.ledger_generation; - self.ensure_all_omicron_zones(&mut existing_zones, request).await?; - let zones = existing_zones - .values() - .map(|omicron_zone| omicron_zone.config.clone()) - .collect(); - - let new_config = OmicronZonesConfigLocal { - omicron_generation, - ledger_generation, - zones, - }; - - // If the contents of the ledger would be identical, we can avoid - // performing an update and commit. - if *ledger_zone_config == new_config { - return Ok(()); - } - - // Update the zones in the ledger and write it back to both M.2s - *ledger_zone_config = new_config; - ledger.commit().await?; - - Ok(()) - } - - // Ensures that only the following Omicron zones are running. - // - // This method strives to be idempotent. - // - // - Starting and stopping zones is not an atomic operation - it's possible - // that we cannot start a zone after a previous one has been successfully - // created (or destroyed) intentionally. As a result, even in error cases, - // it's possible that the set of `existing_zones` changes. However, this set - // will only change in the direction of `new_request`: zones will only be - // removed if they ARE NOT part of `new_request`, and zones will only be - // added if they ARE part of `new_request`. 
- // - Zones are generally not updated in-place (i.e., two zone configurations - // that differ in any way are treated as entirely distinct), with an - // exception for backfilling the `filesystem_pool`, as long as the new - // request's filesystem pool matches the actual pool for that zones. This - // in-place update is allowed because changing only that property to match - // the runtime system does not require reconfiguring the zone or shutting it - // down and restarting it. - // - This method does not record any information such that these services - // are re-instantiated on boot. - async fn ensure_all_omicron_zones( - &self, - // The MutexGuard here attempts to ensure that the caller has the right - // lock held when calling this function. - existing_zones: &mut MutexGuard<'_, ZoneMap>, - new_request: OmicronZonesConfig, - ) -> Result<(), Error> { - // Do some data-normalization to ensure we can compare the "requested - // set" vs the "existing set" as HashSets. - let ReconciledNewZonesRequest { - zones_to_be_removed, - zones_to_be_added, - } = reconcile_running_zones_with_new_request( - existing_zones, - new_request, - &self.inner.log, - )?; - - // Destroy zones that should not be running - for zone in zones_to_be_removed { - self.zone_bundle_and_try_remove(existing_zones, &zone).await; - } - - // Collect information that's necessary to start new zones - let storage = self.inner.storage.get_latest_disks().await; - let mount_config = storage.mount_config(); - let all_u2_pools = storage.all_u2_zpools(); - let time_is_synchronized = - match self.timesync_get_locked(&existing_zones).await { - // Time is synchronized - Ok(TimeSync { sync: true, .. }) => true, - // Time is not synchronized, or we can't check - _ => false, - }; - - // Concurrently boot all new zones - let StartZonesResult { new_zones, errors } = self - .start_omicron_zones( - mount_config, - zones_to_be_added.iter(), - time_is_synchronized, - &all_u2_pools, - ) - .await?; - - // Add the new zones to our tracked zone set - existing_zones.extend( - new_zones.into_iter().map(|zone| (zone.name().to_string(), zone)), - ); - - // If any zones failed to start, exit with an error - if !errors.is_empty() { - return Err(Error::ZoneEnsure { errors }); - } - Ok(()) - } - - // Attempts to take a zone bundle and remove a zone. - // - // Logs, but does not return an error on failure. - async fn zone_bundle_and_try_remove( - &self, - existing_zones: &mut MutexGuard<'_, ZoneMap>, - zone: &OmicronZoneConfig, - ) { - let log = &self.inner.log; - let expected_zone_name = zone.zone_name(); - let Some(mut zone) = existing_zones.remove(&expected_zone_name) else { - warn!( - log, - "Expected to remove zone, but could not find it"; - "zone_name" => &expected_zone_name, - ); - return; - }; - // Ensure that the sled agent's metrics task is not tracking the zone's - // VNICs or OPTE ports. 
- if let Some(queue) = self.maybe_metrics_queue() { - match queue.untrack_zone_links(&zone.runtime) { - Ok(_) => debug!( - log, - "stopped tracking zone datalinks"; - "zone_name" => &expected_zone_name, - ), - Err(errors) => error!( - log, - "failed to stop tracking zone datalinks"; - "errors" => ?errors, - "zone_name" => &expected_zone_name - ), - } - } - debug!( - log, - "removing an existing zone"; - "zone_name" => &expected_zone_name, - ); - if let Err(e) = self - .inner - .zone_bundler - .create(&zone.runtime, ZoneBundleCause::UnexpectedZone) - .await - { - error!( - log, - "Failed to take bundle of unexpected zone"; - "zone_name" => &expected_zone_name, - InlineErrorChain::new(&e), - ); - } - if let Err(e) = zone.runtime.stop().await { - error!(log, "Failed to stop zone {}: {e}", zone.name()); - } - if let Err(e) = - self.clean_up_after_zone_shutdown(&zone.config.zone).await - { - error!( - log, - "Failed to clean up after stopping zone {}", zone.name(); - InlineErrorChain::new(&e), - ); - } + Ok(runtime) } // Ensures that if a zone is about to be installed, it does not exist. @@ -3969,7 +3456,7 @@ impl ServiceManager { &self, mount_config: &MountConfig, zone: &OmicronZoneConfig, - all_u2_pools: &Vec, + all_u2_pools: &[ZpoolName], ) -> Result { let name = zone.zone_name(); @@ -4068,110 +3555,12 @@ impl ServiceManager { } } - pub async fn timesync_get(&self) -> Result { - let existing_zones = self.inner.zones.lock().await; - self.timesync_get_locked(&existing_zones).await - } - - async fn timesync_get_locked( - &self, - existing_zones: &tokio::sync::MutexGuard<'_, ZoneMap>, - ) -> Result { - let skip_timesync = match &self.inner.time_sync_config { - TimeSyncConfig::Normal => false, - TimeSyncConfig::Skip => true, - #[cfg(all(test, target_os = "illumos"))] - TimeSyncConfig::Fail => { - info!(self.inner.log, "Configured to fail timesync checks"); - return Err(Error::TimeNotSynchronized); - } - }; - - if skip_timesync { - info!(self.inner.log, "Configured to skip timesync checks"); - self.on_time_sync().await; - return Ok(TimeSync { - sync: true, - ref_id: 0, - ip_addr: IPV6_UNSPECIFIED, - stratum: 0, - ref_time: 0.0, - correction: 0.00, - }); - }; - - let ntp_zone_name = - InstalledZone::get_zone_name(ZoneKind::NTP_PREFIX, None); - - let ntp_zone = existing_zones - .iter() - .find(|(name, _)| name.starts_with(&ntp_zone_name)) - .ok_or_else(|| Error::NtpZoneNotReady)? - .1; - - // XXXNTP - This could be replaced with a direct connection to the - // daemon using a patched version of the chrony_candm crate to allow - // a custom server socket path. From the GZ, it should be possible to - // connect to the UNIX socket at - // format!("{}/var/run/chrony/chronyd.sock", ntp_zone.root()) - - match ntp_zone.runtime.run_cmd(&["/usr/bin/chronyc", "-c", "tracking"]) - { - Ok(stdout) => { - let v: Vec<&str> = stdout.split(',').collect(); - - if v.len() > 9 { - let ref_id = u32::from_str_radix(v[0], 16) - .map_err(|_| Error::NtpZoneNotReady)?; - let ip_addr = - IpAddr::from_str(v[1]).unwrap_or(IPV6_UNSPECIFIED); - let stratum = u8::from_str(v[2]) - .map_err(|_| Error::NtpZoneNotReady)?; - let ref_time = f64::from_str(v[3]) - .map_err(|_| Error::NtpZoneNotReady)?; - let correction = f64::from_str(v[4]) - .map_err(|_| Error::NtpZoneNotReady)?; - - // Per `chronyc waitsync`'s implementation, if either the - // reference IP address is not unspecified or the reference - // ID is not 0 or 0x7f7f0101, we are synchronized to a peer. 
- let peer_sync = !ip_addr.is_unspecified() - || (ref_id != 0 && ref_id != 0x7f7f0101); - - let sync = stratum < 10 - && ref_time > 1234567890.0 - && peer_sync - && correction.abs() <= 0.05; - - if sync { - self.on_time_sync().await; - } - - Ok(TimeSync { - sync, - ref_id, - ip_addr, - stratum, - ref_time, - correction, - }) - } else { - Err(Error::NtpZoneNotReady) - } - } - Err(e) => { - error!(self.inner.log, "chronyc command failed: {}", e); - Err(Error::NtpZoneNotReady) - } - } - } - /// Check if the synchronization state of the sled has shifted to true and /// if so, execute the any out-of-band actions that need to be taken. /// /// This function only executes the out-of-band actions once, once the /// synchronization state has shifted to true. - async fn on_time_sync(&self) { + pub(crate) async fn on_time_sync(&self) { if self .inner .time_synced @@ -4988,973 +4377,9 @@ fn internal_dns_addrobj_name(gz_address_index: u32) -> String { format!("internaldns{gz_address_index}") } -#[derive(Debug)] -struct ReconciledNewZonesRequest { - zones_to_be_removed: HashSet, - zones_to_be_added: HashSet, -} - -fn reconcile_running_zones_with_new_request( - existing_zones: &mut MutexGuard<'_, ZoneMap>, - new_request: OmicronZonesConfig, - log: &Logger, -) -> Result { - reconcile_running_zones_with_new_request_impl( - existing_zones - .values_mut() - .map(|z| (&mut z.config.zone, z.runtime.root_zpool())), - new_request, - log, - ) -} - -// Separate helper function for `reconcile_running_zones_with_new_request` that -// allows unit tests to exercise the implementation without having to construct -// a `&mut MutexGuard<'_, ZoneMap>` for `existing_zones`. -fn reconcile_running_zones_with_new_request_impl<'a>( - existing_zones_with_runtime_zpool: impl Iterator< - Item = (&'a mut OmicronZoneConfig, &'a ZpoolOrRamdisk), - >, - new_request: OmicronZonesConfig, - log: &Logger, -) -> Result { - let mut existing_zones_by_id: BTreeMap<_, _> = - existing_zones_with_runtime_zpool - .map(|(zone, zpool)| (zone.id, (zone, zpool))) - .collect(); - let mut zones_to_be_added = HashSet::new(); - let mut zones_to_be_removed = HashSet::new(); - let mut zones_to_update = Vec::new(); - - for zone in new_request.zones.into_iter() { - let Some((existing_zone, runtime_zpool)) = - existing_zones_by_id.remove(&zone.id) - else { - // This zone isn't in the existing set; add it. - zones_to_be_added.insert(zone); - continue; - }; - - // We're already running this zone. If the config hasn't changed, we - // have nothing to do. - if zone == *existing_zone { - continue; - } - - // Special case for fixing #7229. We have an incoming request for a zone - // that we're already running except the config has changed; normally, - // we'd shut the zone down and restart it. However, if we get a new - // request that is: - // - // 1. setting `filesystem_pool`, and - // 2. the config for this zone is otherwise identical, and - // 3. the new `filesystem_pool` matches the pool on which the zone is - // installed - // - // then we don't want to shut the zone down and restart it, because the - // config hasn't actually changed in any meaningful way; this is just - // reconfigurator correcting #7229. - if let Some(new_filesystem_pool) = &zone.filesystem_pool { - let differs_only_by_filesystem_pool = { - // Clone `existing_zone` and mutate its `filesystem_pool` to - // match the new request; if they now match, that's the only - // field that's different. 
- let mut existing = existing_zone.clone(); - existing.filesystem_pool = Some(*new_filesystem_pool); - existing == zone - }; - - let runtime_zpool = match runtime_zpool { - ZpoolOrRamdisk::Zpool(zpool_name) => zpool_name, - ZpoolOrRamdisk::Ramdisk => { - // The only zone we run on the ramdisk is the switch - // zone, for which it isn't possible to get a zone - // request, so it should be fine to put an - // `unreachable!()` here. Out of caution for future - // changes, we'll instead return an error that the - // requested zone is on the ramdisk. - error!( - log, - "fix-7229: unexpectedly received request with a \ - zone config for a zone running on ramdisk"; - "new_config" => ?zone, - "existing_config" => ?existing_zone, - ); - return Err(Error::ZoneIsRunningOnRamdisk { - zone_id: zone.id, - }); - } - }; - - if differs_only_by_filesystem_pool { - if new_filesystem_pool == runtime_zpool { - // Our #7229 special case: the new config is only filling in - // the pool, and it does so correctly. Move on to the next - // zone in the request without adding this zone to either of - // our `zone_to_be_*` sets. - info!( - log, - "fix-7229: accepted new zone config that changes only \ - filesystem_pool"; - "new_config" => ?zone, - ); - - // We should update this `existing_zone`, but delay doing so - // until we've processed all zones (so if there are any - // failures later, we don't return having partially-updated - // the existing zones). - zones_to_update.push((existing_zone, zone)); - continue; - } else { - error!( - log, - "fix-7229: rejected new zone config that changes only \ - filesystem_pool (incorrect pool)"; - "new_config" => ?zone, - "expected_pool" => %runtime_zpool, - ); - return Err(Error::InvalidFilesystemPoolZoneConfig { - zone_id: zone.id, - expected_pool: *runtime_zpool, - got_pool: *new_filesystem_pool, - }); - } - } - } - - // End of #7229 special case: this zone is already running, but the new - // request has changed it in some way. We need to shut it down and - // restart it. - zones_to_be_removed.insert(existing_zone.clone()); - zones_to_be_added.insert(zone); - } - - // Any remaining entries in `existing_zones_by_id` should be shut down. - zones_to_be_removed - .extend(existing_zones_by_id.into_values().map(|(z, _)| z.clone())); - - // All zones have been handled successfully; commit any changes to existing - // zones we found in our "fix 7229" special case above. - let num_zones_updated = zones_to_update.len(); - for (existing_zone, new_zone) in zones_to_update { - *existing_zone = new_zone; - } - - info!( - log, - "ensure_all_omicron_zones: request reconciliation done"; - "num_zones_to_be_removed" => zones_to_be_removed.len(), - "num_zones_to_be_added" => zones_to_be_added.len(), - "num_zones_updated" => num_zones_updated, - ); - Ok(ReconciledNewZonesRequest { zones_to_be_removed, zones_to_be_added }) -} - -#[cfg(all(test, target_os = "illumos"))] -mod illumos_tests { - use crate::metrics; - - use super::*; - use illumos_utils::dladm::{ - BOOTSTRAP_ETHERSTUB_NAME, Etherstub, UNDERLAY_ETHERSTUB_NAME, - UNDERLAY_ETHERSTUB_VNIC_NAME, - }; - - use nexus_sled_agent_shared::inventory::OmicronZoneImageSource; - use omicron_uuid_kinds::OmicronZoneUuid; - use sled_agent_zone_images::ZoneImageZpools; - use sled_storage::manager_test_harness::StorageManagerTestHarness; - use std::{ - net::{Ipv6Addr, SocketAddrV6}, - time::Duration, - }; - use tokio::sync::mpsc::error::TryRecvError; - use uuid::Uuid; - - // Just placeholders. Not used. 
- const GLOBAL_ZONE_BOOTSTRAP_IP: Ipv6Addr = Ipv6Addr::LOCALHOST; - const SWITCH_ZONE_BOOTSTRAP_IP: Ipv6Addr = Ipv6Addr::LOCALHOST; - - const EXPECTED_PORT: u16 = 12223; - - // Timeout within which we must have received a message about a zone's links - // to track. This is very generous. - const LINK_NOTIFICATION_TIMEOUT: Duration = Duration::from_secs(5); - - struct FakeSystemApi { - fake_install_dir: Utf8PathBuf, - dladm: Arc, - zones: Arc, - } - - impl FakeSystemApi { - fn new(fake_install_dir: Utf8PathBuf) -> Box { - Box::new(Self { - fake_install_dir, - dladm: illumos_utils::fakes::dladm::Dladm::new(), - zones: illumos_utils::fakes::zone::Zones::new(), - }) - } - } - - impl SystemApi for FakeSystemApi { - fn fake_install_dir(&self) -> Option<&Utf8Path> { - Some(&self.fake_install_dir) - } - - fn dladm(&self) -> Arc { - self.dladm.clone() - } - - fn zones(&self) -> Arc { - self.zones.clone() - } - } - - fn make_bootstrap_networking_config() -> BootstrapNetworking { - BootstrapNetworking { - bootstrap_etherstub: Etherstub( - BOOTSTRAP_ETHERSTUB_NAME.to_string(), - ), - global_zone_bootstrap_ip: GLOBAL_ZONE_BOOTSTRAP_IP, - global_zone_bootstrap_link_local_ip: GLOBAL_ZONE_BOOTSTRAP_IP, - switch_zone_bootstrap_ip: SWITCH_ZONE_BOOTSTRAP_IP, - underlay_etherstub: Etherstub(UNDERLAY_ETHERSTUB_NAME.to_string()), - underlay_etherstub_vnic: EtherstubVnic( - UNDERLAY_ETHERSTUB_VNIC_NAME.to_string(), - ), - } - } - - // Prepare to call "ensure" for a new service, then actually call "ensure". - async fn ensure_new_service( - mgr: &ServiceManager, - id: OmicronZoneUuid, - generation: Generation, - ) { - let address = - SocketAddrV6::new(Ipv6Addr::LOCALHOST, EXPECTED_PORT, 0, 0); - try_new_service_of_type( - mgr, - id, - generation, - OmicronZoneType::InternalNtp { address }, - ) - .await - .expect("Could not create service"); - } - - async fn try_new_service_of_type( - mgr: &ServiceManager, - id: OmicronZoneUuid, - generation: Generation, - zone_type: OmicronZoneType, - ) -> Result<(), Error> { - mgr.ensure_all_omicron_zones_persistent(OmicronZonesConfig { - generation, - zones: vec![OmicronZoneConfig { - id, - zone_type, - filesystem_pool: None, - image_source: OmicronZoneImageSource::InstallDataset, - }], - }) - .await - } - - // Prepare to call "ensure" for a service which already exists. We should - // return the service without actually installing a new zone. - async fn ensure_existing_service( - mgr: &ServiceManager, - id: OmicronZoneUuid, - generation: Generation, - ) { - let address = - SocketAddrV6::new(Ipv6Addr::LOCALHOST, EXPECTED_PORT, 0, 0); - mgr.ensure_all_omicron_zones_persistent(OmicronZonesConfig { - generation, - zones: vec![OmicronZoneConfig { - id, - zone_type: OmicronZoneType::InternalNtp { address }, - filesystem_pool: None, - image_source: OmicronZoneImageSource::InstallDataset, - }], - }) - .await - .unwrap(); - } - - // Prepare to drop the service manager. - // - // This will shut down all allocated zones, and delete their - // associated VNICs. - async fn drop_service_manager(mgr: ServiceManager) { - // Also send a message to the metrics task that the VNIC has been - // deleted. 
- let queue = mgr.metrics_queue(); - for zone in mgr.inner.zones.lock().await.values() { - if let Err(e) = queue.untrack_zone_links(&zone.runtime) { - error!( - mgr.inner.log, - "failed to stop tracking zone datalinks"; - "errors" => ?e, - ); - } - } - - // Explicitly drop the service manager - drop(mgr); - } - - struct TestConfig { - config_dir: camino_tempfile::Utf8TempDir, - } - - impl TestConfig { - async fn new() -> Self { - let config_dir = camino_tempfile::Utf8TempDir::new().unwrap(); - Self { config_dir } - } - - fn make_config(&self) -> Config { - Config { - sled_identifiers: SledIdentifiers { - rack_id: Uuid::new_v4(), - sled_id: Uuid::new_v4(), - model: "fake-gimlet".to_string(), - revision: 1, - serial: "fake-serial".to_string(), - }, - sidecar_revision: SidecarRevision::Physical( - "rev_whatever_its_a_test".to_string(), - ), - } - } - - fn override_paths(&self, mgr: &ServiceManager) { - let dir = self.config_dir.path(); - mgr.override_ledger_directory(dir.to_path_buf()); - mgr.override_image_directory(dir.to_path_buf()); - - // We test launching "fake" versions of the zones, but the - // logic to find paths relies on checking the existence of - // files. - std::fs::write(dir.join("oximeter.tar.gz"), "Not a real file") - .unwrap(); - std::fs::write(dir.join("ntp.tar.gz"), "Not a real file").unwrap(); - } - } - - async fn setup_storage(log: &Logger) -> StorageManagerTestHarness { - let mut harness = StorageManagerTestHarness::new(&log).await; - let raw_disks = - harness.add_vdevs(&["u2_test.vdev", "m2_test.vdev"]).await; - harness.handle().key_manager_ready().await; - let config = harness.make_config(1, &raw_disks); - let result = harness - .handle() - .omicron_physical_disks_ensure(config.clone()) - .await - .expect("Failed to ensure disks"); - assert!(!result.has_error(), "{:?}", result); - harness - } - - struct LedgerTestHelper<'a> { - log: slog::Logger, - storage_test_harness: StorageManagerTestHarness, - zone_bundler: ZoneBundler, - zone_image_resolver: ZoneImageSourceResolver, - test_config: &'a TestConfig, - } - - impl<'a> LedgerTestHelper<'a> { - async fn new(log: slog::Logger, test_config: &'a TestConfig) -> Self { - let storage_test_harness = setup_storage(&log).await; - let zone_bundler = ZoneBundler::new( - log.clone(), - storage_test_harness.handle().clone(), - Default::default(), - ) - .await; - - let mut storage_manager = storage_test_harness.handle().clone(); - let all_disks = storage_manager.get_latest_disks().await; - let (_, boot_zpool) = storage_manager.wait_for_boot_disk().await; - let zpools = ZoneImageZpools { - root: &all_disks.mount_config().root, - all_m2_zpools: all_disks.all_m2_zpools(), - }; - let zone_image_resolver = - ZoneImageSourceResolver::new(&log, &zpools, &boot_zpool); - - LedgerTestHelper { - log, - storage_test_harness, - zone_bundler, - zone_image_resolver, - test_config, - } - } - - async fn cleanup(&mut self) { - self.storage_test_harness.cleanup().await; - } - - fn new_service_manager( - &self, - system: Box, - ) -> ServiceManager { - self.new_service_manager_with_timesync(TimeSyncConfig::Skip, system) - } - - fn new_service_manager_with_timesync( - &self, - time_sync_config: TimeSyncConfig, - system: Box, - ) -> ServiceManager { - let log = &self.log; - let reconciler = - DdmReconciler::new(Ipv6Subnet::new(Ipv6Addr::LOCALHOST), log) - .expect("created DdmReconciler"); - let mgr = ServiceManager::new_inner( - log, - reconciler, - make_bootstrap_networking_config(), - SledMode::Auto, - time_sync_config, - 
SidecarRevision::Physical("rev-test".to_string()), - vec![], - self.storage_test_harness.handle().clone(), - self.zone_bundler.clone(), - self.zone_image_resolver.clone(), - system, - ); - self.test_config.override_paths(&mgr); - mgr - } - - async fn sled_agent_started( - log: &slog::Logger, - test_config: &TestConfig, - mgr: &ServiceManager, - metrics_queue: MetricsRequestQueue, - ) { - let port_manager = PortManager::new( - log.new(o!("component" => "PortManager")), - Ipv6Addr::new( - 0xfd00, 0x1de, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, - ), - ); - - mgr.sled_agent_started( - test_config.make_config(), - port_manager, - Ipv6Addr::LOCALHOST, - Uuid::new_v4(), - None, - metrics_queue, - ) - .await - .unwrap(); - } - } - - #[tokio::test] - async fn test_ensure_service() { - let logctx = - omicron_test_utils::dev::test_setup_log("test_ensure_service"); - let test_config = TestConfig::new().await; - let mut helper = - LedgerTestHelper::new(logctx.log.clone(), &test_config).await; - let mgr = helper.new_service_manager(FakeSystemApi::new( - test_config.config_dir.path().to_path_buf(), - )); - let (metrics_queue, mut metrics_rx) = MetricsRequestQueue::for_test(); - LedgerTestHelper::sled_agent_started( - &logctx.log, - &test_config, - &mgr, - metrics_queue, - ) - .await; - - let v1 = Generation::new(); - let found = mgr.omicron_zones_list().await; - assert_eq!(found.generation, v1); - assert!(found.zones.is_empty()); - - let v2 = v1.next(); - let id = OmicronZoneUuid::new_v4(); - ensure_new_service(&mgr, id, v2).await; - - let found = mgr.omicron_zones_list().await; - assert_eq!(found.generation, v2); - assert_eq!(found.zones.len(), 1); - assert_eq!(found.zones[0].id, id); - - // First check that we received the synced sled notification - let synced_message = tokio::time::timeout( - LINK_NOTIFICATION_TIMEOUT, - metrics_rx.recv(), - ).await.expect("Should have received a message about the sled being synced within the timeout") - .expect("Should have received a message about the sled being synced"); - assert_eq!( - synced_message, - metrics::Message::TimeSynced { sled_id: mgr.sled_id() }, - ); - - // Then, check that we received a message about the zone's VNIC. 
- let vnic_message = tokio::time::timeout( - LINK_NOTIFICATION_TIMEOUT, - metrics_rx.recv(), - ) - .await - .expect( - "Should have received a message about the zone's VNIC within the timeout" - ) - .expect("Should have received a message about the zone's VNIC"); - let zone_name = format!("oxz_ntp_{}", id); - assert_eq!( - vnic_message, - metrics::Message::TrackVnic { - zone_name, - name: "oxControlService0".into() - }, - ); - assert_eq!(metrics_rx.try_recv(), Err(TryRecvError::Empty)); - - drop_service_manager(mgr).await; - - helper.cleanup().await; - logctx.cleanup_successful(); - } - - #[tokio::test] - async fn test_ensure_service_before_timesync() { - let logctx = omicron_test_utils::dev::test_setup_log( - "test_ensure_service_before_timesync", - ); - let test_config = TestConfig::new().await; - let mut helper = - LedgerTestHelper::new(logctx.log.clone(), &test_config).await; - - let mgr = helper.new_service_manager_with_timesync( - TimeSyncConfig::Fail, - FakeSystemApi::new(test_config.config_dir.path().to_path_buf()), - ); - let (metrics_queue, mut metrics_rx) = MetricsRequestQueue::for_test(); - LedgerTestHelper::sled_agent_started( - &logctx.log, - &test_config, - &mgr, - metrics_queue, - ) - .await; - - let v1 = Generation::new(); - let found = mgr.omicron_zones_list().await; - assert_eq!(found.generation, v1); - assert!(found.zones.is_empty()); - - let v2 = v1.next(); - let id = OmicronZoneUuid::new_v4(); - - // Should fail: time has not yet synchronized. - let address = - SocketAddrV6::new(Ipv6Addr::LOCALHOST, EXPECTED_PORT, 0, 0); - let result = try_new_service_of_type( - &mgr, - id, - v2, - OmicronZoneType::Oximeter { address }, - ) - .await; - - // First, ensure this is the right kind of error. - let err = result.unwrap_err(); - let errors = match &err { - Error::ZoneEnsure { errors } => errors, - err => panic!("unexpected result: {err:?}"), - }; - assert_eq!(errors.len(), 1); - assert_matches::assert_matches!( - errors[0].1, - Error::TimeNotSynchronized - ); - - // Ensure we have _not_ received a message about the zone's VNIC, - // because there isn't a zone. - assert_eq!(metrics_rx.try_recv(), Err(TryRecvError::Empty)); - - // Next, ensure this still converts to an "unavail" common error - let common_err = omicron_common::api::external::Error::from(err); - assert_matches::assert_matches!( - common_err, - omicron_common::api::external::Error::ServiceUnavailable { .. } - ); - - // Should succeed: we don't care that time has not yet synchronized (for - // this specific service). 
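The behavior the next call relies on — NTP zones are exempt from the time-sync requirement, while every other zone must wait — can be summarized by a small gate along these lines (a standalone sketch with stand-in types, not the actual ServiceManager code):

#[derive(Debug, PartialEq)]
enum ZoneKind {
    InternalNtp,
    BoundaryNtp,
    Oximeter, // stand-in for "any non-NTP zone"
}

#[derive(Debug, PartialEq)]
enum StartError {
    TimeNotSynchronized,
}

fn time_sync_gate(kind: &ZoneKind, time_synced: bool) -> Result<(), StartError> {
    match kind {
        // NTP zones are what bring time into sync, so they may always start.
        ZoneKind::InternalNtp | ZoneKind::BoundaryNtp => Ok(()),
        // Everything else must wait until time is synchronized.
        _ if time_synced => Ok(()),
        _ => Err(StartError::TimeNotSynchronized),
    }
}

// e.g. time_sync_gate(&ZoneKind::Oximeter, false) == Err(StartError::TimeNotSynchronized)
//      time_sync_gate(&ZoneKind::InternalNtp, false) == Ok(())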
- try_new_service_of_type( - &mgr, - id, - v2, - OmicronZoneType::InternalNtp { address }, - ) - .await - .unwrap(); - - drop_service_manager(mgr).await; - helper.cleanup().await; - logctx.cleanup_successful(); - } - - #[tokio::test] - async fn test_ensure_service_which_already_exists() { - let logctx = omicron_test_utils::dev::test_setup_log( - "test_ensure_service_which_already_exists", - ); - let test_config = TestConfig::new().await; - let mut helper = - LedgerTestHelper::new(logctx.log.clone(), &test_config).await; - let mgr = helper.new_service_manager(FakeSystemApi::new( - test_config.config_dir.path().to_path_buf(), - )); - let (metrics_queue, mut metrics_rx) = MetricsRequestQueue::for_test(); - LedgerTestHelper::sled_agent_started( - &logctx.log, - &test_config, - &mgr, - metrics_queue, - ) - .await; - - let v2 = Generation::new().next(); - let id = OmicronZoneUuid::new_v4(); - ensure_new_service(&mgr, id, v2).await; - let v3 = v2.next(); - ensure_existing_service(&mgr, id, v3).await; - let found = mgr.omicron_zones_list().await; - assert_eq!(found.generation, v3); - assert_eq!(found.zones.len(), 1); - assert_eq!(found.zones[0].id, id); - - // First, we will get a message about the sled being synced. - let synced_message = tokio::time::timeout( - LINK_NOTIFICATION_TIMEOUT, - metrics_rx.recv(), - ).await.expect("Should have received a message about the sled being synced within the timeout") - .expect("Should have received a message about the sled being synced"); - assert_eq!( - synced_message, - metrics::Message::TimeSynced { sled_id: mgr.sled_id() } - ); - - // In this case, the manager creates the zone once, and then "ensuring" - // it a second time is a no-op. So we simply expect the same message - // sequence as starting a zone for the first time. - let vnic_message = tokio::time::timeout( - LINK_NOTIFICATION_TIMEOUT, - metrics_rx.recv(), - ) - .await - .expect( - "Should have received a message about the zone's VNIC within the timeout" - ) - .expect("Should have received a message about the zone's VNIC"); - let zone_name = format!("oxz_ntp_{}", id); - assert_eq!( - vnic_message, - metrics::Message::TrackVnic { - zone_name, - name: "oxControlService0".into() - }, - ); - assert_eq!(metrics_rx.try_recv(), Err(TryRecvError::Empty)); - - drop_service_manager(mgr).await; - - helper.cleanup().await; - logctx.cleanup_successful(); - } - - #[tokio::test] - async fn test_services_are_recreated_on_reboot() { - let logctx = omicron_test_utils::dev::test_setup_log( - "test_services_are_recreated_on_reboot", - ); - let test_config = TestConfig::new().await; - let mut helper = - LedgerTestHelper::new(logctx.log.clone(), &test_config).await; - - // First, spin up a ServiceManager, create a new zone, and then tear - // down the ServiceManager. - let mgr = helper.new_service_manager(FakeSystemApi::new( - test_config.config_dir.path().to_path_buf(), - )); - let (metrics_queue, mut metrics_rx) = MetricsRequestQueue::for_test(); - LedgerTestHelper::sled_agent_started( - &logctx.log, - &test_config, - &mgr, - metrics_queue, - ) - .await; - - let v2 = Generation::new().next(); - let id = OmicronZoneUuid::new_v4(); - ensure_new_service(&mgr, id, v2).await; - - let sled_id = mgr.sled_id(); - drop_service_manager(mgr).await; - - // First, we will get a message about the sled being synced. 
- let synced_message = tokio::time::timeout( - LINK_NOTIFICATION_TIMEOUT, - metrics_rx.recv(), - ).await.expect("Should have received a message about the sled being synced within the timeout") - .expect("Should have received a message about the sled being synced"); - assert_eq!(synced_message, metrics::Message::TimeSynced { sled_id }); - - // Check that we received a message about the zone's VNIC. Since the - // manager is being dropped, it should also send a message about the - // VNIC being deleted. - let zone_name = format!("oxz_ntp_{}", id); - for expected_vnic_message in [ - metrics::Message::TrackVnic { - zone_name, - name: "oxControlService0".into(), - }, - metrics::Message::UntrackVnic { name: "oxControlService0".into() }, - ] { - println!( - "Expecting message from manager: {expected_vnic_message:#?}" - ); - let vnic_message = tokio::time::timeout( - LINK_NOTIFICATION_TIMEOUT, - metrics_rx.recv(), - ) - .await - .expect( - "Should have received a message about the zone's VNIC within the timeout" - ) - .expect("Should have received a message about the zone's VNIC"); - assert_eq!(vnic_message, expected_vnic_message,); - } - // Note that the manager has been dropped, so we should get - // disconnected, not empty. - assert_eq!(metrics_rx.try_recv(), Err(TryRecvError::Disconnected)); - - // Before we re-create the service manager - notably, using the same - // config file! - expect that a service gets initialized. - // TODO? - let mgr = helper.new_service_manager(FakeSystemApi::new( - test_config.config_dir.path().to_path_buf(), - )); - let (metrics_queue, mut metrics_rx) = MetricsRequestQueue::for_test(); - LedgerTestHelper::sled_agent_started( - &logctx.log, - &test_config, - &mgr, - metrics_queue, - ) - .await; - - let found = mgr.omicron_zones_list().await; - assert_eq!(found.generation, v2); - assert_eq!(found.zones.len(), 1); - assert_eq!(found.zones[0].id, id); - - // Note that the `omicron_zones_list()` request just returns the - // configured zones, stored in the on-disk ledger. There is nothing - // above that actually ensures that those zones exist, as far as I can - // tell! - assert_eq!(metrics_rx.try_recv(), Err(TryRecvError::Empty)); - - drop_service_manager(mgr).await; - - helper.cleanup().await; - logctx.cleanup_successful(); - } - - #[tokio::test] - async fn test_services_do_not_persist_without_config() { - let logctx = omicron_test_utils::dev::test_setup_log( - "test_services_do_not_persist_without_config", - ); - let test_config = TestConfig::new().await; - let mut helper = - LedgerTestHelper::new(logctx.log.clone(), &test_config).await; - - // First, spin up a ServiceManager, create a new zone, and then tear - // down the ServiceManager. - let mgr = helper.new_service_manager(FakeSystemApi::new( - test_config.config_dir.path().to_path_buf(), - )); - let metrics_handles = MetricsRequestQueue::for_test(); - LedgerTestHelper::sled_agent_started( - &logctx.log, - &test_config, - &mgr, - metrics_handles.0.clone(), - ) - .await; - - let v1 = Generation::new(); - let v2 = v1.next(); - let id = OmicronZoneUuid::new_v4(); - ensure_new_service(&mgr, id, v2).await; - drop_service_manager(mgr).await; - - // Next, delete the ledger. This means the zone we just created will not - // be remembered on the next initialization. - std::fs::remove_file( - test_config.config_dir.path().join(ZONES_LEDGER_FILENAME), - ) - .unwrap(); - - // Observe that the old service is not re-initialized. 
- let mgr = helper.new_service_manager(FakeSystemApi::new( - test_config.config_dir.path().to_path_buf(), - )); - let metrics_handles = MetricsRequestQueue::for_test(); - LedgerTestHelper::sled_agent_started( - &logctx.log, - &test_config, - &mgr, - metrics_handles.0.clone(), - ) - .await; - - let found = mgr.omicron_zones_list().await; - assert_eq!(found.generation, v1); - assert!(found.zones.is_empty()); - - drop_service_manager(mgr).await; - - helper.cleanup().await; - logctx.cleanup_successful(); - } - - #[tokio::test] - async fn test_bad_generations() { - // Start like the normal tests. - let logctx = - omicron_test_utils::dev::test_setup_log("test_bad_generations"); - let test_config = TestConfig::new().await; - let mut helper = - LedgerTestHelper::new(logctx.log.clone(), &test_config).await; - let mgr = helper.new_service_manager(FakeSystemApi::new( - test_config.config_dir.path().to_path_buf(), - )); - let metrics_handles = MetricsRequestQueue::for_test(); - LedgerTestHelper::sled_agent_started( - &logctx.log, - &test_config, - &mgr, - metrics_handles.0.clone(), - ) - .await; - - // Like the normal tests, set up a generation with one zone in it. - let v1 = Generation::new(); - let v2 = v1.next(); - let id1 = OmicronZoneUuid::new_v4(); - - let address = - SocketAddrV6::new(Ipv6Addr::LOCALHOST, EXPECTED_PORT, 0, 0); - let mut zones = vec![OmicronZoneConfig { - id: id1, - zone_type: OmicronZoneType::InternalNtp { address }, - filesystem_pool: None, - image_source: OmicronZoneImageSource::InstallDataset, - }]; - - mgr.ensure_all_omicron_zones_persistent(OmicronZonesConfig { - generation: v2, - zones: zones.clone(), - }) - .await - .unwrap(); - - let found = mgr.omicron_zones_list().await; - assert_eq!(found.generation, v2); - assert_eq!(found.zones.len(), 1); - assert_eq!(found.zones[0].id, id1); - - // Make a new list of zones that we're going to try with a bunch of - // different generation numbers. - let id2 = OmicronZoneUuid::new_v4(); - zones.push(OmicronZoneConfig { - id: id2, - zone_type: OmicronZoneType::InternalNtp { address }, - filesystem_pool: None, - image_source: OmicronZoneImageSource::InstallDataset, - }); - - // Now try to apply that list with an older generation number. This - // shouldn't work and the reported state should be unchanged. - let error = mgr - .ensure_all_omicron_zones_persistent(OmicronZonesConfig { - generation: v1, - zones: zones.clone(), - }) - .await - .expect_err("unexpectedly went backwards in zones generation"); - assert!(matches!( - error, - Error::RequestedZoneConfigOutdated { requested, current } - if requested == v1 && current == v2 - )); - let found2 = mgr.omicron_zones_list().await; - assert_eq!(found, found2); - - // Now try to apply that list with the same generation number that we - // used before. This shouldn't work either. - let error = mgr - .ensure_all_omicron_zones_persistent(OmicronZonesConfig { - generation: v2, - zones: zones.clone(), - }) - .await - .expect_err("unexpectedly changed a single zone generation"); - assert!(matches!( - error, - Error::RequestedConfigConflicts(vr) if vr == v2 - )); - let found3 = mgr.omicron_zones_list().await; - assert_eq!(found, found3); - - // But we should be able to apply this new list of zones as long as we - // advance the generation number. 
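For reference, the generation rules these assertions exercise boil down to the comparison sketched below (plain integers and string slices stand in for the real `Generation` and zone types; this is an illustration, not the ledger code):

#[derive(Debug, PartialEq)]
enum GenerationError {
    /// The requested generation is older than the currently ledgered one.
    Outdated { requested: u64, current: u64 },
    /// The requested generation equals the current one but the payload differs.
    Conflict(u64),
}

fn check_generation(
    requested: u64,
    requested_zones: &[&str],
    current: u64,
    current_zones: &[&str],
) -> Result<(), GenerationError> {
    if requested < current {
        Err(GenerationError::Outdated { requested, current })
    } else if requested == current && requested_zones != current_zones {
        Err(GenerationError::Conflict(requested))
    } else {
        // Either an identical retransmit of the current config or a strictly
        // newer generation; both are accepted.
        Ok(())
    }
}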
- let v3 = v2.next(); - mgr.ensure_all_omicron_zones_persistent(OmicronZonesConfig { - generation: v3, - zones: zones.clone(), - }) - .await - .expect("failed to remove all zones in a new generation"); - let found4 = mgr.omicron_zones_list().await; - assert_eq!(found4.generation, v3); - let mut our_zones = zones; - our_zones.sort_by(|a, b| a.id.cmp(&b.id)); - let mut found_zones = found4.zones; - found_zones.sort_by(|a, b| a.id.cmp(&b.id)); - assert_eq!(our_zones, found_zones); - - drop_service_manager(mgr).await; - - helper.cleanup().await; - logctx.cleanup_successful(); - } -} - #[cfg(test)] mod test { use super::*; - use nexus_sled_agent_shared::inventory::OmicronZoneImageSource; - use omicron_uuid_kinds::ZpoolUuid; use sled_agent_types::zone_bundle::ZoneBundleMetadata; #[test] @@ -5987,282 +4412,4 @@ mod test { &serde_json::to_string_pretty(&schema).unwrap(), ); } - - #[test] - fn test_fix_7229_zone_config_reconciliation() { - fn make_omicron_zone_config( - filesystem_pool: Option<&ZpoolName>, - ) -> OmicronZoneConfig { - OmicronZoneConfig { - id: OmicronZoneUuid::new_v4(), - filesystem_pool: filesystem_pool.cloned(), - zone_type: OmicronZoneType::Oximeter { - address: "[::1]:0".parse().unwrap(), - }, - image_source: OmicronZoneImageSource::InstallDataset, - } - } - - let logctx = - omicron_test_utils::dev::test_setup_log("test_ensure_service"); - let log = &logctx.log; - - let some_zpools = (0..10) - .map(|_| ZpoolName::new_external(ZpoolUuid::new_v4())) - .collect::>(); - - // Test 1: We have some zones; the new config makes no changes. - { - let mut existing = vec![ - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[0]), - ), - ( - make_omicron_zone_config(Some(&some_zpools[1])), - ZpoolOrRamdisk::Zpool(some_zpools[1]), - ), - ( - make_omicron_zone_config(Some(&some_zpools[2])), - ZpoolOrRamdisk::Zpool(some_zpools[2]), - ), - ]; - let new_request = OmicronZonesConfig { - generation: Generation::new().next(), - zones: existing.iter().map(|(zone, _)| zone.clone()).collect(), - }; - let reconciled = reconcile_running_zones_with_new_request_impl( - existing.iter_mut().map(|(z, p)| (z, &*p)), - new_request.clone(), - log, - ) - .expect("reconciled successfully"); - assert_eq!(reconciled.zones_to_be_removed, HashSet::new()); - assert_eq!(reconciled.zones_to_be_added, HashSet::new()); - assert_eq!( - existing.iter().map(|(z, _)| z.clone()).collect::>(), - new_request.zones, - ); - } - - // Test 2: We have some zones; the new config changes `filesystem_pool` - // to match our runtime pools (i.e., the #7229 fix). 
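The per-zone decision that Test 2 and the remaining cases exercise can be sketched as follows (a simplified stand-in for the real reconciliation over `OmicronZoneConfig`; only the fields relevant to the comparison are modeled):

#[derive(Clone, Debug, PartialEq)]
struct ZoneConfig {
    filesystem_pool: Option<&'static str>,
    address: &'static str, // stand-in for "everything else in the config"
}

#[derive(Debug, PartialEq)]
enum Decision {
    /// The zone keeps running: either nothing changed, or the only change is
    /// that `filesystem_pool` was filled in with the pool already in use.
    Keep,
    /// The only change fills in `filesystem_pool`, but with the wrong pool.
    RejectWrongPool,
    /// Something else changed: shut the zone down and restart it.
    RemoveAndReAdd,
}

fn reconcile_zone(
    existing: &ZoneConfig,
    new: &ZoneConfig,
    runtime_pool: &'static str,
) -> Decision {
    if existing == new {
        return Decision::Keep;
    }
    // Would the configs be identical if we only filled in the pool?
    let differs_only_by_pool = match new.filesystem_pool {
        Some(_) => {
            let mut patched = existing.clone();
            patched.filesystem_pool = new.filesystem_pool;
            &patched == new
        }
        None => false,
    };
    if differs_only_by_pool {
        if new.filesystem_pool == Some(runtime_pool) {
            Decision::Keep
        } else {
            Decision::RejectWrongPool
        }
    } else {
        Decision::RemoveAndReAdd
    }
}

In the real function a wrong pool aborts the entire request rather than failing one zone, so the set of existing configs is updated all-or-nothing, as Test 4 below verifies.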
- { - let mut existing = vec![ - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[0]), - ), - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[1]), - ), - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[2]), - ), - ]; - let new_request = OmicronZonesConfig { - generation: Generation::new().next(), - zones: existing - .iter() - .enumerate() - .map(|(i, (zone, _))| { - let mut zone = zone.clone(); - zone.filesystem_pool = Some(some_zpools[i]); - zone - }) - .collect(), - }; - let reconciled = reconcile_running_zones_with_new_request_impl( - existing.iter_mut().map(|(z, p)| (z, &*p)), - new_request.clone(), - log, - ) - .expect("reconciled successfully"); - assert_eq!(reconciled.zones_to_be_removed, HashSet::new()); - assert_eq!(reconciled.zones_to_be_added, HashSet::new()); - assert_eq!( - existing.iter().map(|(z, _)| z.clone()).collect::>(), - new_request.zones, - ); - } - - // Test 3: We have some zones; the new config changes `filesystem_pool` - // to match our runtime pools (i.e., the #7229 fix) but also changes - // something else in the config for the final zone; we should attempt to - // remove and re-add that final zone. - { - let mut existing = vec![ - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[0]), - ), - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[1]), - ), - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[2]), - ), - ]; - let new_request = OmicronZonesConfig { - generation: Generation::new().next(), - zones: existing - .iter() - .enumerate() - .map(|(i, (zone, _))| { - let mut zone = zone.clone(); - zone.filesystem_pool = Some(some_zpools[i]); - if i == 2 { - zone.zone_type = OmicronZoneType::Oximeter { - address: "[::1]:10000".parse().unwrap(), - }; - } - zone - }) - .collect(), - }; - let reconciled = reconcile_running_zones_with_new_request_impl( - existing.iter_mut().map(|(z, p)| (z, &*p)), - new_request.clone(), - log, - ) - .expect("reconciled successfully"); - assert_eq!( - reconciled.zones_to_be_removed, - HashSet::from([existing[2].0.clone()]), - ); - assert_eq!( - reconciled.zones_to_be_added, - HashSet::from([new_request.zones[2].clone()]), - ); - // The first two existing zones should have been updated to match - // the new request. - assert_eq!( - Vec::from_iter(existing[..2].iter().map(|(z, _)| z.clone())), - &new_request.zones[..2], - ); - } - - // Test 4: We have some zones; the new config changes `filesystem_pool` - // to match our runtime pools (i.e., the #7229 fix), except the new pool - // on the final zone is incorrect. We should get an error back. 
- { - let mut existing = vec![ - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[0]), - ), - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[1]), - ), - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[2]), - ), - ]; - let existing_orig = - existing.iter().map(|(z, _)| z.clone()).collect::>(); - let new_request = OmicronZonesConfig { - generation: Generation::new().next(), - zones: existing - .iter() - .enumerate() - .map(|(i, (zone, _))| { - let mut zone = zone.clone(); - if i < 2 { - zone.filesystem_pool = Some(some_zpools[i]); - } else { - zone.filesystem_pool = Some(some_zpools[4]); - } - zone - }) - .collect(), - }; - let err = reconcile_running_zones_with_new_request_impl( - existing.iter_mut().map(|(z, p)| (z, &*p)), - new_request.clone(), - log, - ) - .expect_err("should not have reconciled successfully"); - - match err { - Error::InvalidFilesystemPoolZoneConfig { - zone_id, - expected_pool, - got_pool, - } => { - assert_eq!(zone_id, existing[2].0.id); - assert_eq!(expected_pool, some_zpools[2]); - assert_eq!(got_pool, some_zpools[4]); - } - _ => panic!("unexpected error: {err}"), - } - // reconciliation failed, so the contents of our existing configs - // should not have changed (even though a couple of the changes - // were okay, we should either take all or none to maintain - // consistency with the generation-tagged OmicronZonesConfig) - assert_eq!( - existing.iter().map(|(z, _)| z.clone()).collect::>(), - existing_orig, - ); - } - - // Test 5: We have some zones. The new config applies #7229 fix to the - // first zone, doesn't include the remaining zones, and adds some new - // zones. We should see "the remaining zones" removed, the "new zones" - // added, and the 7229-fixed zone not in either set. - { - let mut existing = vec![ - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[0]), - ), - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[1]), - ), - ( - make_omicron_zone_config(None), - ZpoolOrRamdisk::Zpool(some_zpools[2]), - ), - ]; - let new_request = OmicronZonesConfig { - generation: Generation::new().next(), - zones: vec![ - { - let mut z = existing[0].0.clone(); - z.filesystem_pool = Some(some_zpools[0]); - z - }, - make_omicron_zone_config(None), - make_omicron_zone_config(None), - ], - }; - let reconciled = reconcile_running_zones_with_new_request_impl( - existing.iter_mut().map(|(z, p)| (z, &*p)), - new_request.clone(), - log, - ) - .expect("reconciled successfully"); - - assert_eq!( - reconciled.zones_to_be_removed, - HashSet::from_iter( - existing[1..].iter().map(|(z, _)| z.clone()) - ), - ); - assert_eq!( - reconciled.zones_to_be_added, - HashSet::from_iter(new_request.zones[1..].iter().cloned()), - ); - // Only the first existing zone is being kept; ensure it matches the - // new request. 
- assert_eq!(existing[0].0, new_request.zones[0]); - } - logctx.cleanup_successful(); - } } diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index 59eb99a4d6e..1699c06b3e7 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -25,7 +25,6 @@ use dropshot::TypedBody; use dropshot::endpoint; use nexus_sled_agent_shared::inventory::Inventory; use nexus_sled_agent_shared::inventory::OmicronSledConfig; -use nexus_sled_agent_shared::inventory::OmicronSledConfigResult; use nexus_sled_agent_shared::inventory::SledRole; use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::SledVmmState; @@ -35,8 +34,6 @@ use omicron_common::api::internal::shared::VirtualNetworkInterfaceHost; use omicron_common::api::internal::shared::{ ResolvedVpcRouteSet, ResolvedVpcRouteState, SwitchPorts, }; -use omicron_common::disk::DatasetsConfig; -use omicron_common::disk::OmicronPhysicalDisksConfig; use range_requests::RequestContextEx; use sled_agent_api::*; use sled_agent_types::boot_disk::BootDiskOsWriteStatus; @@ -339,28 +336,14 @@ impl SledAgentApi for SledAgentSimImpl { )) } - async fn datasets_get( - rqctx: RequestContext, - ) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.datasets_config_list()?)) - } - - async fn omicron_physical_disks_get( - rqctx: RequestContext, - ) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.omicron_physical_disks_list()?)) - } - async fn omicron_config_put( rqctx: RequestContext, body: TypedBody, - ) -> Result, HttpError> { + ) -> Result { let sa = rqctx.context(); let body_args = body.into_inner(); - let result = sa.set_omicron_config(body_args)?; - Ok(HttpResponseOk(result)) + sa.set_omicron_config(body_args)?; + Ok(HttpResponseUpdatedNoContent()) } async fn sled_add( @@ -633,12 +616,6 @@ impl SledAgentApi for SledAgentSimImpl { method_unimplemented() } - async fn zpools_get( - _rqctx: RequestContext, - ) -> Result>, HttpError> { - method_unimplemented() - } - async fn sled_role_get( _rqctx: RequestContext, ) -> Result, HttpError> { diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 61adf7e0c59..46fdaa6a356 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -25,8 +25,8 @@ use dropshot::HttpError; use futures::Stream; use nexus_sled_agent_shared::inventory::{ ConfigReconcilerInventoryStatus, Inventory, InventoryDataset, - InventoryDisk, InventoryZpool, OmicronSledConfig, OmicronSledConfigResult, - OmicronZonesConfig, SledRole, + InventoryDisk, InventoryZpool, OmicronSledConfig, OmicronZonesConfig, + SledRole, }; use omicron_common::api::external::{ ByteCount, DiskState, Error, Generation, ResourceType, @@ -913,7 +913,9 @@ impl SledAgent { pub fn set_omicron_config( &self, config: OmicronSledConfig, - ) -> Result { + ) -> Result<(), HttpError> { + // TODO Update the simulator to work on `OmicronSledConfig` instead of + // the three separate legacy configs let disks_config = OmicronPhysicalDisksConfig { generation: config.generation, disks: config.disks.into_iter().collect(), @@ -926,16 +928,14 @@ impl SledAgent { generation: config.generation, zones: config.zones.into_iter().collect(), }; - let (disks, datasets) = { - let mut storage = self.storage.lock(); - let DisksManagementResult { status: disks } = - storage.omicron_physical_disks_ensure(disks_config)?; - let DatasetsManagementResult { status: datasets } = - 
storage.datasets_ensure(datasets_config)?; - (disks, datasets) - }; + + let mut storage = self.storage.lock(); + let _ = storage.omicron_physical_disks_ensure(disks_config)?; + let _ = storage.datasets_ensure(datasets_config)?; *self.fake_zones.lock().unwrap() = zones_config; - Ok(OmicronSledConfigResult { disks, datasets }) + //*self.sled_config.lock().unwrap() = Some(config); + + Ok(()) } pub fn omicron_zones_list(&self) -> OmicronZonesConfig { diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index cff825ba474..737c4fe0168 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -4,26 +4,24 @@ //! Sled agent implementation -use crate::artifact_store::ArtifactStore; +use crate::artifact_store::{self, ArtifactStore}; use crate::boot_disk_os_writer::BootDiskOsWriter; use crate::bootstrap::config::BOOTSTRAP_AGENT_RACK_INIT_PORT; use crate::bootstrap::early_networking::EarlyNetworkSetupError; use crate::config::Config; use crate::instance_manager::InstanceManager; use crate::long_running_tasks::LongRunningTaskHandles; -use crate::metrics::MetricsManager; +use crate::metrics::{self, MetricsManager, MetricsRequestQueue}; use crate::nexus::{ NexusClient, NexusNotifierHandle, NexusNotifierInput, NexusNotifierTask, }; -use crate::params::OmicronZoneTypeExt; use crate::probe_manager::ProbeManager; use crate::services::{self, ServiceManager}; -use crate::storage_monitor::StorageMonitorHandle; use crate::support_bundle::logs::SupportBundleLogs; use crate::support_bundle::storage::SupportBundleManager; use crate::vmm_reservoir::{ReservoirMode, VmmReservoirManager}; -use crate::zone_bundle; use crate::zone_bundle::BundleError; +use crate::zone_bundle::{self, ZoneBundler}; use bootstore::schemes::v0 as bootstore; use camino::Utf8PathBuf; use derive_more::From; @@ -31,15 +29,17 @@ use dropshot::HttpError; use futures::StreamExt; use futures::stream::FuturesUnordered; use illumos_utils::opte::PortManager; +use illumos_utils::running_zone::RunningZone; +use illumos_utils::zpool::ZpoolName; use nexus_sled_agent_shared::inventory::{ - ConfigReconcilerInventoryStatus, Inventory, InventoryDataset, - InventoryDisk, InventoryZpool, OmicronSledConfig, OmicronSledConfigResult, - OmicronZonesConfig, SledRole, + Inventory, OmicronSledConfig, SledRole, }; use omicron_common::address::{ Ipv6Subnet, SLED_PREFIX, get_sled_address, get_switch_zone_address, }; -use omicron_common::api::external::{ByteCount, ByteCountRangeError, Vni}; +use omicron_common::api::external::{ + ByteCount, ByteCountRangeError, Generation, Vni, +}; use omicron_common::api::internal::nexus::{DiskRuntimeState, SledVmmState}; use omicron_common::api::internal::shared::{ ExternalIpGatewayMap, HostPortConfig, RackNetworkConfig, @@ -49,13 +49,14 @@ use omicron_common::api::internal::shared::{ use omicron_common::backoff::{ BackoffError, retry_notify, retry_policy_internal_service_aggressive, }; -use omicron_common::disk::{ - DatasetsConfig, DatasetsManagementResult, DisksManagementResult, - OmicronPhysicalDisksConfig, -}; +use omicron_common::disk::M2Slot; use omicron_ddm_admin_client::Client as DdmAdminClient; use omicron_uuid_kinds::{GenericUuid, PropolisUuid, SledUuid}; -use sled_agent_api::Zpool; +use sled_agent_config_reconciler::{ + ConfigReconcilerHandle, InternalDisksReceiver, LedgerNewConfigError, + LedgerTaskError, ReconcilerInventory, SledAgentArtifactStore, + SledAgentFacilities, TimeSyncStatus, +}; use sled_agent_types::disk::DiskStateRequested; use 
sled_agent_types::early_networking::EarlyNetworkConfig; use sled_agent_types::instance::{ @@ -66,20 +67,21 @@ use sled_agent_types::sled::{BaseboardId, StartSledAgentRequest}; use sled_agent_types::time_sync::TimeSync; use sled_agent_types::zone_bundle::{ BundleUtilization, CleanupContext, CleanupCount, CleanupPeriod, - PriorityOrder, StorageLimit, ZoneBundleMetadata, + PriorityOrder, StorageLimit, ZoneBundleCause, ZoneBundleMetadata, }; use sled_diagnostics::SledDiagnosticsCmdError; use sled_diagnostics::SledDiagnosticsCmdOutput; use sled_hardware::{HardwareManager, MemoryReservations, underlay}; use sled_hardware_types::Baseboard; use sled_hardware_types::underlay::BootstrapInterface; -use sled_storage::manager::StorageHandle; +use sled_storage::config::MountConfig; use slog::Logger; use slog_error_chain::InlineErrorChain; use sprockets_tls::keys::SprocketsConfig; use std::collections::BTreeMap; use std::net::{Ipv6Addr, SocketAddrV6}; use std::sync::Arc; +use tufaceous_artifact::ArtifactHash; use uuid::Uuid; use illumos_utils::dladm::Dladm; @@ -115,9 +117,6 @@ pub enum Error { #[error("Failed to operate on underlay device: {0}")] Underlay(#[from] underlay::Error), - #[error("Failed to request firewall rules")] - FirewallRequest(#[source] nexus_client::Error), - #[error(transparent)] Services(#[from] crate::services::Error), @@ -130,9 +129,6 @@ pub enum Error { #[error("Error managing storage: {0}")] Storage(#[from] sled_storage::error::Error), - #[error("Error monitoring storage: {0}")] - StorageMonitor(#[from] crate::storage_monitor::Error), - #[error("Error updating: {0}")] Download(#[from] crate::updates::Error), @@ -171,6 +167,9 @@ pub enum Error { #[error(transparent)] RepoDepotStart(#[from] crate::artifact_store::StartError), + + #[error("Time not yet synchronized")] + TimeNotSynchronized, } impl From for omicron_common::api::external::Error { @@ -338,12 +337,8 @@ struct SledAgentInner { // This is used for idempotence checks during RSS/Add-Sled internal APIs start_request: StartSledAgentRequest, - // Component of Sled Agent responsible for storage and dataset management. - storage: StorageHandle, - - // Component of Sled Agent responsible for monitoring storage and updating - // dump devices. - storage_monitor: StorageMonitorHandle, + // Handle to the sled-agent-config-reconciler system. + config_reconciler: Arc, // Component of Sled Agent responsible for managing Propolis instances. instances: InstanceManager, @@ -357,9 +352,6 @@ struct SledAgentInner { // Other Oxide-controlled services running on this Sled. services: ServiceManager, - // Connection to Nexus. - nexus_client: NexusClient, - // A mechanism for notifiying nexus about sled-agent updates nexus_notifier: NexusNotifierHandle, @@ -382,7 +374,7 @@ struct SledAgentInner { probes: ProbeManager, // Component of Sled Agent responsible for managing the artifact store. - repo_depot: dropshot::HttpServer>, + repo_depot: dropshot::HttpServer>>, } impl SledAgentInner { @@ -430,12 +422,14 @@ impl SledAgent { .cleanup_snapshots() .await; - let storage_manager = &long_running_task_handles.storage_manager; - let boot_disk = storage_manager - .get_latest_disks() - .await - .boot_disk() - .ok_or_else(|| Error::BootDiskNotFound)?; + let config_reconciler = + Arc::clone(&long_running_task_handles.config_reconciler); + let boot_disk_zpool = config_reconciler + .internal_disks_rx() + .current() + .boot_disk_zpool() + .ok_or_else(|| Error::BootDiskNotFound)? 
+ .clone(); // Configure a swap device of the configured size before other system setup. match config.swap_device_size_gb { @@ -443,7 +437,7 @@ impl SledAgent { info!(log, "Requested swap device of size {} GiB", sz); crate::swap_device::ensure_swap_device( &parent_log, - &boot_disk.1, + &boot_disk_zpool, sz, )?; } @@ -456,7 +450,8 @@ impl SledAgent { } info!(log, "Mounting backing filesystems"); - crate::backing_fs::ensure_backing_fs(&parent_log, &boot_disk.1).await?; + crate::backing_fs::ensure_backing_fs(&parent_log, &boot_disk_zpool) + .await?; // TODO-correctness Bootstrap-agent already ensures the underlay // etherstub and etherstub VNIC exist on startup - could it pass them @@ -549,7 +544,8 @@ impl SledAgent { nexus_client.clone(), instance_vnic_allocator, port_manager.clone(), - storage_manager.clone(), + config_reconciler.currently_managed_zpools_rx().clone(), + config_reconciler.available_datasets_rx(), long_running_task_handles.zone_bundler.clone(), vmm_reservoir_manager.clone(), metrics_manager.request_queue(), @@ -598,9 +594,30 @@ impl SledAgent { .await .expect( "Expected an infinite retry loop getting \ - network config from bootstore", + network config from bootstore", ); + let artifact_store = Arc::new( + ArtifactStore::new( + &log, + config_reconciler.internal_disks_rx().clone(), + Some(Arc::clone(&config_reconciler)), + ) + .await, + ); + + // Start reconciling against our ledgered sled config. + config_reconciler.spawn_reconciliation_task( + etherstub_vnic, + ReconcilerFacilities { + service_manager: services.clone(), + metrics_queue: metrics_manager.request_queue(), + zone_bundler: long_running_task_handles.zone_bundler.clone(), + }, + SledAgentArtifactStoreWrapper(Arc::clone(&artifact_store)), + &parent_log, + ); + services .sled_agent_started( svc_config, @@ -612,14 +629,8 @@ impl SledAgent { ) .await?; - let repo_depot = ArtifactStore::new( - &log, - storage_manager.clone(), - Some(services.clone()), - ) - .await - .start(sled_address, &config.dropshot) - .await?; + let repo_depot = + artifact_store.start(sled_address, &config.dropshot).await?; // Spawn a background task for managing notifications to nexus // about this sled-agent. @@ -642,27 +653,26 @@ impl SledAgent { request.body.id.into_untyped_uuid(), nexus_client.clone(), etherstub.clone(), - storage_manager.clone(), port_manager.clone(), metrics_manager.request_queue(), + config_reconciler.available_datasets_rx(), log.new(o!("component" => "ProbeManager")), ); + let currently_managed_zpools_rx = + config_reconciler.currently_managed_zpools_rx().clone(); + let sled_agent = SledAgent { inner: Arc::new(SledAgentInner { id: request.body.id, subnet: request.body.subnet, start_request: request, - storage: long_running_task_handles.storage_manager.clone(), - storage_monitor: long_running_task_handles - .storage_monitor_handle - .clone(), + config_reconciler, instances, probes, hardware: long_running_task_handles.hardware_manager.clone(), port_manager, services, - nexus_client, nexus_notifier: nexus_notifier_handle, rack_network_config, zone_bundler: long_running_task_handles.zone_bundler.clone(), @@ -675,7 +685,7 @@ impl SledAgent { sprockets: config.sprockets.clone(), }; - sled_agent.inner.probes.run().await; + sled_agent.inner.probes.run(currently_managed_zpools_rx).await; // We immediately add a notification to the request queue about our // existence. If inspection of the hardware later informs us that we're @@ -686,66 +696,17 @@ impl SledAgent { Ok(sled_agent) } - /// Load services for which we're responsible. 
- /// - /// Blocks until all services have started, retrying indefinitely on - /// failure. - pub(crate) async fn load_services(&self) { - info!(self.log, "Loading cold boot services"); - retry_notify( - retry_policy_internal_service_aggressive(), - || async { - // Load as many services as we can, and don't exit immediately - // upon failure. - let load_services_result = - self.inner.services.load_services().await.map_err(|err| { - BackoffError::transient(Error::from(err)) - }); - - // If there wasn't any work to do, we're done immediately. - if matches!( - load_services_result, - Ok(services::LoadServicesResult::NoServicesToLoad) - ) { - info!( - self.log, - "load_services exiting early; no services to be loaded" - ); - return Ok(()); - } - - // Otherwise, request firewall rule updates for as many services as - // we can. Note that we still make this request even if we only - // partially load some services. - let firewall_result = self - .request_firewall_update() - .await - .map_err(|err| BackoffError::transient(err)); - - // Only complete if we have loaded all services and firewall - // rules successfully. - load_services_result.and(firewall_result) - }, - |err, delay| { - warn!( - self.log, - "Failed to load services, will retry in {:?}", delay; - "error" => ?err, - ); - }, - ) - .await - .unwrap(); // we retry forever, so this can't fail - } - /// Accesses the [SupportBundleManager] API. pub(crate) fn as_support_bundle_storage(&self) -> SupportBundleManager<'_> { - SupportBundleManager::new(&self.log, self.storage()) + SupportBundleManager::new(&self.log, &*self.inner.config_reconciler) } /// Accesses the [SupportBundleLogs] API. pub(crate) fn as_support_bundle_logs(&self) -> SupportBundleLogs<'_> { - SupportBundleLogs::new(&self.log, self.storage()) + SupportBundleLogs::new( + &self.log, + self.inner.config_reconciler.internal_disks_rx(), + ) } pub(crate) fn switch_zone_underlay_info( @@ -770,20 +731,6 @@ impl SledAgent { self.sprockets.clone() } - /// Requests firewall rules from Nexus. - /// - /// Does not retry upon failure. - async fn request_firewall_update(&self) -> Result<(), Error> { - let sled_id = self.inner.id; - - self.inner - .nexus_client - .sled_firewall_rules_request(&sled_id) - .await - .map_err(|err| Error::FirewallRequest(err))?; - Ok(()) - } - /// Trigger a request to Nexus informing it that the current sled exists, /// with information about the existing set of hardware. pub(crate) async fn notify_nexus_about_self(&self, log: &Logger) { @@ -874,212 +821,13 @@ impl SledAgent { self.inner.zone_bundler.cleanup().await.map_err(Error::from) } - pub async fn datasets_config_list(&self) -> Result { - Ok(self.storage().datasets_config_list().await?) - } - - async fn datasets_ensure( - &self, - config: DatasetsConfig, - ) -> Result { - info!(self.log, "datasets ensure"); - let datasets_result = self.storage().datasets_ensure(config).await?; - info!(self.log, "datasets ensure: Updated storage"); - - // TODO(https://github.com/oxidecomputer/omicron/issues/6177): - // At the moment, we don't actually remove any datasets -- this function - // just adds new datasets. - // - // Once we start removing old datasets, we should probably ensure that - // they are not longer in-use before returning (similar to - // omicron_physical_disks_ensure). - - Ok(datasets_result) - } - - /// Requests the set of physical disks currently managed by the Sled Agent. 
- /// - /// This should be contrasted by the set of disks in the inventory, which - /// may contain a slightly different set, if certain disks are not expected - /// to be in-use by the broader control plane. - pub async fn omicron_physical_disks_list( - &self, - ) -> Result { - Ok(self.storage().omicron_physical_disks_list().await?) - } - - /// Ensures that the specific set of Omicron Physical Disks are running - /// on this sled, and that no other disks are being used by the control - /// plane (with the exception of M.2s, which are always automatically - /// in-use). - async fn omicron_physical_disks_ensure( - &self, - config: OmicronPhysicalDisksConfig, - ) -> Result { - info!(self.log, "physical disks ensure"); - // Tell the storage subsystem which disks should be managed. - let disk_result = - self.storage().omicron_physical_disks_ensure(config).await?; - info!(self.log, "physical disks ensure: Updated storage"); - - // Grab a view of the latest set of disks, alongside a generation - // number. - // - // This generation is at LEAST as high as our last call through - // omicron_physical_disks_ensure. It may actually be higher, if a - // concurrent operation occurred. - // - // "latest_disks" has a generation number, which is important for other - // subcomponents of Sled Agent to consider. If multiple requests to - // ensure disks arrive concurrently, it's important to "only advance - // forward" as requested by Nexus. - // - // For example: if we receive the following requests concurrently: - // - Use Disks {A, B, C}, generation = 1 - // - Use Disks {A, B, C, D}, generation = 2 - // - // If we ignore generation numbers, it's possible that we start using - // "disk D" -- e.g., for instance filesystems -- and then immediately - // delete it when we process the request with "generation 1". - // - // By keeping these requests ordered, we prevent this thrashing, and - // ensure that we always progress towards the last-requested state. - let latest_disks = self.storage().get_latest_disks().await; - let our_gen = latest_disks.generation(); - info!(self.log, "physical disks ensure: Propagating new generation of disks"; "generation" => ?our_gen); - - // Ensure that the StorageMonitor, and the dump devices, have committed - // to start using new disks and stop using old ones. - self.inner.storage_monitor.await_generation(*our_gen).await?; - info!(self.log, "physical disks ensure: Updated storage monitor"); - - // Ensure that the ZoneBundler, if it was creating a bundle referencing - // the old U.2s, has stopped using them. - self.inner.zone_bundler.await_completion_of_prior_bundles().await; - info!(self.log, "physical disks ensure: Updated zone bundler"); - - // Ensure that all probes, at least after our call to - // "omicron_physical_disks_ensure", stop using any disks that - // may have been in-service from before that request. - self.inner.probes.use_only_these_disks(&latest_disks).await; - info!(self.log, "physical disks ensure: Updated probes"); - - // Do the same for instances - mark them failed if they were using - // expunged disks. - self.inner.instances.use_only_these_disks(latest_disks).await?; - info!(self.log, "physical disks ensure: Updated instances"); - - Ok(disk_result) - } - /// Ensures that the specific sets of disks, datasets, and zones specified /// by `config` are running. - /// - /// This method currently blocks while each of disks, datasets, and zones - /// are ensured in that order; a failure on one prevents any attempt to - /// ensure the subsequent step(s). 
pub async fn set_omicron_config( &self, config: OmicronSledConfig, - ) -> Result { - // Until the config-reconciler work lands: unpack the unified config - // into the three split configs for indepenedent ledgering. - let disks_config = OmicronPhysicalDisksConfig { - generation: config.generation, - disks: config.disks.into_iter().collect(), - }; - - let disks = self.omicron_physical_disks_ensure(disks_config).await?; - - // If we only had partial success deploying disks, don't proceed. - if disks.has_error() { - return Ok(OmicronSledConfigResult { - disks: disks.status, - datasets: Vec::new(), - }); - } - - let datasets_config = DatasetsConfig { - generation: config.generation, - datasets: config.datasets.into_iter().map(|d| (d.id, d)).collect(), - }; - - let datasets = self.datasets_ensure(datasets_config).await?; - - // If we only had partial success deploying datasets, don't proceed. - if datasets.has_error() { - return Ok(OmicronSledConfigResult { - disks: disks.status, - datasets: datasets.status, - }); - } - - let zones_config = OmicronZonesConfig { - generation: config.generation, - zones: config.zones.into_iter().collect(), - }; - - self.omicron_zones_ensure(zones_config).await?; - - Ok(OmicronSledConfigResult { - disks: disks.status, - datasets: datasets.status, - }) - } - - /// Ensures that the specific set of Omicron zones are running as configured - /// (and that no other zones are running) - async fn omicron_zones_ensure( - &self, - requested_zones: OmicronZonesConfig, - ) -> Result<(), Error> { - // TODO(https://github.com/oxidecomputer/omicron/issues/6043): - // - If these are the set of filesystems, we should also consider - // removing the ones which are not listed here. - // - It's probably worth sending a bulk request to the storage system, - // rather than requesting individual datasets. - for zone in &requested_zones.zones { - let Some(dataset_name) = zone.dataset_name() else { - continue; - }; - - // NOTE: This code will be deprecated by https://github.com/oxidecomputer/omicron/pull/7160 - // - // However, we need to ensure that all blueprints have datasets - // within them before we can remove this back-fill. - // - // Therefore, we do something hairy here: We ensure the filesystem - // exists, but don't specify any dataset UUID value. - // - // This means that: - // - If the dataset exists and has a UUID, this will be a no-op - // - If the dataset doesn't exist, it'll be created without its - // oxide:uuid zfs property set - // - If a subsequent call to "datasets_ensure" tries to set a UUID, - // it should be able to get set (once). - self.inner.storage.upsert_filesystem(None, dataset_name).await?; - } - - self.inner - .services - .ensure_all_omicron_zones_persistent(requested_zones) - .await?; - Ok(()) - } - - /// Gets the sled's current list of all zpools. 
- pub async fn zpools_get(&self) -> Vec { - self.inner - .storage - .get_latest_disks() - .await - .get_all_zpools() - .into_iter() - .map(|(name, variant)| Zpool { - id: name.id(), - disk_type: variant.into(), - }) - .collect() + ) -> Result, LedgerTaskError> { + self.inner.config_reconciler.set_sled_config(config).await } /// Returns whether or not the sled believes itself to be a scrimlet @@ -1192,7 +940,7 @@ impl SledAgent { todo!("Disk attachment not yet implemented"); } - pub fn artifact_store(&self) -> &ArtifactStore { + pub fn artifact_store(&self) -> &ArtifactStore { &self.inner.repo_depot.app_private() } @@ -1249,7 +997,18 @@ impl SledAgent { /// Gets the sled's current time synchronization state pub async fn timesync_get(&self) -> Result { - self.inner.services.timesync_get().await.map_err(Error::from) + let status = self.inner.config_reconciler.timesync_status(); + + // TODO-cleanup we could give a more specific error cause in the + // `FailedToGetSyncStatus` case. + match status { + TimeSyncStatus::NotYetChecked + | TimeSyncStatus::ConfiguredToSkip + | TimeSyncStatus::FailedToGetSyncStatus(_) => { + Err(Error::TimeNotSynchronized) + } + TimeSyncStatus::TimeSync(time_sync) => Ok(time_sync), + } } pub async fn ensure_scrimlet_host_ports( @@ -1314,8 +1073,15 @@ impl SledAgent { Ok(()) } - pub(crate) fn storage(&self) -> &StorageHandle { - &self.inner.storage + pub(crate) fn boot_image_raw_devfs_path( + &self, + slot: M2Slot, + ) -> Option>> { + self.inner + .config_reconciler + .internal_disks_rx() + .current() + .image_raw_devfs_path(slot) } pub(crate) fn boot_disk_os_writer(&self) -> &BootDiskOsWriter { @@ -1359,94 +1125,14 @@ impl SledAgent { let sled_role = if is_scrimlet { SledRole::Scrimlet } else { SledRole::Gimlet }; - let mut disks = vec![]; - let mut zpools = vec![]; - let mut datasets = vec![]; - let (all_disks, disks_config, datasets_config, omicron_zones) = tokio::join!( - self.storage().get_latest_disks(), - self.storage().omicron_physical_disks_list(), - self.storage().datasets_config_list(), - self.inner.services.omicron_zones_list() - ); - - // RSS asks for our inventory _before_ it sends us an - // `OmicronSledConfig`; echo back the default (empty) disk and dataset - // configs if we have no ledger at all. 
- let disks_config = match disks_config { - Ok(disks_config) => disks_config, - Err(sled_storage::error::Error::LedgerNotFound) => { - OmicronPhysicalDisksConfig::default() - } - Err(err) => return Err(InventoryError::GetDisksConfig(err)), - }; - let datasets_config = match datasets_config { - Ok(datasets_config) => datasets_config, - Err(sled_storage::error::Error::LedgerNotFound) => { - DatasetsConfig::default() - } - Err(err) => return Err(InventoryError::GetDatasetsConfig(err)), - }; - - for (identity, variant, slot, firmware) in all_disks.iter_all() { - disks.push(InventoryDisk { - identity: identity.clone(), - variant, - slot, - active_firmware_slot: firmware.active_slot(), - next_active_firmware_slot: firmware.next_active_slot(), - number_of_firmware_slots: firmware.number_of_slots(), - slot1_is_read_only: firmware.slot1_read_only(), - slot_firmware_versions: firmware.slots().to_vec(), - }); - } - for zpool in all_disks.all_u2_zpools() { - let info = - match illumos_utils::zpool::Zpool::get_info(&zpool.to_string()) - .await - { - Ok(info) => info, - Err(err) => { - warn!( - self.log, - "Failed to access zpool info"; - "zpool" => %zpool, - "err" => %err - ); - continue; - } - }; - - zpools.push(InventoryZpool { - id: zpool.id(), - total_size: ByteCount::try_from(info.size())?, - }); - - let inv_props = match self.storage().datasets_list(zpool).await { - Ok(props) => { - props.into_iter().map(|prop| InventoryDataset::from(prop)) - } - Err(err) => { - warn!( - self.log, - "Failed to access dataset info within zpool"; - "zpool" => %zpool, - "err" => %err - ); - continue; - } - }; - datasets.extend(inv_props); - } - - // Reassemble our combined sled config from its separate pieces. (This - // will go away once we start ledgering the config as a single unit.) - let sled_config = OmicronSledConfig { - generation: omicron_zones.generation, - disks: disks_config.disks.into_iter().collect(), - datasets: datasets_config.datasets.into_values().collect(), - zones: omicron_zones.zones.into_iter().collect(), - remove_mupdate_override: None, - }; + let ReconcilerInventory { + disks, + zpools, + datasets, + ledgered_sled_config, + reconciler_status, + last_reconciliation, + } = self.inner.config_reconciler.inventory(); Ok(Inventory { sled_id, @@ -1459,11 +1145,9 @@ impl SledAgent { disks, zpools, datasets, - // These fields will come from the reconciler once it's integrated. - // For now, we can report our ledgered config but nothing else. 
- ledgered_sled_config: Some(sled_config), - reconciler_status: ConfigReconcilerInventoryStatus::NotYetRun, - last_reconciliation: None, + ledgered_sled_config, + reconciler_status, + last_reconciliation, }) } @@ -1625,3 +1309,77 @@ pub async fn sled_add( info!(log, "Peer agent initialized"; "peer_bootstrap_addr" => %bootstrap_addr, "peer_id" => %baseboard); Ok(()) } + +struct ReconcilerFacilities { + service_manager: ServiceManager, + metrics_queue: MetricsRequestQueue, + zone_bundler: ZoneBundler, +} + +impl SledAgentFacilities for ReconcilerFacilities { + type StartZoneError = services::Error; + type MetricsUntrackZoneLinksError = Vec; + type ZoneBundleCreateError = BundleError; + + async fn on_time_sync(&self) { + self.service_manager.on_time_sync().await + } + + async fn start_omicron_zone( + &self, + zone_config: &OmicronZoneConfig, + mount_config: &MountConfig, + is_time_synchronized: bool, + all_u2_pools: &[ZpoolName], + ) -> Result { + self.service_manager + .start_omicron_zone( + mount_config, + zone_config, + is_time_synchronized, + all_u2_pools, + ) + .await + } + + fn metrics_untrack_zone_links( + &self, + zone: &RunningZone, + ) -> Result<(), Self::MetricsUntrackZoneLinksError> { + self.metrics_queue.untrack_zone_links(zone) + } + + fn ddm_add_internal_dns_prefix(&self, prefix: Ipv6Subnet) { + self.service_manager.ddm_reconciler().add_internal_dns_subnet(prefix); + } + + fn ddm_remove_internal_dns_prefix(&self, prefix: Ipv6Subnet) { + self.service_manager + .ddm_reconciler() + .remove_internal_dns_subnet(prefix); + } + + async fn zone_bundle_create( + &self, + zone: &RunningZone, + cause: ZoneBundleCause, + ) -> Result<(), Self::ZoneBundleCreateError> { + self.zone_bundler.create(zone, cause).await?; + Ok(()) + } +} + +// Workaround wrapper for orphan rules. +struct SledAgentArtifactStoreWrapper(Arc>); + +impl SledAgentArtifactStore for SledAgentArtifactStoreWrapper { + type ArtifactExistsValidationError = artifact_store::Error; + + async fn validate_artifact_exists_in_storage( + &self, + artifact: ArtifactHash, + ) -> Result<(), Self::ArtifactExistsValidationError> { + self.0.get(artifact).await?; + Ok(()) + } +} diff --git a/sled-agent/src/storage_monitor.rs b/sled-agent/src/storage_monitor.rs deleted file mode 100644 index 626d81d54ff..00000000000 --- a/sled-agent/src/storage_monitor.rs +++ /dev/null @@ -1,116 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! A task that listens for storage events from [`sled_storage::manager::StorageManager`] -//! and dispatches them to other parts of the bootstrap agent and sled agent -//! code. - -use omicron_common::api::external::Generation; -use sled_agent_config_reconciler::dump_setup::DumpSetup; -use sled_storage::config::MountConfig; -use sled_storage::manager::StorageHandle; -use sled_storage::resources::AllDisks; -use slog::Logger; -use std::sync::Arc; -use tokio::sync::watch; - -#[derive(thiserror::Error, Debug)] -pub enum Error { - #[error("Storage Monitor no longer running")] - NotRunning, -} - -pub struct StorageMonitor { - log: Logger, - storage_manager: StorageHandle, - - // Invokes dumpadm(8) and savecore(8) when new disks are encountered - dump_setup: DumpSetup, - - tx: watch::Sender, -} - -/// Emits status about storage monitoring. 
-#[derive(Debug, Clone)] -pub struct StorageMonitorStatus { - /// The latest generation of physical disks to be processed - /// by the storage monitor. - pub latest_gen: Option, -} - -impl StorageMonitorStatus { - fn new() -> Self { - Self { latest_gen: None } - } -} - -#[derive(Clone)] -pub struct StorageMonitorHandle { - rx: watch::Receiver, -} - -impl StorageMonitorHandle { - pub async fn await_generation( - &self, - wanted: Generation, - ) -> Result<(), Error> { - self.rx - .clone() - .wait_for(|status| { - let Some(observed) = status.latest_gen else { - return false; - }; - return observed >= wanted; - }) - .await - .map_err(|_| Error::NotRunning)?; - Ok(()) - } -} - -impl StorageMonitor { - pub fn new( - log: &Logger, - mount_config: MountConfig, - storage_manager: StorageHandle, - ) -> (StorageMonitor, StorageMonitorHandle) { - let dump_setup = DumpSetup::new(&log, Arc::new(mount_config)); - let log = log.new(o!("component" => "StorageMonitor")); - let (tx, rx) = watch::channel(StorageMonitorStatus::new()); - ( - StorageMonitor { log, storage_manager, dump_setup, tx }, - StorageMonitorHandle { rx }, - ) - } - - /// Run the main receive loop of the `StorageMonitor` - /// - /// This should be spawned into a tokio task - pub async fn run(mut self) { - loop { - tokio::select! { - disks = self.storage_manager.wait_for_changes() => { - info!( - self.log, - "Received storage manager update"; - "disks" => ?disks - ); - self.handle_resource_update(disks).await; - } - } - } - } - - async fn handle_resource_update(&mut self, updated_disks: AllDisks) { - let generation = updated_disks.generation(); - self.dump_setup - .update_dumpdev_setup( - updated_disks.iter_managed().map(|(_id, disk)| disk), - ) - .await; - self.tx.send_replace(StorageMonitorStatus { - latest_gen: Some(*generation), - }); - } -} diff --git a/sled-agent/src/support_bundle/logs.rs b/sled-agent/src/support_bundle/logs.rs index 3467c8663d0..c68a3bdedde 100644 --- a/sled-agent/src/support_bundle/logs.rs +++ b/sled-agent/src/support_bundle/logs.rs @@ -7,7 +7,7 @@ use camino_tempfile::tempfile_in; use dropshot::HttpError; use range_requests::make_get_response; -use sled_storage::manager::StorageHandle; +use sled_agent_config_reconciler::InternalDisksReceiver; use slog::Logger; use slog_error_chain::InlineErrorChain; use tokio::io::AsyncSeekExt; @@ -43,12 +43,15 @@ impl From for HttpError { pub struct SupportBundleLogs<'a> { log: &'a Logger, - sled_storage: &'a StorageHandle, + internal_disks_rx: &'a InternalDisksReceiver, } impl<'a> SupportBundleLogs<'a> { - pub fn new(log: &'a Logger, sled_storage: &'a StorageHandle) -> Self { - Self { log, sled_storage } + pub fn new( + log: &'a Logger, + internal_disks_rx: &'a InternalDisksReceiver, + ) -> Self { + Self { log, internal_disks_rx } } /// Get a list of zones on a sled containing logs that we want to include in @@ -77,12 +80,9 @@ impl<'a> SupportBundleLogs<'a> { { // We are using an M.2 device for temporary storage to assemble a zip // file made up of all of the discovered zone's logs. 
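The `StorageMonitorHandle` deleted above was essentially a small tokio `watch`-channel gate: publish a monotonically increasing generation, and let callers block until it reaches a target. The config-reconciler's receiver handles now cover the same need, but the underlying pattern is worth one isolated sketch. Stripped down, assuming only `tokio::sync::watch` (illustrative names):

    use tokio::sync::watch;

    /// Block until the published generation reaches `wanted`; Err means
    /// the publishing task has gone away.
    async fn await_generation(
        rx: &watch::Receiver<Option<u64>>,
        wanted: u64,
    ) -> Result<(), ()> {
        rx.clone()
            .wait_for(|current| matches!(current, Some(g) if *g >= wanted))
            .await
            .map(|_| ())
            .map_err(|_| ())
    }

    #[tokio::main]
    async fn main() {
        let (tx, rx) = watch::channel(None);
        // Publisher side, e.g. after handling a disk update:
        tx.send_replace(Some(3u64));
        await_generation(&rx, 3).await.expect("publisher still alive");
    }
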
- let m2_debug_datasets = self - .sled_storage - .get_latest_disks() - .await - .all_sled_diagnostics_directories(); - let tempdir = m2_debug_datasets.first().ok_or(Error::MissingStorage)?; + let current_internal_disks = self.internal_disks_rx.current(); + let mut m2_debug_datasets = current_internal_disks.all_debug_datasets(); + let tempdir = m2_debug_datasets.next().ok_or(Error::MissingStorage)?; let mut tempfile = tempfile_in(tempdir)?; let log = self.log.clone(); diff --git a/sled-agent/src/support_bundle/storage.rs b/sled-agent/src/support_bundle/storage.rs index 4e3d781e146..ad126e9e1b5 100644 --- a/sled-agent/src/support_bundle/storage.rs +++ b/sled-agent/src/support_bundle/storage.rs @@ -14,6 +14,7 @@ use futures::Stream; use futures::StreamExt; use illumos_utils::zfs::DatasetProperties; use omicron_common::api::external::Error as ExternalError; +use omicron_common::api::external::Generation; use omicron_common::disk::CompressionAlgorithm; use omicron_common::disk::DatasetConfig; use omicron_common::disk::DatasetName; @@ -28,15 +29,16 @@ use range_requests::PotentialRange; use range_requests::SingleRange; use sha2::{Digest, Sha256}; use sled_agent_api::*; +use sled_agent_config_reconciler::ConfigReconcilerHandle; +use sled_agent_config_reconciler::DatasetTaskError; use sled_agent_types::support_bundle::BUNDLE_FILE_NAME; use sled_agent_types::support_bundle::BUNDLE_TMP_FILE_NAME_SUFFIX; use sled_storage::manager::NestedDatasetConfig; use sled_storage::manager::NestedDatasetListOptions; use sled_storage::manager::NestedDatasetLocation; -use sled_storage::manager::StorageHandle; use slog::Logger; use slog_error_chain::InlineErrorChain; -use std::borrow::Cow; +use std::collections::BTreeMap; use std::io::Write; use tokio::io::AsyncReadExt; use tokio::io::AsyncSeekExt; @@ -86,6 +88,9 @@ pub enum Error { #[error(transparent)] Zip(#[from] ZipError), + + #[error(transparent)] + DatasetTask(#[from] DatasetTaskError), } fn err_str(err: &dyn std::error::Error) -> String { @@ -144,7 +149,6 @@ pub trait LocalStorage: Sync { async fn dyn_ensure_mounted_and_get_mountpoint( &self, dataset: NestedDatasetLocation, - mount_root: &Utf8Path, ) -> Result; /// Returns all nested datasets within an existing dataset @@ -165,19 +169,29 @@ pub trait LocalStorage: Sync { &self, name: NestedDatasetLocation, ) -> Result<(), Error>; - - /// Returns the root filesystem path where datasets are mounted. - /// - /// This is typically "/" in prod, but can be a temporary directory - /// for tests to isolate storage that typically appears globally. - fn zpool_mountpoint_root(&self) -> Cow; } /// This implementation is effectively a pass-through to the real methods #[async_trait] -impl LocalStorage for StorageHandle { +impl LocalStorage for ConfigReconcilerHandle { async fn dyn_datasets_config_list(&self) -> Result { - self.datasets_config_list().await.map_err(|err| err.into()) + // TODO-cleanup This is super gross; add a better API (maybe fetch a + // single dataset by ID, since that's what our caller wants?) 
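The TODO above asks for a narrower API: fetch a single dataset by ID instead of reconstructing a whole `DatasetsConfig`. A rough sketch of what that suggested helper could look like, working purely on the ledgered config this impl already reads; the function is hypothetical and not part of the real `ConfigReconcilerHandle` surface:

    use nexus_sled_agent_shared::inventory::OmicronSledConfig;
    use omicron_common::disk::DatasetConfig;
    use omicron_uuid_kinds::DatasetUuid;

    // Hypothetical lookup shaped the way the TODO suggests; it just
    // narrows what dyn_datasets_config_list() already produces.
    fn dataset_config_by_id(
        sled_config: &OmicronSledConfig,
        dataset_id: DatasetUuid,
    ) -> Option<&DatasetConfig> {
        sled_config.datasets.iter().find(|d| d.id == dataset_id)
    }
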
+ Ok(self + .inventory() + .ledgered_sled_config + .map(|sled_config| DatasetsConfig { + generation: sled_config.generation, + datasets: sled_config + .datasets + .into_iter() + .map(|d| (d.id, d)) + .collect(), + }) + .unwrap_or_else(|| DatasetsConfig { + generation: Generation::new(), + datasets: BTreeMap::new(), + })) } async fn dyn_dataset_get( @@ -205,12 +219,10 @@ impl LocalStorage for StorageHandle { async fn dyn_ensure_mounted_and_get_mountpoint( &self, dataset: NestedDatasetLocation, - mount_root: &Utf8Path, ) -> Result { - dataset - .ensure_mounted_and_get_mountpoint(mount_root) + self.nested_dataset_ensure_mounted(dataset) .await - .map_err(Error::from) + .map_err(|err| err.into()) } async fn dyn_nested_dataset_list( @@ -234,10 +246,6 @@ impl LocalStorage for StorageHandle { ) -> Result<(), Error> { self.nested_dataset_destroy(name).await.map_err(|err| err.into()) } - - fn zpool_mountpoint_root(&self) -> Cow { - Cow::Borrowed(self.mount_config().root.as_path()) - } } /// This implementation allows storage bundles to be stored on simulated storage @@ -257,10 +265,10 @@ impl LocalStorage for crate::sim::Storage { async fn dyn_ensure_mounted_and_get_mountpoint( &self, dataset: NestedDatasetLocation, - mount_root: &Utf8Path, ) -> Result { + let slf = self.lock(); // Simulated storage treats all datasets as mounted. - Ok(dataset.mountpoint(mount_root)) + Ok(dataset.mountpoint(slf.root())) } async fn dyn_nested_dataset_list( @@ -284,10 +292,6 @@ impl LocalStorage for crate::sim::Storage { ) -> Result<(), Error> { self.lock().nested_dataset_destroy(name).map_err(|err| err.into()) } - - fn zpool_mountpoint_root(&self) -> Cow { - Cow::Owned(self.lock().root().to_path_buf()) - } } /// Describes the type of access to the support bundle @@ -512,10 +516,7 @@ impl<'a> SupportBundleManager<'a> { // The dataset for a support bundle exists. let support_bundle_path = self .storage - .dyn_ensure_mounted_and_get_mountpoint( - dataset.name, - &self.storage.zpool_mountpoint_root(), - ) + .dyn_ensure_mounted_and_get_mountpoint(dataset.name) .await? 
.join(BUNDLE_FILE_NAME); @@ -625,13 +626,8 @@ impl<'a> SupportBundleManager<'a> { info!(log, "Dataset does exist for bundle"); // The mounted root of the support bundle dataset - let support_bundle_dir = self - .storage - .dyn_ensure_mounted_and_get_mountpoint( - dataset, - &self.storage.zpool_mountpoint_root(), - ) - .await?; + let support_bundle_dir = + self.storage.dyn_ensure_mounted_and_get_mountpoint(dataset).await?; let support_bundle_path = support_bundle_dir.join(BUNDLE_FILE_NAME); let support_bundle_path_tmp = support_bundle_dir.join(format!( "{}-{BUNDLE_TMP_FILE_NAME_SUFFIX}", @@ -737,13 +733,8 @@ impl<'a> SupportBundleManager<'a> { NestedDatasetLocation { path: support_bundle_id.to_string(), root }; // The mounted root of the support bundle dataset - let support_bundle_dir = self - .storage - .dyn_ensure_mounted_and_get_mountpoint( - dataset, - &self.storage.zpool_mountpoint_root(), - ) - .await?; + let support_bundle_dir = + self.storage.dyn_ensure_mounted_and_get_mountpoint(dataset).await?; let path = support_bundle_dir.join(BUNDLE_FILE_NAME); let f = tokio::fs::File::open(&path).await?; @@ -944,11 +935,80 @@ mod tests { use omicron_common::disk::DatasetsConfig; use omicron_common::zpool_name::ZpoolName; use omicron_test_utils::dev::test_setup_log; + use sled_storage::manager::StorageHandle; use sled_storage::manager_test_harness::StorageManagerTestHarness; use std::collections::BTreeMap; use zip::ZipWriter; use zip::write::SimpleFileOptions; + // TODO-cleanup Should we rework these tests to not use StorageHandle (real + // code now goes through `ConfigReconcilerHandle`)? + #[async_trait] + impl LocalStorage for StorageHandle { + async fn dyn_datasets_config_list( + &self, + ) -> Result { + self.datasets_config_list().await.map_err(|err| err.into()) + } + + fn dyn_dataset_get( + &self, + dataset_name: &String, + ) -> Result { + let Some(dataset) = + illumos_utils::zfs::Zfs::get_dataset_properties( + &[dataset_name.clone()], + illumos_utils::zfs::WhichDatasets::SelfOnly, + ) + .map_err(|err| Error::DatasetLookup(err))? + .pop() + else { + // This should not be possible, unless the "zfs get" command is + // behaving unpredictably. We're only asking for a single dataset, + // so on success, we should see the result of that dataset. + return Err(Error::DatasetLookup(anyhow::anyhow!( + "Zfs::get_dataset_properties returned an empty vec?" 
+ ))); + }; + + Ok(dataset) + } + + async fn dyn_ensure_mounted_and_get_mountpoint( + &self, + dataset: NestedDatasetLocation, + ) -> Result { + dataset + .ensure_mounted_and_get_mountpoint(&self.mount_config().root) + .await + .map_err(|err| err.into()) + } + + async fn dyn_nested_dataset_list( + &self, + name: NestedDatasetLocation, + options: NestedDatasetListOptions, + ) -> Result, Error> { + self.nested_dataset_list(name, options) + .await + .map_err(|err| err.into()) + } + + async fn dyn_nested_dataset_ensure( + &self, + config: NestedDatasetConfig, + ) -> Result<(), Error> { + self.nested_dataset_ensure(config).await.map_err(|err| err.into()) + } + + async fn dyn_nested_dataset_destroy( + &self, + name: NestedDatasetLocation, + ) -> Result<(), Error> { + self.nested_dataset_destroy(name).await.map_err(|err| err.into()) + } + } + struct SingleU2StorageHarness { storage_test_harness: StorageManagerTestHarness, zpool_id: ZpoolUuid, diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs index 24a13b8b2a2..5df159a6c8b 100644 --- a/sled-agent/src/zone_bundle.rs +++ b/sled-agent/src/zone_bundle.rs @@ -27,9 +27,9 @@ use illumos_utils::zfs::Snapshot; use illumos_utils::zfs::ZFS; use illumos_utils::zfs::Zfs; use illumos_utils::zone::AdmError; +use sled_agent_config_reconciler::AvailableDatasetsReceiver; +use sled_agent_config_reconciler::InternalDisksReceiver; use sled_agent_types::zone_bundle::*; -use sled_storage::dataset::U2_DEBUG_DATASET; -use sled_storage::manager::StorageHandle; use slog::Logger; use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -139,7 +139,8 @@ pub struct ZoneBundler { // State shared between tasks, e.g., used when creating a bundle in different // tasks or between a creation and cleanup. struct Inner { - storage_handle: StorageHandle, + internal_disks_rx: InternalDisksReceiver, + available_datasets_rx: AvailableDatasetsReceiver, cleanup_context: CleanupContext, last_cleanup_at: Instant, } @@ -167,11 +168,11 @@ impl Inner { // that can exist but do not, i.e., those whose parent datasets already // exist; and returns those. async fn bundle_directories(&self) -> Vec { - let resources = self.storage_handle.get_latest_disks().await; // NOTE: These bundle directories are always stored on M.2s, so we don't // need to worry about synchronizing with U.2 disk expungement at the // callsite. - let expected = resources.all_zone_bundle_directories(); + let internal_disks = self.internal_disks_rx.current(); + let expected = internal_disks.all_zone_bundle_directories(); let mut out = Vec::with_capacity(expected.len()); for each in expected.into_iter() { if tokio::fs::create_dir_all(&each).await.is_ok() { @@ -236,7 +237,8 @@ impl ZoneBundler { /// to clean them up to free up space. pub async fn new( log: Logger, - storage_handle: StorageHandle, + internal_disks_rx: InternalDisksReceiver, + available_datasets_rx: AvailableDatasetsReceiver, cleanup_context: CleanupContext, ) -> Self { // This is compiled out in tests because there's no way to set our @@ -255,7 +257,8 @@ impl ZoneBundler { .expect("Failed to initialize existing ZFS resources"); let notify_cleanup = Arc::new(Notify::new()); let inner = Arc::new(Mutex::new(Inner { - storage_handle, + internal_disks_rx, + available_datasets_rx, cleanup_context, last_cleanup_at: Instant::now(), })); @@ -356,9 +359,9 @@ impl ZoneBundler { // prior bundles have completed. 
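The two receivers threaded through `Inner` above split the bundler's storage lookups by disk class: internal (M.2) disks answer where bundles themselves live, while the available-datasets view answers which U.2 debug datasets should be swept for extra logs. Condensed from the hunks above and below, with types elided (not a literal excerpt):

    // M.2 side: bundle directories always live on internal disks, so U.2
    // expungement can't pull one out from under an in-progress bundle.
    let bundle_dirs = inner.bundle_directories().await;

    // U.2 side: extra log sources are whatever debug datasets happen to
    // be mounted right now.
    let extra_log_dirs: Vec<_> = inner
        .available_datasets_rx
        .all_mounted_debug_datasets()
        .into_iter()
        .map(|pool_path| pool_path.path)
        .collect();
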
let inner = self.inner.lock().await; let storage_dirs = inner.bundle_directories().await; - let resources = inner.storage_handle.get_latest_disks().await; - let extra_log_dirs = resources - .all_u2_mountpoints(U2_DEBUG_DATASET) + let extra_log_dirs = inner + .available_datasets_rx + .all_mounted_debug_datasets() .into_iter() .map(|pool_path| pool_path.path) .collect(); @@ -1766,7 +1769,10 @@ mod illumos_tests { use chrono::TimeZone; use chrono::Timelike; use chrono::Utc; + use omicron_common::disk::DiskIdentity; use rand::RngCore; + use sled_agent_config_reconciler::AvailableDatasetsReceiver; + use sled_agent_config_reconciler::InternalDisksReceiver; use sled_storage::manager_test_harness::StorageManagerTestHarness; use slog::Drain; use slog::Logger; @@ -1891,9 +1897,38 @@ mod illumos_tests { let log = test_logger(); let context = CleanupContext::default(); let resource_wrapper = ResourceWrapper::new(&log).await; + let handle = resource_wrapper.storage_test_harness.handle(); + let all_disks = handle.get_latest_disks().await; + + // Convert from StorageManagerTestHarness to config-reconciler channels. + // Do we want to expand config-reconciler test support and not use + // StorageManagerTestHarness? + let internal_disks_rx = InternalDisksReceiver::fake_static( + Arc::new(all_disks.mount_config().clone()), + all_disks.all_m2_zpools().into_iter().enumerate().map( + |(i, zpool)| { + ( + DiskIdentity { + vendor: format!("test-vendor-{i}"), + model: format!("test-model-{i}"), + serial: format!("test-serial-{i}"), + }, + zpool, + ) + }, + ), + ); + let available_datasets_rx = AvailableDatasetsReceiver::fake_static( + all_disks + .all_m2_zpools() + .into_iter() + .zip(all_disks.all_m2_mountpoints(".")), + ); + let bundler = ZoneBundler::new( log, - resource_wrapper.storage_test_harness.handle().clone(), + internal_disks_rx, + available_datasets_rx, context, ) .await; From ce36cb7b7440aeb532713a72dc279b29253aa5ba Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 30 Apr 2025 15:16:34 -0400 Subject: [PATCH 02/10] use new ConfigReconcilerSpawnToken interface --- sled-agent/src/bootstrap/pre_server.rs | 4 ++++ sled-agent/src/bootstrap/server.rs | 26 +++++++++++++++++++++----- sled-agent/src/long_running_tasks.rs | 18 +++++++++++------- sled-agent/src/server.rs | 3 +++ sled-agent/src/sled_agent.rs | 9 +++++---- 5 files changed, 44 insertions(+), 16 deletions(-) diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index 546a523e3e0..094b44e6c31 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -35,6 +35,7 @@ use illumos_utils::zone::Api; use illumos_utils::zone::Zones; use omicron_common::FileKv; use omicron_common::address::Ipv6Subnet; +use sled_agent_config_reconciler::ConfigReconcilerSpawnToken; use sled_hardware::DendriteAsic; use sled_hardware::SledMode; use sled_hardware::underlay; @@ -53,6 +54,7 @@ pub(super) struct BootstrapAgentStartup { pub(super) service_manager: ServiceManager, pub(super) long_running_task_handles: LongRunningTaskHandles, pub(super) sled_agent_started_tx: oneshot::Sender, + pub(super) config_reconciler_spawn_token: ConfigReconcilerSpawnToken, } impl BootstrapAgentStartup { @@ -120,6 +122,7 @@ impl BootstrapAgentStartup { // the process and are used by both the bootstrap agent and sled agent let ( long_running_task_handles, + config_reconciler_spawn_token, sled_agent_started_tx, service_manager_ready_tx, ) = spawn_all_longrunning_tasks( @@ -161,6 +164,7 @@ impl 
BootstrapAgentStartup { service_manager, long_running_task_handles, sled_agent_started_tx, + config_reconciler_spawn_token, }) } } diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 9788bb943ad..d18b42a3466 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -39,6 +39,7 @@ use omicron_ddm_admin_client::DdmError; use omicron_ddm_admin_client::types::EnableStatsRequest; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::RackInitUuid; +use sled_agent_config_reconciler::ConfigReconcilerSpawnToken; use sled_agent_config_reconciler::InternalDisksReceiver; use sled_agent_types::rack_init::RackInitializeRequest; use sled_agent_types::sled::StartSledAgentRequest; @@ -180,6 +181,7 @@ impl Server { service_manager, long_running_task_handles, sled_agent_started_tx, + config_reconciler_spawn_token, } = BootstrapAgentStartup::run(config).await?; // Do we have a StartSledAgentRequest stored in the ledger? @@ -246,6 +248,7 @@ impl Server { &config, start_sled_agent_request, long_running_task_handles.clone(), + config_reconciler_spawn_token, service_manager.clone(), &base_log, &startup_log, @@ -261,7 +264,10 @@ impl Server { SledAgentState::ServerStarted(sled_agent_server) } else { - SledAgentState::Bootstrapping(Some(sled_agent_started_tx)) + SledAgentState::Bootstrapping(Some(BootstrappingDependencies { + sled_agent_started_tx, + config_reconciler_spawn_token, + })) }; // Spawn our inner task that handles any future hardware updates and any @@ -303,11 +309,16 @@ impl Server { // bootstrap server). enum SledAgentState { // We're still in the bootstrapping phase, waiting for a sled-agent request. - Bootstrapping(Option>), + Bootstrapping(Option), // ... or the sled agent server is running. ServerStarted(SledAgentServer), } +struct BootstrappingDependencies { + sled_agent_started_tx: oneshot::Sender, + config_reconciler_spawn_token: ConfigReconcilerSpawnToken, +} + #[derive(thiserror::Error, Debug)] pub enum SledAgentServerStartError { #[error("Failed to start sled-agent server: {0}")] @@ -347,6 +358,7 @@ async fn start_sled_agent( config: &SledConfig, request: StartSledAgentRequest, long_running_task_handles: LongRunningTaskHandles, + config_reconciler_spawn_token: ConfigReconcilerSpawnToken, service_manager: ServiceManager, base_log: &Logger, log: &Logger, @@ -398,6 +410,7 @@ async fn start_sled_agent( base_log.clone(), request.clone(), long_running_task_handles.clone(), + config_reconciler_spawn_token, service_manager, ) .await @@ -531,7 +544,7 @@ impl Inner { log: &Logger, ) { match &mut self.state { - SledAgentState::Bootstrapping(sled_agent_started_tx) => { + SledAgentState::Bootstrapping(deps) => { let request_id = request.body.id.into_untyped_uuid(); // Extract from options to satisfy the borrow checker. @@ -540,13 +553,16 @@ impl Inner { // we explicitly unwrap here, and panic on error below. 
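Carrying `ConfigReconcilerSpawnToken` inside `BootstrappingDependencies` keeps the existing take-once shape: both the oneshot sender and the spawn token are single-use values that must be moved out of the bootstrapping state exactly once, when the sled-agent request arrives. A minimal sketch of that shape with stand-in types (none of these are the real ones):

    struct StartedTx;  // stands in for the oneshot sender
    struct SpawnToken; // stands in for ConfigReconcilerSpawnToken

    struct Deps {
        started_tx: StartedTx,
        spawn_token: SpawnToken,
    }

    enum State {
        // Option<_> so the single-use values can be moved out through a
        // `&mut State` without replacing the whole variant first.
        Bootstrapping(Option<Deps>),
        Running,
    }

    fn start(state: &mut State) {
        if let State::Bootstrapping(deps) = state {
            // A second start request would panic here, mirroring the
            // explicit unwrap in the hunk above.
            let Deps { started_tx, spawn_token } = deps.take().unwrap();
            let _ = (started_tx, spawn_token); // hand these to the server
            *state = State::Running;
        }
    }
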
// // See https://github.com/oxidecomputer/omicron/issues/4494 - let sled_agent_started_tx = - sled_agent_started_tx.take().unwrap(); + let BootstrappingDependencies { + sled_agent_started_tx, + config_reconciler_spawn_token, + } = deps.take().unwrap(); let response = match start_sled_agent( &self.config, request, self.long_running_task_handles.clone(), + config_reconciler_spawn_token, self.service_manager.clone(), &self.base_log, &log, diff --git a/sled-agent/src/long_running_tasks.rs b/sled-agent/src/long_running_tasks.rs index 37b646b8a44..ff9a6a53251 100644 --- a/sled-agent/src/long_running_tasks.rs +++ b/sled-agent/src/long_running_tasks.rs @@ -25,7 +25,8 @@ use bootstore::schemes::v0 as bootstore; use illumos_utils::zpool::ZpoolName; use key_manager::{KeyManager, StorageKeyRequester}; use sled_agent_config_reconciler::{ - ConfigReconcilerHandle, RawDisksSender, TimeSyncConfig, + ConfigReconcilerHandle, ConfigReconcilerSpawnToken, RawDisksSender, + TimeSyncConfig, }; use sled_agent_types::zone_bundle::CleanupContext; use sled_agent_zone_images::{ZoneImageSourceResolver, ZoneImageZpools}; @@ -70,6 +71,7 @@ pub async fn spawn_all_longrunning_tasks( config: &Config, ) -> ( LongRunningTaskHandles, + ConfigReconcilerSpawnToken, oneshot::Sender, oneshot::Sender, ) { @@ -80,12 +82,13 @@ pub async fn spawn_all_longrunning_tasks( } else { TimeSyncConfig::Normal }; - let mut config_reconciler = ConfigReconcilerHandle::new( - MountConfig::default(), - storage_key_requester, - time_sync_config, - log, - ); + let (mut config_reconciler, config_reconciler_spawn_token) = + ConfigReconcilerHandle::new( + MountConfig::default(), + storage_key_requester, + time_sync_config, + log, + ); let nongimlet_observed_disks = config.nongimlet_observed_disks.clone().unwrap_or(vec![]); @@ -127,6 +130,7 @@ pub async fn spawn_all_longrunning_tasks( zone_bundler, zone_image_resolver, }, + config_reconciler_spawn_token, sled_agent_started_tx, service_manager_ready_tx, ) diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index f60f367d280..15a5c2f784d 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -12,6 +12,7 @@ use crate::nexus::make_nexus_client; use crate::services::ServiceManager; use internal_dns_resolver::Resolver; use omicron_uuid_kinds::SledUuid; +use sled_agent_config_reconciler::ConfigReconcilerSpawnToken; use sled_agent_types::sled::StartSledAgentRequest; use slog::Logger; use std::net::SocketAddr; @@ -39,6 +40,7 @@ impl Server { log: Logger, request: StartSledAgentRequest, long_running_tasks_handles: LongRunningTaskHandles, + config_reconciler_spawn_token: ConfigReconcilerSpawnToken, services: ServiceManager, ) -> Result { info!(log, "setting up sled agent server"); @@ -61,6 +63,7 @@ impl Server { request, services, long_running_tasks_handles, + config_reconciler_spawn_token, ) .await .map_err(|e| e.to_string())?; diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 737c4fe0168..8c9972dec60 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -53,9 +53,9 @@ use omicron_common::disk::M2Slot; use omicron_ddm_admin_client::Client as DdmAdminClient; use omicron_uuid_kinds::{GenericUuid, PropolisUuid, SledUuid}; use sled_agent_config_reconciler::{ - ConfigReconcilerHandle, InternalDisksReceiver, LedgerNewConfigError, - LedgerTaskError, ReconcilerInventory, SledAgentArtifactStore, - SledAgentFacilities, TimeSyncStatus, + ConfigReconcilerHandle, ConfigReconcilerSpawnToken, InternalDisksReceiver, + LedgerNewConfigError, 
LedgerTaskError, ReconcilerInventory, + SledAgentArtifactStore, SledAgentFacilities, TimeSyncStatus, }; use sled_agent_types::disk::DiskStateRequested; use sled_agent_types::early_networking::EarlyNetworkConfig; @@ -403,6 +403,7 @@ impl SledAgent { request: StartSledAgentRequest, services: ServiceManager, long_running_task_handles: LongRunningTaskHandles, + config_reconciler_spawn_token: ConfigReconcilerSpawnToken, ) -> Result { // Pass the "parent_log" to all subcomponents that want to set their own // "component" value. @@ -615,7 +616,7 @@ impl SledAgent { zone_bundler: long_running_task_handles.zone_bundler.clone(), }, SledAgentArtifactStoreWrapper(Arc::clone(&artifact_store)), - &parent_log, + config_reconciler_spawn_token, ); services From 385807fc3da8e27402c8038ff7fca26326ee2e8f Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 20 May 2025 12:57:35 -0400 Subject: [PATCH 03/10] catch up to changes on main since last rebase --- Cargo.lock | 1 + sled-agent/config-reconciler/src/handle.rs | 37 ++-- .../config-reconciler/src/internal_disks.rs | 162 ++++++++++++------ sled-agent/config-reconciler/src/lib.rs | 1 + sled-agent/src/bootstrap/pre_server.rs | 1 + sled-agent/src/long_running_tasks.rs | 26 +-- sled-agent/src/services.rs | 47 +---- sled-agent/src/sled_agent.rs | 71 ++++---- sled-agent/src/support_bundle/storage.rs | 97 +++++++---- sled-agent/zone-images/Cargo.toml | 2 + .../zone-images/src/mupdate_override.rs | 134 +++++++-------- sled-agent/zone-images/src/source_resolver.rs | 87 ++-------- 12 files changed, 337 insertions(+), 329 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0723344d8c3..a7826cdd5f2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11704,6 +11704,7 @@ dependencies = [ "omicron-workspace-hack", "pretty_assertions", "serde_json", + "sled-agent-config-reconciler", "sled-storage", "slog", "slog-error-chain", diff --git a/sled-agent/config-reconciler/src/handle.rs b/sled-agent/config-reconciler/src/handle.rs index 7a3ae1ac0e8..e33c72f3f97 100644 --- a/sled-agent/config-reconciler/src/handle.rs +++ b/sled-agent/config-reconciler/src/handle.rs @@ -12,7 +12,6 @@ use nexus_sled_agent_shared::inventory::InventoryDisk; use nexus_sled_agent_shared::inventory::InventoryZpool; use nexus_sled_agent_shared::inventory::OmicronSledConfig; use omicron_common::disk::DatasetName; -use omicron_common::disk::DiskIdentity; use sled_agent_api::ArtifactConfig; use sled_storage::config::MountConfig; use sled_storage::disk::Disk; @@ -37,6 +36,7 @@ use sled_storage::dataset::U2_DEBUG_DATASET; use sled_storage::dataset::ZONE_DATASET; use crate::DatasetTaskError; +use crate::InternalDisksWithBootDisk; use crate::LedgerArtifactConfigError; use crate::LedgerNewConfigError; use crate::LedgerTaskError; @@ -277,7 +277,7 @@ impl ConfigReconcilerHandle { } /// Wait for the internal disks task to start managing the boot disk. - pub async fn wait_for_boot_disk(&mut self) -> Arc { + pub async fn wait_for_boot_disk(&mut self) -> InternalDisksWithBootDisk { self.internal_disks_rx.wait_for_boot_disk().await } @@ -343,16 +343,16 @@ impl ConfigReconcilerHandle { .await } - /// Collect inventory fields relevant to config reconciliation. - pub async fn inventory( + /// Return the currently-ledgered [`OmicronSledConfig`]. + /// + /// # Errors + /// + /// Fails if `spawn_reconciliation_task()` has not yet been called or if we + /// have not yet checked the internal disks for a ledgered config. 
+ pub fn ledgered_sled_config( &self, - log: &Logger, - ) -> Result { - let ledgered_sled_config = match self - .ledger_task - .get() - .map(LedgerTaskHandle::current_config) - { + ) -> Result, InventoryError> { + match self.ledger_task.get().map(LedgerTaskHandle::current_config) { // If we haven't yet spawned the ledger task, or we have but // it's still waiting on disks, we don't know whether we have a // ledgered sled config. It's not reasonable to report `None` in @@ -363,12 +363,19 @@ impl ConfigReconcilerHandle { // for the boot disk and spawn the reconciler task before starting // the dropshot server that allows Nexus to collect inventory. None | Some(CurrentSledConfig::WaitingForInternalDisks) => { - return Err(InventoryError::LedgerContentsNotAvailable); + Err(InventoryError::LedgerContentsNotAvailable) } - Some(CurrentSledConfig::WaitingForInitialConfig) => None, - Some(CurrentSledConfig::Ledgered(config)) => Some(config), - }; + Some(CurrentSledConfig::WaitingForInitialConfig) => Ok(None), + Some(CurrentSledConfig::Ledgered(config)) => Ok(Some(config)), + } + } + /// Collect inventory fields relevant to config reconciliation. + pub async fn inventory( + &self, + log: &Logger, + ) -> Result { + let ledgered_sled_config = self.ledgered_sled_config()?; let zpools = self.currently_managed_zpools_rx.to_inventory(log).await; let datasets = self diff --git a/sled-agent/config-reconciler/src/internal_disks.rs b/sled-agent/config-reconciler/src/internal_disks.rs index e9c05adbdb9..96e2ea82c01 100644 --- a/sled-agent/config-reconciler/src/internal_disks.rs +++ b/sled-agent/config-reconciler/src/internal_disks.rs @@ -25,6 +25,7 @@ use sled_hardware::PooledDiskError; use sled_storage::config::MountConfig; use sled_storage::dataset::CLUSTER_DATASET; use sled_storage::dataset::CONFIG_DATASET; +use sled_storage::dataset::INSTALL_DATASET; use sled_storage::dataset::M2_ARTIFACT_DATASET; use sled_storage::dataset::M2_DEBUG_DATASET; use sled_storage::disk::Disk; @@ -38,7 +39,6 @@ use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::future::Future; -use std::mem; use std::ops::Deref; use std::sync::Arc; use std::time::Duration; @@ -70,6 +70,8 @@ enum InternalDisksReceiverInner { impl InternalDisksReceiver { /// Create an `InternalDisksReceiver` that always reports a fixed set of /// disks. + /// + /// The first disk is set as the boot disk. #[cfg(any(test, feature = "testing"))] pub fn fake_static( mount_config: Arc, @@ -77,18 +79,27 @@ impl InternalDisksReceiver { ) -> Self { let inner = InternalDisksReceiverInner::FakeStatic(Arc::new( disks - .map(|(identity, zpool_name)| { - InternalDiskDetails::fake_details(identity, zpool_name) + .enumerate() + .map(|(i, (identity, zpool_name))| { + InternalDiskDetails::fake_details( + identity, + zpool_name, + i == 0, + ) }) .collect(), )); - // We never report errors from our static set; move the sender to a task - // that idles so we don't get recv errors. + // We never report errors from our static set. If there's a Tokio + // runtime, move the sender to a task that idles so we don't get recv + // errors; otherwise, don't bother because no one can await changes on + // `errors_rx` anyway (e.g., if we're being used from a non-tokio test). 
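The tri-state handling in `ledgered_sled_config()` above is the heart of the inventory contract: "haven't looked at the internal disks yet" is an error, while "looked and found nothing ledgered" is a successful `None`. A hypothetical caller-side sketch of what that distinction buys (not real code; the error mapping is illustrative):

    // Hypothetical caller showing why the return type is
    // Result<Option<_>, _> rather than a bare Option.
    fn describe_ledger(
        handle: &ConfigReconcilerHandle,
    ) -> Result<String, String> {
        match handle.ledgered_sled_config() {
            // Too early: the internal disks haven't been checked, so a
            // "no config" answer could be wrong. Callers should retry
            // (or return a 503) rather than treat this as empty.
            Err(err) => Err(format!("ledger not available yet: {err}")),
            // Definitive: disks were checked, nothing is ledgered.
            Ok(None) => Ok("no sled config ledgered yet".to_string()),
            // Definitive: report the ledgered generation.
            Ok(Some(config)) => Ok(format!(
                "sled config ledgered at generation {}",
                config.generation
            )),
        }
    }
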
let (errors_tx, errors_rx) = watch::channel(Arc::default()); - tokio::spawn(async move { - errors_tx.closed().await; - }); + if tokio::runtime::Handle::try_current().is_ok() { + tokio::spawn(async move { + errors_tx.closed().await; + }); + } Self { mount_config, inner, errors_rx } } @@ -109,7 +120,7 @@ impl InternalDisksReceiver { .clone() .into_iter() .map(|(identity, zpool_name)| { - InternalDiskDetails::fake_details(identity, zpool_name) + InternalDiskDetails::fake_details(identity, zpool_name, false) }) .collect(); let (mapped_tx, mapped_rx) = watch::channel(Arc::new(current)); @@ -122,7 +133,9 @@ impl InternalDisksReceiver { .clone() .into_iter() .map(|(identity, zpool_name)| { - InternalDiskDetails::fake_details(identity, zpool_name) + InternalDiskDetails::fake_details( + identity, zpool_name, false, + ) }) .collect(); if mapped_tx.send(Arc::new(remapped)).is_err() { @@ -206,6 +219,20 @@ impl InternalDisksReceiver { InternalDisks { disks, mount_config: Arc::clone(&self.mount_config) } } + /// Get an [`InternalDisksWithBootDisk`], panicking if we have no boot + /// disk. + /// + /// This method is only available to tests; production code should instead + /// use `current()` and/or `wait_for_boot_disk()`. + #[cfg(any(test, feature = "testing"))] + pub fn current_with_boot_disk(&self) -> InternalDisksWithBootDisk { + let disks = self.current(); + InternalDisksWithBootDisk::new(disks).expect( + "current_with_boot_disk() should be called by \ + tests that set up a fake boot disk", + ) + } + /// Get the current set of managed internal disks and mark the returned /// value as seen. /// @@ -264,40 +291,17 @@ impl InternalDisksReceiver { /// Wait until the boot disk is managed, returning its identity. /// /// Internally updates the most-recently-seen value. - pub(crate) async fn wait_for_boot_disk(&mut self) -> Arc { - match &mut self.inner { - InternalDisksReceiverInner::Real(disks_rx) => loop { - let disks = disks_rx.borrow_and_update(); - if let Some(disk) = disks.iter().find(|d| d.is_boot_disk()) { - return Arc::clone(&disk.identity); - } - mem::drop(disks); - - disks_rx - .changed() - .await - .expect("InternalDisks task never dies"); - }, - #[cfg(any(test, feature = "testing"))] - InternalDisksReceiverInner::FakeStatic(disks) => { - if let Some(disk) = disks.iter().find(|d| d.is_boot_disk()) { - return Arc::clone(&disk.id.identity); - } - panic!("fake InternalDisksReceiver has no boot disk") - } - #[cfg(any(test, feature = "testing"))] - InternalDisksReceiverInner::FakeDynamic(disks_rx) => loop { - let disks = disks_rx.borrow_and_update(); - if let Some(disk) = disks.iter().find(|d| d.is_boot_disk()) { - return Arc::clone(&disk.id.identity); - } - mem::drop(disks); - - disks_rx - .changed() - .await - .expect("InternalDisks task never dies"); - }, + pub(crate) async fn wait_for_boot_disk( + &mut self, + ) -> InternalDisksWithBootDisk { + loop { + let current = self.current_and_update(); + if let Some(with_boot_disk) = + InternalDisksWithBootDisk::new(current) + { + return with_boot_disk; + }; + self.changed().await.expect("InternalDisks task never dies"); } } @@ -327,6 +331,12 @@ impl InternalDisks { }) } + pub fn boot_disk_install_dataset(&self) -> Option { + self.boot_disk_zpool().map(|zpool| { + zpool.dataset_mountpoint(&self.mount_config.root, INSTALL_DATASET) + }) + } + pub fn image_raw_devfs_path( &self, slot: M2Slot, @@ -393,6 +403,58 @@ impl InternalDisks { } } +/// An [`InternalDisks`] with a guaranteed-present boot disk. 
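`InternalDisksWithBootDisk` (defined just below) moves the "boot disk is present" guarantee into the type, so code downstream of `wait_for_boot_disk()` never re-handles the missing-boot-disk case. A small usage sketch, assuming the accessors added in this hunk and the slog macros in scope:

    use slog::{Logger, info};

    // Enumerate install datasets with the boot disk distinguished; note
    // that none of the boot-disk accessors return an Option.
    fn log_install_datasets(disks: &InternalDisksWithBootDisk, log: &Logger) {
        info!(
            log, "boot disk install dataset";
            "zpool" => %disks.boot_disk_zpool(),
            "path" => %disks.boot_disk_install_dataset()
        );
        for (zpool, path) in disks.non_boot_disk_install_datasets() {
            info!(
                log, "non-boot install dataset";
                "zpool" => %zpool,
                "path" => %path
            );
        }
    }
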
+pub struct InternalDisksWithBootDisk { + inner: InternalDisks, + boot_disk: InternalDiskDetailsId, +} + +impl InternalDisksWithBootDisk { + fn new(inner: InternalDisks) -> Option { + let boot_disk = inner + .disks + .iter() + .find_map(|d| if d.is_boot_disk() { Some(d.id()) } else { None })?; + Some(Self { inner, boot_disk }) + } + + fn boot_disk(&self) -> &InternalDiskDetails { + match self.inner.disks.get(&self.boot_disk) { + Some(details) => details, + None => unreachable!("boot disk present by construction"), + } + } + + pub fn boot_disk_id(&self) -> &Arc { + &self.boot_disk().id.identity + } + + pub fn boot_disk_zpool(&self) -> ZpoolName { + self.boot_disk().zpool_name + } + + pub fn boot_disk_install_dataset(&self) -> Utf8PathBuf { + self.boot_disk_zpool().dataset_mountpoint( + &self.inner.mount_config().root, + INSTALL_DATASET, + ) + } + + pub fn non_boot_disk_install_datasets( + &self, + ) -> impl Iterator + '_ { + self.inner.disks.iter().filter(|disk| disk.id != self.boot_disk).map( + |disk| { + let dataset = disk.zpool_name.dataset_mountpoint( + &self.inner.mount_config.root, + INSTALL_DATASET, + ); + (disk.zpool_name, dataset) + }, + ) + } +} + // A subset of `Disk` properties. We store this in `InternalDisks` instead of // `Disk`s both to avoid exposing raw `Disk`s outside this crate and to support // easier faking for tests. @@ -452,15 +514,19 @@ impl From<&'_ InternalDisk> for InternalDiskDetails { impl InternalDiskDetails { #[cfg(any(test, feature = "testing"))] - fn fake_details(identity: DiskIdentity, zpool_name: ZpoolName) -> Self { - // We can expand the interface for fake disks if we need to be able to - // specify more of these properties in future tests. + fn fake_details( + identity: DiskIdentity, + zpool_name: ZpoolName, + is_boot_disk: bool, + ) -> Self { Self { id: InternalDiskDetailsId { identity: Arc::new(identity), - is_boot_disk: false, + is_boot_disk, }, zpool_name, + // We can expand the interface for fake disks if we need to be able + // to specify more of these properties in future tests. 
slot: None, raw_devfs_path: None, } diff --git a/sled-agent/config-reconciler/src/lib.rs b/sled-agent/config-reconciler/src/lib.rs index eaa1c6f5f12..24a3e7b4489 100644 --- a/sled-agent/config-reconciler/src/lib.rs +++ b/sled-agent/config-reconciler/src/lib.rs @@ -76,6 +76,7 @@ pub use handle::ReconcilerInventory; pub use handle::TimeSyncConfig; pub use internal_disks::InternalDisks; pub use internal_disks::InternalDisksReceiver; +pub use internal_disks::InternalDisksWithBootDisk; pub use ledger::LedgerArtifactConfigError; pub use ledger::LedgerNewConfigError; pub use ledger::LedgerTaskError; diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index 094b44e6c31..283569ade96 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -143,6 +143,7 @@ impl BootstrapAgentStartup { sled_mode, config.sidecar_revision.clone(), config.switch_zone_maghemite_links.clone(), + long_running_task_handles.zone_image_resolver.clone(), long_running_task_handles .config_reconciler .internal_disks_rx() diff --git a/sled-agent/src/long_running_tasks.rs b/sled-agent/src/long_running_tasks.rs index ff9a6a53251..e05eb9aa417 100644 --- a/sled-agent/src/long_running_tasks.rs +++ b/sled-agent/src/long_running_tasks.rs @@ -22,14 +22,13 @@ use crate::services::ServiceManager; use crate::sled_agent::SledAgent; use crate::zone_bundle::ZoneBundler; use bootstore::schemes::v0 as bootstore; -use illumos_utils::zpool::ZpoolName; use key_manager::{KeyManager, StorageKeyRequester}; use sled_agent_config_reconciler::{ ConfigReconcilerHandle, ConfigReconcilerSpawnToken, RawDisksSender, TimeSyncConfig, }; use sled_agent_types::zone_bundle::CleanupContext; -use sled_agent_zone_images::{ZoneImageSourceResolver, ZoneImageZpools}; +use sled_agent_zone_images::ZoneImageSourceResolver; use sled_hardware::{HardwareManager, SledMode, UnparsedDisk}; use sled_storage::config::MountConfig; use sled_storage::disk::RawSyntheticDisk; @@ -106,10 +105,9 @@ pub async fn spawn_all_longrunning_tasks( // Wait for the boot disk so that we can work with any ledgers, // such as those needed by the bootstore and sled-agent info!(log, "Waiting for boot disk"); - let disk_id = config_reconciler.wait_for_boot_disk().await; - info!(log, "Found boot disk {:?}", disk_id); + let internal_disks = config_reconciler.wait_for_boot_disk().await; + info!(log, "Found boot disk {:?}", internal_disks.boot_disk_id()); - let all_disks = storage_manager.get_latest_disks().await; let bootstore = spawn_bootstore_tasks( log, &config_reconciler, @@ -118,9 +116,8 @@ pub async fn spawn_all_longrunning_tasks( ) .await; - let zone_bundler = spawn_zone_bundler_tasks(log, &config_reconciler); - let zone_image_resolver = - make_zone_image_resolver(log, &all_disks, &boot_zpool); + let zone_bundler = spawn_zone_bundler_tasks(log, &config_reconciler).await; + let zone_image_resolver = ZoneImageSourceResolver::new(log, internal_disks); ( LongRunningTaskHandles { @@ -227,18 +224,7 @@ async fn spawn_zone_bundler_tasks( config_reconciler.available_datasets_rx(), CleanupContext::default(), ) -} - -fn make_zone_image_resolver( - log: &Logger, - all_disks: &AllDisks, - boot_zpool: &ZpoolName, -) -> ZoneImageSourceResolver { - let zpools = ZoneImageZpools { - root: &all_disks.mount_config().root, - all_m2_zpools: all_disks.all_m2_zpools(), - }; - ZoneImageSourceResolver::new(log, &zpools, boot_zpool) + .await } async fn upsert_synthetic_disks_if_needed( diff --git a/sled-agent/src/services.rs 
b/sled-agent/src/services.rs index 31f2f6a70f8..98453b2f8f8 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -96,15 +96,15 @@ use omicron_common::ledger::Ledgerable; use omicron_ddm_admin_client::DdmError; use omicron_uuid_kinds::OmicronZoneUuid; use rand::prelude::SliceRandom; -use sled_agent_zone_images::{ZoneImageSourceResolver, ZoneImageZpools}; use sled_agent_config_reconciler::InternalDisksReceiver; use sled_agent_types::sled::SWITCH_ZONE_BASEBOARD_FILE; +use sled_agent_zone_images::ZoneImageSourceResolver; use sled_hardware::SledMode; use sled_hardware::is_gimlet; use sled_hardware::underlay; use sled_hardware_types::Baseboard; use sled_storage::config::MountConfig; -use sled_storage::dataset::{INSTALL_DATASET, ZONE_DATASET}; +use sled_storage::dataset::ZONE_DATASET; use slog::Logger; use slog_error_chain::InlineErrorChain; use std::net::{IpAddr, Ipv6Addr, SocketAddr}; @@ -920,12 +920,6 @@ impl ServiceManager { } } - - // TODO-john still needed? - #[cfg(all(test, target_os = "illumos"))] - fn override_image_directory(&self, path: Utf8PathBuf) { - self.inner.zone_image_resolver.override_image_directory(path); - } pub(crate) fn ddm_reconciler(&self) -> &DdmReconciler { &self.inner.ddm_reconciler } @@ -1530,44 +1524,9 @@ impl ServiceManager { ZoneArgs::Omicron(zone_config) => &zone_config.zone.image_source, ZoneArgs::Switch(_) => &OmicronZoneImageSource::InstallDataset, }; - let all_disks = self.inner.storage.get_latest_disks().await; - let zpools = ZoneImageZpools { - root: &all_disks.mount_config().root, - all_m2_zpools: all_disks.all_m2_zpools(), -/* TODO-john fix this - let zone_image_file_name = match image_source { - OmicronZoneImageSource::InstallDataset => None, - OmicronZoneImageSource::Artifact { hash } => Some(hash.to_string()), - }; - let internal_disks = self.inner.internal_disks_rx.current(); - let zone_image_paths = match image_source { - OmicronZoneImageSource::InstallDataset => { - // Look for the image in the ramdisk first - let mut zone_image_paths = - vec![Utf8PathBuf::from("/opt/oxide")]; - - // If the boot disk exists, look for the image in the "install" - // dataset there too. - if let Some(boot_zpool) = internal_disks.boot_disk_zpool() { - zone_image_paths.push(boot_zpool.dataset_mountpoint( - &internal_disks.mount_config().root, - INSTALL_DATASET, - )); - } - - zone_image_paths - } - OmicronZoneImageSource::Artifact { .. } => { - internal_disks.all_artifact_datasets().collect() - } -*/ - }; - let boot_zpool = - all_disks.boot_disk().map(|(_, boot_zpool)| boot_zpool); let file_source = self.inner.zone_image_resolver.file_source_for( image_source, - &zpools, - boot_zpool.as_ref(), + self.inner.internal_disks_rx.current(), ); let zone_type_str = match &request { diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 8c9972dec60..f4349a3c6c9 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -4,14 +4,14 @@ //! 
Sled agent implementation -use crate::artifact_store::{self, ArtifactStore}; +use crate::artifact_store::ArtifactStore; use crate::boot_disk_os_writer::BootDiskOsWriter; use crate::bootstrap::config::BOOTSTRAP_AGENT_RACK_INIT_PORT; use crate::bootstrap::early_networking::EarlyNetworkSetupError; use crate::config::Config; use crate::instance_manager::InstanceManager; use crate::long_running_tasks::LongRunningTaskHandles; -use crate::metrics::{self, MetricsManager, MetricsRequestQueue}; +use crate::metrics::{MetricsManager, MetricsRequestQueue}; use crate::nexus::{ NexusClient, NexusNotifierHandle, NexusNotifierInput, NexusNotifierTask, }; @@ -22,6 +22,7 @@ use crate::support_bundle::storage::SupportBundleManager; use crate::vmm_reservoir::{ReservoirMode, VmmReservoirManager}; use crate::zone_bundle::BundleError; use crate::zone_bundle::{self, ZoneBundler}; +use anyhow::anyhow; use bootstore::schemes::v0 as bootstore; use camino::Utf8PathBuf; use derive_more::From; @@ -31,15 +32,14 @@ use futures::stream::FuturesUnordered; use illumos_utils::opte::PortManager; use illumos_utils::running_zone::RunningZone; use illumos_utils::zpool::ZpoolName; +use itertools::Itertools as _; use nexus_sled_agent_shared::inventory::{ - Inventory, OmicronSledConfig, SledRole, + Inventory, OmicronSledConfig, OmicronZoneConfig, SledRole, }; use omicron_common::address::{ Ipv6Subnet, SLED_PREFIX, get_sled_address, get_switch_zone_address, }; -use omicron_common::api::external::{ - ByteCount, ByteCountRangeError, Generation, Vni, -}; +use omicron_common::api::external::{ByteCount, ByteCountRangeError, Vni}; use omicron_common::api::internal::nexus::{DiskRuntimeState, SledVmmState}; use omicron_common::api::internal::shared::{ ExternalIpGatewayMap, HostPortConfig, RackNetworkConfig, @@ -71,7 +71,9 @@ use sled_agent_types::zone_bundle::{ }; use sled_diagnostics::SledDiagnosticsCmdError; use sled_diagnostics::SledDiagnosticsCmdOutput; -use sled_hardware::{HardwareManager, MemoryReservations, underlay}; +use sled_hardware::{ + HardwareManager, MemoryReservations, PooledDiskError, underlay, +}; use sled_hardware_types::Baseboard; use sled_hardware_types::underlay::BootstrapInterface; use sled_storage::config::MountConfig; @@ -84,7 +86,7 @@ use std::sync::Arc; use tufaceous_artifact::ArtifactHash; use uuid::Uuid; -use illumos_utils::dladm::Dladm; +use illumos_utils::dladm::{Dladm, EtherstubVnic}; use illumos_utils::zone::Api; use illumos_utils::zone::Zones; @@ -295,18 +297,15 @@ pub enum InventoryError { // system. #[error(transparent)] BadByteCount(#[from] ByteCountRangeError), - #[error("failed to get current ledgered disks")] - GetDisksConfig(#[source] sled_storage::error::Error), - #[error("failed to get current ledgered datasets")] - GetDatasetsConfig(#[source] sled_storage::error::Error), + #[error(transparent)] + InventoryError(#[from] sled_agent_config_reconciler::InventoryError), } impl From for omicron_common::api::external::Error { fn from(inventory_error: InventoryError) -> Self { match inventory_error { e @ (InventoryError::BadByteCount(..) - | InventoryError::GetDisksConfig(_) - | InventoryError::GetDatasetsConfig(_)) => { + | InventoryError::InventoryError(_)) => { omicron_common::api::external::Error::internal_error( &InlineErrorChain::new(&e).to_string(), ) @@ -609,8 +608,8 @@ impl SledAgent { // Start reconciling against our ledgered sled config. 
config_reconciler.spawn_reconciliation_task( - etherstub_vnic, ReconcilerFacilities { + etherstub_vnic, service_manager: services.clone(), metrics_queue: metrics_manager.request_queue(), zone_bundler: long_running_task_handles.zone_bundler.clone(), @@ -1133,7 +1132,7 @@ impl SledAgent { ledgered_sled_config, reconciler_status, last_reconciliation, - } = self.inner.config_reconciler.inventory(); + } = self.inner.config_reconciler.inventory(&self.log).await?; Ok(Inventory { sled_id, @@ -1312,15 +1311,16 @@ pub async fn sled_add( } struct ReconcilerFacilities { + etherstub_vnic: EtherstubVnic, service_manager: ServiceManager, metrics_queue: MetricsRequestQueue, zone_bundler: ZoneBundler, } impl SledAgentFacilities for ReconcilerFacilities { - type StartZoneError = services::Error; - type MetricsUntrackZoneLinksError = Vec; - type ZoneBundleCreateError = BundleError; + fn underlay_vnic(&self) -> &EtherstubVnic { + &self.etherstub_vnic + } async fn on_time_sync(&self) { self.service_manager.on_time_sync().await @@ -1332,26 +1332,35 @@ impl SledAgentFacilities for ReconcilerFacilities { mount_config: &MountConfig, is_time_synchronized: bool, all_u2_pools: &[ZpoolName], - ) -> Result { - self.service_manager + ) -> anyhow::Result { + let zone = self + .service_manager .start_omicron_zone( mount_config, zone_config, is_time_synchronized, all_u2_pools, ) - .await + .await?; + Ok(zone) } fn metrics_untrack_zone_links( &self, zone: &RunningZone, - ) -> Result<(), Self::MetricsUntrackZoneLinksError> { - self.metrics_queue.untrack_zone_links(zone) - } - - fn ddm_add_internal_dns_prefix(&self, prefix: Ipv6Subnet) { - self.service_manager.ddm_reconciler().add_internal_dns_subnet(prefix); + ) -> anyhow::Result<()> { + match self.metrics_queue.untrack_zone_links(zone) { + Ok(()) => Ok(()), + Err(errors) => { + let mut errors = + errors.iter().map(|err| InlineErrorChain::new(err)); + Err(anyhow!( + "{} errors untracking zone links: {}", + errors.len(), + errors.join(", ") + )) + } + } } fn ddm_remove_internal_dns_prefix(&self, prefix: Ipv6Subnet) { @@ -1364,7 +1373,7 @@ impl SledAgentFacilities for ReconcilerFacilities { &self, zone: &RunningZone, cause: ZoneBundleCause, - ) -> Result<(), Self::ZoneBundleCreateError> { + ) -> anyhow::Result<()> { self.zone_bundler.create(zone, cause).await?; Ok(()) } @@ -1374,12 +1383,10 @@ impl SledAgentFacilities for ReconcilerFacilities { struct SledAgentArtifactStoreWrapper(Arc>); impl SledAgentArtifactStore for SledAgentArtifactStoreWrapper { - type ArtifactExistsValidationError = artifact_store::Error; - async fn validate_artifact_exists_in_storage( &self, artifact: ArtifactHash, - ) -> Result<(), Self::ArtifactExistsValidationError> { + ) -> anyhow::Result<()> { self.0.get(artifact).await?; Ok(()) } diff --git a/sled-agent/src/support_bundle/storage.rs b/sled-agent/src/support_bundle/storage.rs index ad126e9e1b5..0c64b625d8b 100644 --- a/sled-agent/src/support_bundle/storage.rs +++ b/sled-agent/src/support_bundle/storage.rs @@ -14,7 +14,6 @@ use futures::Stream; use futures::StreamExt; use illumos_utils::zfs::DatasetProperties; use omicron_common::api::external::Error as ExternalError; -use omicron_common::api::external::Generation; use omicron_common::disk::CompressionAlgorithm; use omicron_common::disk::DatasetConfig; use omicron_common::disk::DatasetName; @@ -31,6 +30,11 @@ use sha2::{Digest, Sha256}; use sled_agent_api::*; use sled_agent_config_reconciler::ConfigReconcilerHandle; use sled_agent_config_reconciler::DatasetTaskError; +use 
sled_agent_config_reconciler::InventoryError; +use sled_agent_config_reconciler::NestedDatasetDestroyError; +use sled_agent_config_reconciler::NestedDatasetEnsureError; +use sled_agent_config_reconciler::NestedDatasetListError; +use sled_agent_config_reconciler::NestedDatasetMountError; use sled_agent_types::support_bundle::BUNDLE_FILE_NAME; use sled_agent_types::support_bundle::BUNDLE_TMP_FILE_NAME_SUFFIX; use sled_storage::manager::NestedDatasetConfig; @@ -38,7 +42,6 @@ use sled_storage::manager::NestedDatasetListOptions; use sled_storage::manager::NestedDatasetLocation; use slog::Logger; use slog_error_chain::InlineErrorChain; -use std::collections::BTreeMap; use std::io::Write; use tokio::io::AsyncReadExt; use tokio::io::AsyncSeekExt; @@ -91,6 +94,21 @@ pub enum Error { #[error(transparent)] DatasetTask(#[from] DatasetTaskError), + + #[error("Could not access ledgered sled config")] + LedgeredSledConfig(#[source] InventoryError), + + #[error(transparent)] + NestedDatasetMountError(#[from] NestedDatasetMountError), + + #[error(transparent)] + NestedDatasetEnsureError(#[from] NestedDatasetEnsureError), + + #[error(transparent)] + NestedDatasetDestroyError(#[from] NestedDatasetDestroyError), + + #[error(transparent)] + NestedDatasetListError(#[from] NestedDatasetListError), } fn err_str(err: &dyn std::error::Error) -> String { @@ -154,7 +172,7 @@ pub trait LocalStorage: Sync { /// Returns all nested datasets within an existing dataset async fn dyn_nested_dataset_list( &self, - name: NestedDatasetLocation, + name: DatasetName, options: NestedDatasetListOptions, ) -> Result, Error>; @@ -177,21 +195,20 @@ impl LocalStorage for ConfigReconcilerHandle { async fn dyn_datasets_config_list(&self) -> Result { // TODO-cleanup This is super gross; add a better API (maybe fetch a // single dataset by ID, since that's what our caller wants?) - Ok(self - .inventory() - .ledgered_sled_config - .map(|sled_config| DatasetsConfig { - generation: sled_config.generation, - datasets: sled_config - .datasets - .into_iter() - .map(|d| (d.id, d)) - .collect(), - }) - .unwrap_or_else(|| DatasetsConfig { - generation: Generation::new(), - datasets: BTreeMap::new(), - })) + let sled_config = + self.ledgered_sled_config().map_err(Error::LedgeredSledConfig)?; + let sled_config = match sled_config { + Some(config) => config, + None => return Ok(DatasetsConfig::default()), + }; + Ok(DatasetsConfig { + generation: sled_config.generation, + datasets: sled_config + .datasets + .into_iter() + .map(|d| (d.id, d)) + .collect(), + }) } async fn dyn_dataset_get( @@ -222,29 +239,39 @@ impl LocalStorage for ConfigReconcilerHandle { ) -> Result { self.nested_dataset_ensure_mounted(dataset) .await - .map_err(|err| err.into()) + .map_err(Error::from)? + .map_err(Error::from) } async fn dyn_nested_dataset_list( &self, - name: NestedDatasetLocation, + name: DatasetName, options: NestedDatasetListOptions, ) -> Result, Error> { - self.nested_dataset_list(name, options).await.map_err(|err| err.into()) + self.nested_dataset_list(name, options) + .await + .map_err(Error::from)? + .map_err(Error::from) } async fn dyn_nested_dataset_ensure( &self, config: NestedDatasetConfig, ) -> Result<(), Error> { - self.nested_dataset_ensure(config).await.map_err(|err| err.into()) + self.nested_dataset_ensure(config) + .await + .map_err(Error::from)? 
+ .map_err(Error::from) } async fn dyn_nested_dataset_destroy( &self, name: NestedDatasetLocation, ) -> Result<(), Error> { - self.nested_dataset_destroy(name).await.map_err(|err| err.into()) + self.nested_dataset_destroy(name) + .await + .map_err(Error::from)? + .map_err(Error::from) } } @@ -273,10 +300,15 @@ impl LocalStorage for crate::sim::Storage { async fn dyn_nested_dataset_list( &self, - name: NestedDatasetLocation, + name: DatasetName, options: NestedDatasetListOptions, ) -> Result, Error> { - self.lock().nested_dataset_list(name, options).map_err(|err| err.into()) + self.lock() + .nested_dataset_list( + NestedDatasetLocation { path: String::new(), root: name }, + options, + ) + .map_err(|err| err.into()) } async fn dyn_nested_dataset_ensure( @@ -493,12 +525,10 @@ impl<'a> SupportBundleManager<'a> { ) -> Result, Error> { let root = self.get_mounted_dataset_config(zpool_id, dataset_id).await?.name; - let dataset_location = - NestedDatasetLocation { path: String::from(""), root }; let datasets = self .storage .dyn_nested_dataset_list( - dataset_location, + root, NestedDatasetListOptions::ChildrenOnly, ) .await?; @@ -986,12 +1016,15 @@ mod tests { async fn dyn_nested_dataset_list( &self, - name: NestedDatasetLocation, + name: DatasetName, options: NestedDatasetListOptions, ) -> Result, Error> { - self.nested_dataset_list(name, options) - .await - .map_err(|err| err.into()) + self.nested_dataset_list( + NestedDatasetLocation { path: String::new(), root: name }, + options, + ) + .await + .map_err(|err| err.into()) } async fn dyn_nested_dataset_ensure( diff --git a/sled-agent/zone-images/Cargo.toml b/sled-agent/zone-images/Cargo.toml index 5c388a19dd2..cd63d879001 100644 --- a/sled-agent/zone-images/Cargo.toml +++ b/sled-agent/zone-images/Cargo.toml @@ -16,6 +16,7 @@ nexus-sled-agent-shared.workspace = true omicron-common.workspace = true omicron-workspace-hack.workspace = true serde_json.workspace = true +sled-agent-config-reconciler.workspace = true sled-storage.workspace = true slog.workspace = true slog-error-chain.workspace = true @@ -26,3 +27,4 @@ camino-tempfile-ext.workspace = true dropshot.workspace = true omicron-uuid-kinds.workspace = true pretty_assertions.workspace = true +sled-agent-config-reconciler = { workspace = true, features = ["testing"] } diff --git a/sled-agent/zone-images/src/mupdate_override.rs b/sled-agent/zone-images/src/mupdate_override.rs index 14ec5fd4c72..e89cd2aca92 100644 --- a/sled-agent/zone-images/src/mupdate_override.rs +++ b/sled-agent/zone-images/src/mupdate_override.rs @@ -11,14 +11,13 @@ use std::fs::FileType; use std::io; use std::sync::Arc; -use crate::ZoneImageZpools; use camino::Utf8Path; use camino::Utf8PathBuf; use id_map::IdMap; use id_map::IdMappable; use illumos_utils::zpool::ZpoolName; use omicron_common::update::MupdateOverrideInfo; -use sled_storage::dataset::INSTALL_DATASET; +use sled_agent_config_reconciler::InternalDisksWithBootDisk; use slog::debug; use slog::error; use slog::info; @@ -58,11 +57,9 @@ impl AllMupdateOverrides { /// be authoritative). Consider extracting this out into something generic. 
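`read_all()` below leans on a per-dataset helper, `read_mupdate_override()`, that this diff does not show. Judging from the tests further down, its semantics are roughly: a missing install dataset is an error, a present dataset with no override file is `Ok(None)`, and a malformed file is a deserialize error. A hedged sketch of that shape; the function and the file-name literal are placeholders, not the real constants:

    use anyhow::bail;
    use camino::Utf8Path;
    use omicron_common::update::MupdateOverrideInfo;

    fn read_override_sketch(
        dataset_dir: &Utf8Path,
    ) -> anyhow::Result<Option<MupdateOverrideInfo>> {
        if !dataset_dir.is_dir() {
            // A missing install dataset is an error, not "no override".
            bail!("install dataset {dataset_dir} is not a directory");
        }
        // Placeholder file name; the real code uses a shared constant.
        let path = dataset_dir.join("mupdate-override.json");
        match std::fs::read_to_string(&path) {
            Ok(contents) => Ok(Some(serde_json::from_str(&contents)?)),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
            Err(err) => Err(err.into()),
        }
    }
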
pub(crate) fn read_all( log: &slog::Logger, - zpools: &ZoneImageZpools<'_>, - boot_zpool: &ZpoolName, + internal_disks: InternalDisksWithBootDisk, ) -> Self { - let dataset = - boot_zpool.dataset_mountpoint(zpools.root, INSTALL_DATASET); + let dataset = internal_disks.boot_disk_install_dataset(); let (boot_disk_path, boot_disk_res) = read_mupdate_override(log, &dataset); @@ -70,18 +67,12 @@ impl AllMupdateOverrides { // Now read the file from all other disks. We attempt to make sure they // match up and will log a warning if they don't, though (until we have // a better story on transient failures) it's not fatal. - let non_boot_zpools = zpools - .all_m2_zpools - .iter() - .filter(|&zpool_name| zpool_name != boot_zpool); - let non_boot_disks_overrides = non_boot_zpools - .map(|zpool_name| { - let dataset = - zpool_name.dataset_mountpoint(zpools.root, INSTALL_DATASET); - + let non_boot_datasets = internal_disks.non_boot_disk_install_datasets(); + let non_boot_disks_overrides = non_boot_datasets + .map(|(zpool_name, dataset)| { let (path, res) = read_mupdate_override(log, &dataset); MupdateOverrideNonBootInfo { - zpool_name: *zpool_name, + zpool_name, path, result: MupdateOverrideNonBootResult::new( res, @@ -92,7 +83,7 @@ impl AllMupdateOverrides { .collect(); let ret = Self { - boot_zpool: *boot_zpool, + boot_zpool: internal_disks.boot_disk_zpool(), boot_disk_path, boot_disk_override: boot_disk_res, non_boot_disk_overrides: non_boot_disks_overrides, @@ -483,9 +474,12 @@ mod tests { use dropshot::ConfigLogging; use dropshot::ConfigLoggingLevel; use dropshot::test_util::LogContext; + use omicron_common::disk::DiskIdentity; use omicron_uuid_kinds::MupdateOverrideUuid; use omicron_uuid_kinds::ZpoolUuid; use pretty_assertions::assert_eq; + use sled_agent_config_reconciler::InternalDisksReceiver; + use sled_storage::config::MountConfig; use std::collections::BTreeSet; use std::io; use std::sync::LazyLock; @@ -536,6 +530,33 @@ mod tests { static OVERRIDE_2_UUID: MupdateOverrideUuid = MupdateOverrideUuid::from_u128(0x20588f8f_c680_4101_afc7_820226d03ada); + fn make_internal_disks( + root: &Utf8Path, + boot_zpool: ZpoolName, + other_zpools: &[ZpoolName], + ) -> InternalDisksWithBootDisk { + let identity_from_zpool = |zpool: ZpoolName| DiskIdentity { + vendor: "mupdate-override-tests".to_string(), + model: "fake-disk".to_string(), + serial: zpool.id().to_string(), + }; + let mount_config = MountConfig { + root: root.to_path_buf(), + synthetic_disk_root: root.to_path_buf(), + }; + InternalDisksReceiver::fake_static( + Arc::new(mount_config), + std::iter::once((identity_from_zpool(boot_zpool), boot_zpool)) + .chain( + other_zpools + .iter() + .copied() + .map(|pool| (identity_from_zpool(pool), pool)), + ), + ) + .current_with_boot_disk() + } + /// Boot disk present / no other disks. (This produces a warning, but is /// otherwise okay.) 
#[test] @@ -551,12 +572,9 @@ mod tests { .write_str(&serde_json::to_string(&override_info).unwrap()) .unwrap(); - let zpools = ZoneImageZpools { - root: dir.path(), - all_m2_zpools: vec![BOOT_ZPOOL], - }; + let internal_disks = make_internal_disks(dir.path(), BOOT_ZPOOL, &[]); let overrides = - AllMupdateOverrides::read_all(&logctx.log, &zpools, &BOOT_ZPOOL); + AllMupdateOverrides::read_all(&logctx.log, internal_disks); assert_eq!( overrides.boot_disk_override.as_ref().unwrap().as_ref(), Some(&override_info) @@ -583,13 +601,11 @@ mod tests { .write_str(&serde_json::to_string(&override_info).unwrap()) .unwrap(); - let zpools = ZoneImageZpools { - root: dir.path(), - all_m2_zpools: vec![BOOT_ZPOOL, NON_BOOT_ZPOOL], - }; + let internal_disks = + make_internal_disks(dir.path(), BOOT_ZPOOL, &[NON_BOOT_ZPOOL]); let overrides = - AllMupdateOverrides::read_all(&logctx.log, &zpools, &BOOT_ZPOOL); + AllMupdateOverrides::read_all(&logctx.log, internal_disks); assert_eq!( overrides.boot_disk_override.as_ref().unwrap().as_ref(), Some(&override_info) @@ -621,13 +637,11 @@ mod tests { dir.child(&BOOT_PATHS.install_dataset).create_dir_all().unwrap(); dir.child(&NON_BOOT_PATHS.install_dataset).create_dir_all().unwrap(); - let zpools = ZoneImageZpools { - root: dir.path(), - all_m2_zpools: vec![BOOT_ZPOOL, NON_BOOT_ZPOOL], - }; + let internal_disks = + make_internal_disks(dir.path(), BOOT_ZPOOL, &[NON_BOOT_ZPOOL]); let overrides = - AllMupdateOverrides::read_all(&logctx.log, &zpools, &BOOT_ZPOOL); + AllMupdateOverrides::read_all(&logctx.log, internal_disks); assert_eq!( overrides.boot_disk_override.as_ref().unwrap().as_ref(), None, @@ -662,13 +676,11 @@ mod tests { // Create the directory, but not the override JSON within it. dir.child(&NON_BOOT_PATHS.install_dataset).create_dir_all().unwrap(); - let zpools = ZoneImageZpools { - root: dir.path(), - all_m2_zpools: vec![BOOT_ZPOOL, NON_BOOT_ZPOOL], - }; + let internal_disks = + make_internal_disks(dir.path(), BOOT_ZPOOL, &[NON_BOOT_ZPOOL]); let overrides = - AllMupdateOverrides::read_all(&logctx.log, &zpools, &BOOT_ZPOOL); + AllMupdateOverrides::read_all(&logctx.log, internal_disks); assert_eq!( overrides.boot_disk_override.as_ref().unwrap().as_ref(), Some(&override_info) @@ -706,12 +718,10 @@ mod tests { .write_str(&serde_json::to_string(&override_info).unwrap()) .unwrap(); - let zpools = ZoneImageZpools { - root: dir.path(), - all_m2_zpools: vec![BOOT_ZPOOL, NON_BOOT_ZPOOL], - }; + let internal_disks = + make_internal_disks(dir.path(), BOOT_ZPOOL, &[NON_BOOT_ZPOOL]); let overrides = - AllMupdateOverrides::read_all(&logctx.log, &zpools, &BOOT_ZPOOL); + AllMupdateOverrides::read_all(&logctx.log, internal_disks); assert_eq!( overrides.boot_disk_override.as_ref().unwrap().as_ref(), None, @@ -752,12 +762,10 @@ mod tests { .write_str(&serde_json::to_string(&override_info_2).unwrap()) .expect("failed to write override json"); - let zpools = ZoneImageZpools { - root: dir.path(), - all_m2_zpools: vec![BOOT_ZPOOL, NON_BOOT_ZPOOL], - }; + let internal_disks = + make_internal_disks(dir.path(), BOOT_ZPOOL, &[NON_BOOT_ZPOOL]); let overrides = - AllMupdateOverrides::read_all(&logctx.log, &zpools, &BOOT_ZPOOL); + AllMupdateOverrides::read_all(&logctx.log, internal_disks); assert_eq!( overrides.boot_disk_override.as_ref().unwrap().as_ref(), Some(&override_info), @@ -798,12 +806,10 @@ mod tests { .create_dir_all() .unwrap(); - let zpools = ZoneImageZpools { - root: dir.path(), - all_m2_zpools: vec![BOOT_ZPOOL, NON_BOOT_ZPOOL], - }; + let internal_disks = + 
make_internal_disks(dir.path(), BOOT_ZPOOL, &[NON_BOOT_ZPOOL]); let overrides = - AllMupdateOverrides::read_all(&logctx.log, &zpools, &BOOT_ZPOOL); + AllMupdateOverrides::read_all(&logctx.log, internal_disks); assert_eq!( overrides.boot_disk_override.as_ref().unwrap_err(), &dataset_missing_error( @@ -841,12 +847,10 @@ mod tests { dir.child(&BOOT_PATHS.install_dataset).touch().unwrap(); dir.child(&NON_BOOT_PATHS.install_dataset).touch().unwrap(); - let zpools = ZoneImageZpools { - root: dir.path(), - all_m2_zpools: vec![BOOT_ZPOOL, NON_BOOT_ZPOOL], - }; + let internal_disks = + make_internal_disks(dir.path(), BOOT_ZPOOL, &[NON_BOOT_ZPOOL]); let overrides = - AllMupdateOverrides::read_all(&logctx.log, &zpools, &BOOT_ZPOOL); + AllMupdateOverrides::read_all(&logctx.log, internal_disks); assert_eq!( overrides.boot_disk_override.as_ref().unwrap_err(), &dataset_not_dir_error( @@ -892,17 +896,13 @@ mod tests { // Read error (empty file). dir.child(&NON_BOOT_3_PATHS.override_json).touch().unwrap(); - let zpools = ZoneImageZpools { - root: dir.path(), - all_m2_zpools: vec![ - BOOT_ZPOOL, - NON_BOOT_ZPOOL, - NON_BOOT_2_ZPOOL, - NON_BOOT_3_ZPOOL, - ], - }; + let internal_disks = make_internal_disks( + dir.path(), + BOOT_ZPOOL, + &[NON_BOOT_ZPOOL, NON_BOOT_2_ZPOOL, NON_BOOT_3_ZPOOL], + ); let overrides = - AllMupdateOverrides::read_all(&logctx.log, &zpools, &BOOT_ZPOOL); + AllMupdateOverrides::read_all(&logctx.log, internal_disks); assert_eq!( overrides.boot_disk_override.as_ref().unwrap_err(), &deserialize_error(dir.path(), &BOOT_PATHS.override_json, "",), diff --git a/sled-agent/zone-images/src/source_resolver.rs b/sled-agent/zone-images/src/source_resolver.rs index 576e1676ad0..0ef868960ae 100644 --- a/sled-agent/zone-images/src/source_resolver.rs +++ b/sled-agent/zone-images/src/source_resolver.rs @@ -5,12 +5,10 @@ //! Zone image lookup. use crate::AllMupdateOverrides; -use camino::Utf8Path; use camino::Utf8PathBuf; -use illumos_utils::zpool::ZpoolName; use nexus_sled_agent_shared::inventory::OmicronZoneImageSource; -use sled_storage::dataset::INSTALL_DATASET; -use sled_storage::dataset::M2_ARTIFACT_DATASET; +use sled_agent_config_reconciler::InternalDisks; +use sled_agent_config_reconciler::InternalDisksWithBootDisk; use slog::o; use std::sync::Arc; use std::sync::Mutex; @@ -29,16 +27,6 @@ pub struct ZoneImageFileSource { pub search_paths: Vec, } -/// A description of zpools to examine for zone images. -pub struct ZoneImageZpools<'a> { - /// The root directory, typically `/`. - pub root: &'a Utf8Path, - - /// The full set of M.2 zpools that are currently known. Must be non-empty, - /// but it can include the boot zpool. - pub all_m2_zpools: Vec, -} - /// Resolves [`OmicronZoneImageSource`] instances into file names and search /// paths. /// @@ -53,33 +41,25 @@ impl ZoneImageSourceResolver { /// Creates a new `ZoneImageSourceResolver`. pub fn new( log: &slog::Logger, - zpools: &ZoneImageZpools<'_>, - boot_zpool: &ZpoolName, + internal_disks: InternalDisksWithBootDisk, ) -> Self { Self { inner: Arc::new(Mutex::new(ResolverInner::new( - log, zpools, boot_zpool, + log, + internal_disks, ))), } } - /// Overrides the image directory with another one. - /// - /// Intended for testing. - pub fn override_image_directory(&self, path: Utf8PathBuf) { - self.inner.lock().unwrap().override_image_directory(path); - } - /// Returns a [`ZoneImageFileSource`] consisting of the file name, plus a /// list of potential paths to search, for a zone image. 
pub fn file_source_for( &self, image_source: &OmicronZoneImageSource, - zpools: &ZoneImageZpools<'_>, - boot_zpool: Option<&ZpoolName>, + internal_disks: InternalDisks, ) -> ZoneImageFileSource { let inner = self.inner.lock().unwrap(); - inner.file_source_for(image_source, zpools, boot_zpool) + inner.file_source_for(image_source, internal_disks) } } @@ -99,39 +79,20 @@ struct ResolverInner { impl ResolverInner { fn new( log: &slog::Logger, - zpools: &ZoneImageZpools<'_>, - boot_zpool: &ZpoolName, + internal_disks: InternalDisksWithBootDisk, ) -> Self { let log = log.new(o!("component" => "ZoneImageSourceResolver")); let mupdate_overrides = - AllMupdateOverrides::read_all(&log, zpools, boot_zpool); + AllMupdateOverrides::read_all(&log, internal_disks); Self { log, image_directory_override: None, mupdate_overrides } } - fn override_image_directory( - &mut self, - image_directory_override: Utf8PathBuf, - ) { - if let Some(dir) = &self.image_directory_override { - // Allow idempotent sets to the same directory -- some tests do - // this. - if image_directory_override != *dir { - panic!( - "image_directory_override already set to `{dir}`, \ - attempting to set it to `{image_directory_override}`" - ); - } - } - self.image_directory_override = Some(image_directory_override); - } - fn file_source_for( &self, image_source: &OmicronZoneImageSource, - zpools: &ZoneImageZpools<'_>, - boot_zpool: Option<&ZpoolName>, + internal_disks: InternalDisks, ) -> ZoneImageFileSource { let file_name = match image_source { OmicronZoneImageSource::InstallDataset => { @@ -153,33 +114,17 @@ impl ResolverInner { // If the boot disk exists, look for the image in the "install" // dataset on the boot zpool. - if let Some(boot_zpool) = boot_zpool { - zone_image_paths.push( - boot_zpool - .dataset_mountpoint(zpools.root, INSTALL_DATASET), - ); + if let Some(path) = internal_disks.boot_disk_install_dataset() { + zone_image_paths.push(path); } zone_image_paths } OmicronZoneImageSource::Artifact { .. } => { - // Search both artifact datasets, but look on the boot disk first. - // This iterator starts with the zpool for the boot disk (if it - // exists), and then is followed by all other zpools. - let zpool_iter = boot_zpool.into_iter().chain( - zpools - .all_m2_zpools - .iter() - .filter(|zpool| Some(zpool) != boot_zpool.as_ref()), - ); - zpool_iter - .map(|zpool| { - zpool.dataset_mountpoint( - zpools.root, - M2_ARTIFACT_DATASET, - ) - }) - .collect() + // Search both artifact datasets. This iterator starts with the + // dataset for the boot disk (if it exists), and then is followed + // by all other disks. 
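For context on how a `ZoneImageFileSource` is meant to be consumed: the caller looks for `file_name` under each entry of `search_paths` in order, so listing the boot disk's datasets first gives them priority. A self-contained sketch of that lookup (stand-in struct and hypothetical paths, not the real consumer):

    use std::path::PathBuf;

    // Stand-in for ZoneImageFileSource: a file name plus ordered search paths.
    struct FileSource {
        file_name: String,
        search_paths: Vec<PathBuf>,
    }

    /// Return the first search path that actually contains `file_name`.
    fn resolve(source: &FileSource) -> Option<PathBuf> {
        source
            .search_paths
            .iter()
            .map(|dir| dir.join(&source.file_name))
            .find(|candidate| candidate.exists())
    }

    fn main() {
        let source = FileSource {
            file_name: "oximeter.tar.gz".to_string(),
            search_paths: vec![
                PathBuf::from("/fake/boot/install"),
                PathBuf::from("/fake/other/install"),
            ],
        };
        match resolve(&source) {
            Some(path) => println!("would use image at {}", path.display()),
            None => println!("image not found on any search path"),
        }
    }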
+ internal_disks.all_artifact_datasets().collect() } }; From 2abceb78cbc6712363a93d81706ce48b9abc31ae Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 20 May 2025 17:40:52 -0400 Subject: [PATCH 04/10] fill in some TODOs that were waiting on reconciler --- nexus-sled-agent-shared/src/inventory.rs | 49 +++++++++++++++++++ nexus/inventory/src/examples.rs | 27 ++-------- nexus/reconfigurator/planning/src/planner.rs | 49 +++++++++---------- nexus/reconfigurator/planning/src/system.rs | 29 ++++++++--- .../background/tasks/sync_service_zone_nat.rs | 24 ++++----- 5 files changed, 108 insertions(+), 70 deletions(-) diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs index 8495c774c49..eae39f4d61f 100644 --- a/nexus-sled-agent-shared/src/inventory.rs +++ b/nexus-sled-agent-shared/src/inventory.rs @@ -128,6 +128,55 @@ pub struct ConfigReconcilerInventory { pub zones: BTreeMap, } +impl ConfigReconcilerInventory { + /// Iterate over all running zones as reported by the last reconciliation + /// result. + /// + /// This includes zones that are both present in `last_reconciled_config` + /// and whose status in `zones` indicates "successfully running". + pub fn running_omicron_zones( + &self, + ) -> impl Iterator { + self.zones.iter().filter_map(|(zone_id, result)| { + match result { + ConfigReconcilerInventoryResult::Ok => (), + ConfigReconcilerInventoryResult::Err { .. } => return None, + }; + self.last_reconciled_config.zones.get(zone_id) + }) + } + + /// Given a sled config, produce a reconciler result that sled-agent could + /// have emitted if reconciliation succeeded. + /// + /// This method should only be used by tests and dev tools; real code should + /// look at the actual `last_reconciliation` value from the parent + /// [`Inventory`]. + pub fn debug_assume_success(config: OmicronSledConfig) -> Self { + let external_disks = config + .disks + .iter() + .map(|d| (d.id, ConfigReconcilerInventoryResult::Ok)) + .collect(); + let datasets = config + .datasets + .iter() + .map(|d| (d.id, ConfigReconcilerInventoryResult::Ok)) + .collect(); + let zones = config + .zones + .iter() + .map(|z| (z.id, ConfigReconcilerInventoryResult::Ok)) + .collect(); + Self { + last_reconciled_config: config, + external_disks, + datasets, + zones, + } + } +} + #[derive(Clone, Debug, PartialEq, Eq, Deserialize, JsonSchema, Serialize)] #[serde(tag = "result", rename_all = "snake_case")] pub enum ConfigReconcilerInventoryResult { diff --git a/nexus/inventory/src/examples.rs b/nexus/inventory/src/examples.rs index a7603195273..07af0890f46 100644 --- a/nexus/inventory/src/examples.rs +++ b/nexus/inventory/src/examples.rs @@ -16,7 +16,6 @@ use gateway_client::types::SpType; use gateway_types::rot::RotSlot; use nexus_sled_agent_shared::inventory::Baseboard; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventory; -use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; use nexus_sled_agent_shared::inventory::Inventory; use nexus_sled_agent_shared::inventory::InventoryDataset; @@ -643,29 +642,9 @@ pub fn sled_agent( ledgered_sled_config: Option, ) -> Inventory { // Assume the `ledgered_sled_config` was reconciled successfully. 
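The two new inventory helpers pair naturally: `debug_assume_success` fabricates the reconciler output for a config, and `running_omicron_zones` is what consumers iterate over (the planner's NTP check later in this series does exactly that). A short usage sketch, assuming the `nexus-sled-agent-shared` types above are in scope:

    use nexus_sled_agent_shared::inventory::{
        ConfigReconcilerInventory, OmicronSledConfig,
    };

    /// Would an inventory that successfully reconciled `config` report a
    /// running NTP zone?
    fn has_running_ntp(config: OmicronSledConfig) -> bool {
        // Pretend sled-agent reconciled `config` with no errors...
        let reconciled = ConfigReconcilerInventory::debug_assume_success(config);
        // ...then ask which zones are actually running.
        reconciled.running_omicron_zones().any(|z| z.zone_type.is_ntp())
    }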
- let last_reconciliation = ledgered_sled_config.clone().map(|config| { - let external_disks = config - .disks - .iter() - .map(|d| (d.id, ConfigReconcilerInventoryResult::Ok)) - .collect(); - let datasets = config - .datasets - .iter() - .map(|d| (d.id, ConfigReconcilerInventoryResult::Ok)) - .collect(); - let zones = config - .zones - .iter() - .map(|z| (z.id, ConfigReconcilerInventoryResult::Ok)) - .collect(); - ConfigReconcilerInventory { - last_reconciled_config: config, - external_disks, - datasets, - zones, - } - }); + let last_reconciliation = ledgered_sled_config + .clone() + .map(ConfigReconcilerInventory::debug_assume_success); let reconciler_status = if last_reconciliation.is_some() { ConfigReconcilerInventoryStatus::Idle { diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 04b61483705..9f308c1fac6 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -214,17 +214,18 @@ impl<'a> Planner<'a> { // The sled is not expunged. We have to see if the inventory // reflects the parent blueprint disk generation. If it does // then we mark any expunged disks decommissioned. + // + // TODO-correctness We inspect `last_reconciliation` here to confirm + // that the config reconciler has acted on the generation we're waiting + // for. This might be overly conservative - would it be okay to act on + // `ledgered_sled_config` instead, and decommission disks as long as the + // sled knows it should stop using it (even if it hasn't yet)? let Some(seen_generation) = self .inventory .sled_agents .get(&sled_id) - // TODO-correctness For now this is correct, because sled-agent - // doesn't ledger a config until it's applied it. However, once - // https://github.com/oxidecomputer/omicron/pull/8064 lands, - // sled-agent will ledger a config and then later reconcile it; do - // we need to wait for that reconciliation to decommission disks? - .and_then(|sa| sa.ledgered_sled_config.as_ref()) - .map(|config| config.generation) + .and_then(|sa| sa.last_reconciliation.as_ref()) + .map(|reconciled| reconciled.last_reconciled_config.generation) else { // There is no current inventory for the sled agent, so we cannot // decommission any disks. @@ -424,15 +425,10 @@ impl<'a> Planner<'a> { as_of_generation, ready_for_cleanup, } if !ready_for_cleanup => { - // TODO-correctness For now this is correct, because - // sled-agent doesn't ledger a config until it's applied it. - // However, as a part of landing - // https://github.com/oxidecomputer/omicron/pull/8064, - // this needs to change to check the last reconciled config - // instead of just the ledgered config. - if let Some(config) = &sled_inv.ledgered_sled_config { - if config.generation >= as_of_generation - && !config.zones.contains_key(&zone.id) + if let Some(reconciled) = &sled_inv.last_reconciliation { + if reconciled.last_reconciled_config.generation + >= as_of_generation + && !reconciled.zones.contains_key(&zone.id) { zones_ready_for_cleanup.push(zone.id); } @@ -569,22 +565,23 @@ impl<'a> Planner<'a> { // NTP zone), we'll need to be careful how we do it to avoid a // problem here. // - // TODO-cleanup Once - // https://github.com/oxidecomputer/omicron/pull/8064 lands, the - // above comment will be overly conservative; sled-agent won't - // reject configs just because time isn't sync'd yet. We may be able - // to remove this check entirely. 
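The planner change above reduces to one gate: don't decommission anything until inventory shows the sled has reconciled at least the generation that expunged it. Pulled out as a standalone predicate, that check looks roughly like this (a sketch only; the planner keeps it inline):

    use nexus_sled_agent_shared::inventory::ConfigReconcilerInventory;
    use omicron_common::api::external::Generation;

    /// True once the sled has reconciled a config at least as new as
    /// `required`, i.e. it has actually acted on the expungement.
    fn sled_caught_up(
        last_reconciliation: Option<&ConfigReconcilerInventory>,
        required: Generation,
    ) -> bool {
        last_reconciliation
            .map(|r| r.last_reconciled_config.generation >= required)
            .unwrap_or(false)
    }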
(It's probably also fine to keep - // it for now; removing it just saves us an extra planning iteration - // when adding a new sled.) + // TODO-cleanup The above comment is now overly conservative; + // sled-agent won't reject configs just because time isn't sync'd + // yet. We may be able to remove this check entirely, but we'd need + // to do some testing to confirm no surprises. (It's probably also + // fine to keep it for now; removing it just saves us an extra + // planning iteration when adding a new sled.) let has_ntp_inventory = self .inventory .sled_agents .get(&sled_id) .map(|sled_agent| { - sled_agent.ledgered_sled_config.as_ref().map_or( + sled_agent.last_reconciliation.as_ref().map_or( false, - |config| { - config.zones.iter().any(|z| z.zone_type.is_ntp()) + |reconciliation| { + reconciliation + .running_omicron_zones() + .any(|z| z.zone_type.is_ntp()) }, ) }) diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index 12219daaf81..0d4bf6b2916 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -6,6 +6,7 @@ //! associated inventory collections and blueprints use anyhow::{Context, anyhow, bail, ensure}; +use chrono::Utc; use gateway_client::types::RotState; use gateway_client::types::SpState; use indexmap::IndexMap; @@ -13,6 +14,7 @@ use ipnet::Ipv6Net; use ipnet::Ipv6Subnets; use nexus_inventory::CollectionBuilder; use nexus_sled_agent_shared::inventory::Baseboard; +use nexus_sled_agent_shared::inventory::ConfigReconcilerInventory; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; use nexus_sled_agent_shared::inventory::Inventory; use nexus_sled_agent_shared::inventory::InventoryDataset; @@ -57,6 +59,7 @@ use std::fmt::Debug; use std::net::Ipv4Addr; use std::net::Ipv6Addr; use std::sync::Arc; +use std::time::Duration; /// Describes an actual or synthetic Oxide rack for planning and testing /// @@ -389,10 +392,17 @@ impl SystemDescription { })?; let sled = Arc::make_mut(sled); - sled.inventory_sled_agent.ledgered_sled_config = Some(sled_config); + sled.inventory_sled_agent.ledgered_sled_config = + Some(sled_config.clone()); + + // Present results as though the reconciler has successfully completed. 
sled.inventory_sled_agent.reconciler_status = - ConfigReconcilerInventoryStatus::NotYetRun; - sled.inventory_sled_agent.last_reconciliation = None; + ConfigReconcilerInventoryStatus::Idle { + completed_at: Utc::now(), + ran_for: Duration::from_secs(5), + }; + sled.inventory_sled_agent.last_reconciliation = + Some(ConfigReconcilerInventory::debug_assume_success(sled_config)); Ok(self) } @@ -716,9 +726,16 @@ impl Sled { }) .collect(), datasets: vec![], - ledgered_sled_config: Some(sled_config), - reconciler_status: ConfigReconcilerInventoryStatus::NotYetRun, - last_reconciliation: None, + ledgered_sled_config: Some(sled_config.clone()), + reconciler_status: ConfigReconcilerInventoryStatus::Idle { + completed_at: Utc::now(), + ran_for: Duration::from_secs(5), + }, + last_reconciliation: Some( + ConfigReconcilerInventory::debug_assume_success( + sled_config, + ), + ), } }; diff --git a/nexus/src/app/background/tasks/sync_service_zone_nat.rs b/nexus/src/app/background/tasks/sync_service_zone_nat.rs index a1c1869fed9..34798bb9335 100644 --- a/nexus/src/app/background/tasks/sync_service_zone_nat.rs +++ b/nexus/src/app/background/tasks/sync_service_zone_nat.rs @@ -126,21 +126,17 @@ impl BackgroundTask for ServiceZoneNatTracker { let sled_address = oxnet::Ipv6Net::host_net(*sled.ip); // TODO-correctness Looking at inventory here is a little - // sketchy. We currently check the most-recently-ledgered zones - // which tells us what services sled-agent things it's supposed - // to be running. It might be better to check either: - // - // * `sa.last_reconciliation` (to know what zones are actually - // running; this requires - // https://github.com/oxidecomputer/omicron/pull/8064 landing) - // if the goal is to sync what's actually on the sled - // * a rendezvous table populated by reconfigurator if the goal - // is to sync with what's Nexus thinks is supposed to be - // running on the sled + // sketchy. We check the last reconciliation result, which + // should be a view of what zones are actually running on the + // sled. But maybe it would be better to act on a rendezvous + // table populated by reconfigurator if the goal is to sync with + // what's Nexus thinks is supposed to be running on the sled? let zones = sa - .ledgered_sled_config - .map(|config| config.zones) - .unwrap_or_default(); + .last_reconciliation + .iter() + .flat_map(|reconciliation| { + reconciliation.running_omicron_zones().cloned() + }); for zone in zones { let zone_type: OmicronZoneType = zone.zone_type; From 92b491662992e90f35c4dbc191d38f372783c175 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 21 May 2025 16:26:24 -0400 Subject: [PATCH 05/10] fix failing planner tests --- nexus/reconfigurator/planning/src/planner.rs | 186 ++++++++++++------- 1 file changed, 120 insertions(+), 66 deletions(-) diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 9f308c1fac6..07b1c86756d 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -420,23 +420,37 @@ impl<'a> Planner<'a> { .blueprint .current_sled_zones(sled_id, BlueprintZoneDisposition::is_expunged) { - match zone.disposition { + // If this is a zone still waiting for cleanup, what generation of + // sled config was it expunged? 
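The NAT-sync change relies on the "Option as a zero-or-one collection" idiom: iterating the `Option` yields the reconciliation at most once, so `flat_map` produces an empty zone iterator for sleds that have never reconciled, with no separate `match` at the call site. A self-contained illustration of the same shape:

    // A sled that has reconciled reports its zone names; one that hasn't
    // reports nothing, without a special-case branch at the call site.
    fn running_zone_names(
        last_reconciliation: Option<&Vec<String>>,
    ) -> impl Iterator<Item = String> + '_ {
        last_reconciliation
            .into_iter()
            .flat_map(|zones| zones.iter().cloned())
    }

    fn main() {
        let reconciled = vec!["ntp".to_string(), "nexus".to_string()];
        assert_eq!(running_zone_names(Some(&reconciled)).count(), 2);
        assert_eq!(running_zone_names(None).count(), 0);
    }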
+ let as_of_generation = match zone.disposition { BlueprintZoneDisposition::Expunged { as_of_generation, ready_for_cleanup, - } if !ready_for_cleanup => { - if let Some(reconciled) = &sled_inv.last_reconciliation { - if reconciled.last_reconciled_config.generation - >= as_of_generation - && !reconciled.zones.contains_key(&zone.id) - { - zones_ready_for_cleanup.push(zone.id); - } - } - } + } if !ready_for_cleanup => as_of_generation, BlueprintZoneDisposition::InService | BlueprintZoneDisposition::Expunged { .. } => continue, + }; + + // If the sled hasn't done any reconciliation, wait until it has. + let Some(reconciliation) = &sled_inv.last_reconciliation else { + continue; + }; + + // If the sled hasn't reconciled a new-enough generation, wait until + // it has. + if reconciliation.last_reconciled_config.generation + < as_of_generation + { + continue; } + + // If the sled hasn't shut down the zone, wait until it has. + if reconciliation.zones.contains_key(&zone.id) { + continue; + } + + // All checks passed: we can mark this zone as ready for cleanup. + zones_ready_for_cleanup.push(zone.id); } if !zones_ready_for_cleanup.is_empty() { @@ -1011,6 +1025,8 @@ pub(crate) mod test { use clickhouse_admin_types::ClickhouseKeeperClusterMembership; use clickhouse_admin_types::KeeperId; use expectorate::assert_contents; + use nexus_sled_agent_shared::inventory::ConfigReconcilerInventory; + use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult; use nexus_types::deployment::BlueprintDatasetDisposition; use nexus_types::deployment::BlueprintDiffSummary; use nexus_types::deployment::BlueprintPhysicalDiskDisposition; @@ -2133,9 +2149,10 @@ pub(crate) mod test { .sled_agents .get_mut(&sled_id) .unwrap() - .ledgered_sled_config + .last_reconciliation .as_mut() .unwrap() + .last_reconciled_config .generation = Generation::from_u32(3); let blueprint3 = Planner::new_based_on( @@ -4005,6 +4022,14 @@ pub(crate) mod test { // Use our example system. let (mut collection, input, blueprint1) = example(&log, TEST_NAME); + // Don't start more internal DNS zones (which the planner would, as a + // side effect of our test details). + let input = { + let mut builder = input.into_builder(); + builder.policy_mut().target_internal_dns_zone_count = 0; + builder.build() + }; + // Find a Nexus zone we'll use for our test. let (sled_id, nexus_config) = blueprint1 .sleds @@ -4037,7 +4062,7 @@ pub(crate) mod test { // Run the planner. It should expunge all zones on the disk we just // expunged, including our Nexus zone, but not mark them as ready for // cleanup yet. - let blueprint2 = Planner::new_based_on( + let mut blueprint2 = Planner::new_based_on( logctx.log.clone(), &blueprint1, &input, @@ -4049,6 +4074,27 @@ pub(crate) mod test { .plan() .expect("planned"); + // Mark the disk we expected as "ready for cleanup"; this isn't what + // we're testing, and failing to do this will interfere with some of the + // checks we do below. + for mut disk in + blueprint2.sleds.get_mut(&sled_id).unwrap().disks.iter_mut() + { + match disk.disposition { + BlueprintPhysicalDiskDisposition::InService => (), + BlueprintPhysicalDiskDisposition::Expunged { + as_of_generation, + .. + } => { + disk.disposition = + BlueprintPhysicalDiskDisposition::Expunged { + as_of_generation, + ready_for_cleanup: true, + }; + } + } + } + // Helper to extract the Nexus zone's disposition in a blueprint. 
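Rewritten as guard clauses, the zone-cleanup condition is a simple conjunction; it could equally be phrased as a predicate like the sketch below (the planner keeps the `continue`-based form so each early exit carries its own comment):

    use nexus_sled_agent_shared::inventory::ConfigReconcilerInventory;
    use omicron_common::api::external::Generation;
    use omicron_uuid_kinds::OmicronZoneUuid;

    /// An expunged zone may be marked `ready_for_cleanup` once the sled has
    /// reconciled the generation that expunged it and no longer reports the
    /// zone at all.
    fn zone_ready_for_cleanup(
        reconciliation: &ConfigReconcilerInventory,
        zone_id: OmicronZoneUuid,
        as_of_generation: Generation,
    ) -> bool {
        reconciliation.last_reconciled_config.generation >= as_of_generation
            && !reconciliation.zones.contains_key(&zone_id)
    }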
let get_nexus_disposition = |bp: &Blueprint| { bp.sleds.get(&sled_id).unwrap().zones.iter().find_map(|z| { @@ -4057,8 +4103,8 @@ pub(crate) mod test { }; // This sled's config generation should have been bumped... - let bp2_generation = - blueprint2.sleds.get(&sled_id).unwrap().sled_agent_generation; + let bp2_config = blueprint2.sleds.get(&sled_id).unwrap().clone(); + let bp2_sled_config = bp2_config.clone().into_in_service_sled_config(); assert_eq!( blueprint1 .sleds @@ -4066,24 +4112,25 @@ pub(crate) mod test { .unwrap() .sled_agent_generation .next(), - bp2_generation + bp2_sled_config.generation ); // ... and the Nexus should should have the disposition we expect. assert_eq!( get_nexus_disposition(&blueprint2), Some(BlueprintZoneDisposition::Expunged { - as_of_generation: bp2_generation, + as_of_generation: bp2_sled_config.generation, ready_for_cleanup: false, }) ); // Running the planner again should make no changes until the inventory - // reports that the zone is not running and that the sled has seen a - // new-enough generation. Try three variants that should do nothing: + // reports that the zone is not running and that the sled has reconciled + // a new-enough generation. Try these variants: // - // * same inventory as above - // * inventory reports a new generation (but zone still running) - // * inventory reports zone not running (but still the old generation) + // * same inventory as above (expect no changes) + // * new config is ledgered but not reconciled (expect no changes) + // * new config is reconciled, but zone is in an error state (expect + // no changes) eprintln!("planning with no inventory change..."); assert_planning_makes_no_changes( &logctx.log, @@ -4092,15 +4139,7 @@ pub(crate) mod test { &collection, TEST_NAME, ); - // TODO-cleanup These checks depend on `last_reconciled_config`, which - // is not yet populated; uncomment these and check them by mutating - // `last_reconciled_config` once - // https://github.com/oxidecomputer/omicron/pull/8064 lands. We could - // just mutate `ledgered_sled_config` in the meantime (as this - // commented-out code does below), but that's not really checking what - // we care about. - /* - eprintln!("planning with generation bump but zone still running..."); + eprintln!("planning with config ledgered but not reconciled..."); assert_planning_makes_no_changes( &logctx.log, &blueprint2, @@ -4111,15 +4150,15 @@ pub(crate) mod test { .sled_agents .get_mut(&sled_id) .unwrap() - .ledgered_sled_config - .as_mut() - .unwrap() - .generation = bp2_generation; + .ledgered_sled_config = Some(bp2_sled_config.clone()); collection }, TEST_NAME, ); - eprintln!("planning with zone gone but generation not bumped..."); + eprintln!( + "planning with config ledgered but \ + zones failed to shut down..." + ); assert_planning_makes_no_changes( &logctx.log, &blueprint2, @@ -4130,28 +4169,42 @@ pub(crate) mod test { .sled_agents .get_mut(&sled_id) .unwrap() - .ledgered_sled_config - .as_mut() + .ledgered_sled_config = Some(bp2_sled_config.clone()); + let mut reconciliation = + ConfigReconcilerInventory::debug_assume_success( + bp2_sled_config.clone(), + ); + // For all the zones that are in bp2_config but not + // bp2_sled_config (i.e., zones that should have been shut + // down), insert an error result in the reconciliation. 
+ for zone_id in bp2_config.zones.keys() { + if !reconciliation.zones.contains_key(zone_id) { + reconciliation.zones.insert( + *zone_id, + ConfigReconcilerInventoryResult::Err { + message: "failed to shut down".to_string(), + }, + ); + } + } + collection + .sled_agents + .get_mut(&sled_id) .unwrap() - .zones - .retain(|z| z.id != nexus_config.id); + .last_reconciliation = Some(reconciliation); collection }, TEST_NAME, ); - */ // Now make both changes to the inventory. { - let config = &mut collection - .sled_agents - .get_mut(&sled_id) - .unwrap() - .ledgered_sled_config - .as_mut() - .unwrap(); - config.generation = bp2_generation; - config.zones.retain(|z| z.id != nexus_config.id); + let config = collection.sled_agents.get_mut(&sled_id).unwrap(); + config.ledgered_sled_config = Some(bp2_sled_config.clone()); + config.last_reconciliation = + Some(ConfigReconcilerInventory::debug_assume_success( + bp2_sled_config.clone(), + )); } // Run the planner. It mark our Nexus zone as ready for cleanup now that @@ -4171,7 +4224,7 @@ pub(crate) mod test { assert_eq!( get_nexus_disposition(&blueprint3), Some(BlueprintZoneDisposition::Expunged { - as_of_generation: bp2_generation, + as_of_generation: bp2_sled_config.generation, ready_for_cleanup: true, }) ); @@ -4180,7 +4233,7 @@ pub(crate) mod test { // since it doesn't affect what's sent to sled-agent. assert_eq!( blueprint3.sleds.get(&sled_id).unwrap().sled_agent_generation, - bp2_generation + bp2_sled_config.generation ); assert_planning_makes_no_changes( @@ -4260,8 +4313,12 @@ pub(crate) mod test { }; // This sled's config generation should have been bumped... - let bp2_generation = - blueprint2.sleds.get(&sled_id).unwrap().sled_agent_generation; + let bp2_config = blueprint2 + .sleds + .get(&sled_id) + .unwrap() + .clone() + .into_in_service_sled_config(); assert_eq!( blueprint1 .sleds @@ -4269,13 +4326,13 @@ pub(crate) mod test { .unwrap() .sled_agent_generation .next(), - bp2_generation + bp2_config.generation ); // ... and the DNS zone should should have the disposition we expect. assert_eq!( get_dns_disposition(&blueprint2), Some(BlueprintZoneDisposition::Expunged { - as_of_generation: bp2_generation, + as_of_generation: bp2_config.generation, ready_for_cleanup: false, }) ); @@ -4293,15 +4350,12 @@ pub(crate) mod test { // Make the inventory changes necessary for cleanup to proceed. { - let config = &mut collection - .sled_agents - .get_mut(&sled_id) - .unwrap() - .ledgered_sled_config - .as_mut() - .unwrap(); - config.generation = bp2_generation; - config.zones.retain(|z| z.id != internal_dns_config.id); + let config = &mut collection.sled_agents.get_mut(&sled_id).unwrap(); + config.ledgered_sled_config = Some(bp2_config.clone()); + config.last_reconciliation = + Some(ConfigReconcilerInventory::debug_assume_success( + bp2_config.clone(), + )); } // Run the planner. 
It should mark our internal DNS zone as ready for @@ -4323,7 +4377,7 @@ pub(crate) mod test { assert_eq!( get_dns_disposition(&blueprint3), Some(BlueprintZoneDisposition::Expunged { - as_of_generation: bp2_generation, + as_of_generation: bp2_config.generation, ready_for_cleanup: true, }) ); From 78c60a366b65fb190db977dd3023300c1e0140c4 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 21 May 2025 16:37:27 -0400 Subject: [PATCH 06/10] expectorate --- dev-tools/omdb/tests/test_all_output.rs | 1 - dev-tools/omdb/tests/usage_errors.out | 28 ------------------------- 2 files changed, 29 deletions(-) diff --git a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs index 5ed7a606fda..c8441924d7f 100644 --- a/dev-tools/omdb/tests/test_all_output.rs +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -107,7 +107,6 @@ async fn test_omdb_usage_errors() { &["nexus", "sleds"], &["sled-agent"], &["sled-agent", "zones"], - &["sled-agent", "zpools"], &["oximeter", "--help"], &["oxql", "--help"], // Mispelled argument diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out index 68ff3c59272..472e2b3cc9b 100644 --- a/dev-tools/omdb/tests/usage_errors.out +++ b/dev-tools/omdb/tests/usage_errors.out @@ -909,8 +909,6 @@ Usage: omdb sled-agent [OPTIONS] Commands: zones print information about zones - zpools print information about zpools - datasets print information about datasets bootstore print information about the local bootstore node help Print this message or the help of the given subcommand(s) @@ -949,32 +947,6 @@ Connection Options: --sled-agent-url URL of the Sled internal API [env: OMDB_SLED_AGENT_URL=] --dns-server [env: OMDB_DNS_SERVER=] -Safety Options: - -w, --destructive Allow potentially-destructive subcommands -============================================= -EXECUTING COMMAND: omdb ["sled-agent", "zpools"] -termination: Exited(2) ---------------------------------------------- -stdout: ---------------------------------------------- -stderr: -print information about zpools - -Usage: omdb sled-agent zpools [OPTIONS] - -Commands: - list Print list of all zpools managed by the sled agent - help Print this message or the help of the given subcommand(s) - -Options: - --log-level log level filter [env: LOG_LEVEL=] [default: warn] - --color Color output [default: auto] [possible values: auto, always, never] - -h, --help Print help - -Connection Options: - --sled-agent-url URL of the Sled internal API [env: OMDB_SLED_AGENT_URL=] - --dns-server [env: OMDB_DNS_SERVER=] - Safety Options: -w, --destructive Allow potentially-destructive subcommands ============================================= From 6b543b821e51179d2cb94a6bbd0082a985af5298 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Thu, 22 May 2025 11:22:48 -0400 Subject: [PATCH 07/10] flesh out RSS wait_for_config_reconciliation_on_sled() --- sled-agent/src/rack_setup/service.rs | 126 ++++++++++++++++++++++++++- 1 file changed, 122 insertions(+), 4 deletions(-) diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 436cd4503fc..abd07712d2c 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -89,7 +89,8 @@ use nexus_client::{ Client as NexusClient, Error as NexusError, types as NexusTypes, }; use nexus_sled_agent_shared::inventory::{ - OmicronSledConfig, OmicronZoneConfig, OmicronZoneType, OmicronZonesConfig, + ConfigReconcilerInventoryResult, OmicronSledConfig, OmicronZoneConfig, + OmicronZoneType, 
OmicronZonesConfig,
 };
 use nexus_types::deployment::{
     Blueprint, BlueprintDatasetConfig, BlueprintDatasetDisposition,
@@ -200,6 +201,9 @@ pub enum SetupServiceError {
     #[error("Error making HTTP request to Sled Agent: {0}")]
     SledApi(#[from] SledAgentError),
 
+    #[error("Sled config not yet reconciled: {0}")]
+    ConfigNotYetReconciled(String),
+
     #[error("Error making HTTP request to Nexus: {0}")]
     NexusApi(#[from] NexusError),
 
@@ -427,10 +431,124 @@ impl ServiceInner {
     // reconciled the config at `generation`.
     async fn wait_for_config_reconciliation_on_sled(
         &self,
-        _sled_address: SocketAddrV6,
-        _generation: Generation,
+        sled_address: SocketAddrV6,
+        generation: Generation,
     ) -> Result<(), SetupServiceError> {
-        unimplemented!("needs updated inventory")
+        let dur = std::time::Duration::from_secs(60);
+        let client = reqwest::ClientBuilder::new()
+            .connect_timeout(dur)
+            .build()
+            .map_err(SetupServiceError::HttpClient)?;
+        let log = self.log.new(o!("sled_address" => sled_address.to_string()));
+        let client = SledAgentClient::new_with_client(
+            &format!("http://{}", sled_address),
+            client,
+            log.clone(),
+        );
+
+        let inv_check = || async {
+            info!(log, "attempting to read sled's inventory");
+            let inventory = match client.inventory().await {
+                Ok(response) => response.into_inner(),
+                Err(error) => {
+                    // TODO Many other codes here should not be retried. See
+                    // omicron#4578.
+                    return Err(BackoffError::transient(
+                        SetupServiceError::SledApi(error),
+                    ));
+                }
+            };
+
+            // Has this sled's reconciler run at all?
+            let Some(last_reconciliation) = inventory.last_reconciliation
+            else {
+                return Err(BackoffError::transient(
+                    SetupServiceError::ConfigNotYetReconciled(
+                        "no reconciliation state available".to_string(),
+                    ),
+                ));
+            };
+
+            // Has it attempted to reconcile our target generation?
+            let reconciled_gen =
+                last_reconciliation.last_reconciled_config.generation;
+
+            if reconciled_gen < generation {
+                return Err(BackoffError::transient(
+                    SetupServiceError::ConfigNotYetReconciled(format!(
+                        "reconciled generation {reconciled_gen} lower than \
+                         desired generation {generation}",
+                    )),
+                ));
+            }
+
+            // Were there any errors during reconciliation? Check for disk,
+            // dataset, and zone errors.
+            let mut errors = Vec::new();
+
+            for (disk_id, result) in &last_reconciliation.external_disks {
+                match result {
+                    ConfigReconcilerInventoryResult::Ok => (),
+                    ConfigReconcilerInventoryResult::Err { message } => {
+                        errors.push(format!(
+                            "reconciliation for disk {disk_id} failed: {message}"
+                        ));
+                    }
+                }
+            }
+            for (dataset_id, result) in &last_reconciliation.datasets {
+                match result {
+                    ConfigReconcilerInventoryResult::Ok => (),
+                    ConfigReconcilerInventoryResult::Err { message } => {
+                        errors.push(format!(
+                            "reconciliation for dataset {dataset_id} failed: \
+                             {message}"
+                        ));
+                    }
+                }
+            }
+            for (zone_id, result) in &last_reconciliation.zones {
+                match result {
+                    ConfigReconcilerInventoryResult::Ok => (),
+                    ConfigReconcilerInventoryResult::Err { message } => {
+                        errors.push(format!(
+                            "reconciliation for zone {zone_id} failed: {message}"
+                        ));
+                    }
+                }
+            }
+
+            if errors.is_empty() {
+                Ok(())
+            } else {
+                // We treat all of these as transient (although it's possible
+                // some are permanent - we have no means for recovering from
+                // permanent errors during RSS other than clean-slating and
+                // starting over, so it's safer to treat these as transient and
+                // let operators investigate if things are stuck).
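The three per-kind loops above are structurally identical; if this check grows, they could collapse into one generic pass along these lines (a suggestion only, assuming the ID types implement `Display`, which the typed UUIDs formatted inline above do):

    use std::fmt::Display;

    use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult;

    /// Push a human-readable line for every entry of one kind ("disk",
    /// "dataset", "zone") that failed to reconcile.
    fn push_reconciliation_errors<'a, K: Display + 'a>(
        kind: &str,
        results: impl IntoIterator<
            Item = (&'a K, &'a ConfigReconcilerInventoryResult),
        >,
        errors: &mut Vec<String>,
    ) {
        for (id, result) in results {
            if let ConfigReconcilerInventoryResult::Err { message } = result {
                errors.push(format!(
                    "reconciliation for {kind} {id} failed: {message}"
                ));
            }
        }
    }

Each loop then becomes a call like `push_reconciliation_errors("disk", &last_reconciliation.external_disks, &mut errors);`.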
+ Err(BackoffError::transient( + SetupServiceError::ConfigNotYetReconciled( + errors.join(", "), + ), + )) + } + }; + let log_failure = |error, delay| { + warn!( + log, + "sled config not yet reconciled"; + "error" => #%error, + "retry_after" => ?delay, + ); + }; + retry_notify( + retry_policy_internal_service_aggressive(), + inv_check, + log_failure, + ) + .await?; + + Ok(()) } // Ensure that the desired sled configuration for a particular zone version From 6bce90060258fce1faa30a0a0217e66b5d67420e Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Thu, 22 May 2025 11:23:07 -0400 Subject: [PATCH 08/10] cargo fmt --- nexus-sled-agent-shared/src/inventory.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs index eae39f4d61f..4a775d7ab39 100644 --- a/nexus-sled-agent-shared/src/inventory.rs +++ b/nexus-sled-agent-shared/src/inventory.rs @@ -168,12 +168,7 @@ impl ConfigReconcilerInventory { .iter() .map(|z| (z.id, ConfigReconcilerInventoryResult::Ok)) .collect(); - Self { - last_reconciled_config: config, - external_disks, - datasets, - zones, - } + Self { last_reconciled_config: config, external_disks, datasets, zones } } } From 8ff4ae3feac08c016d703770c5cf781c464e1944 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Thu, 22 May 2025 11:30:33 -0400 Subject: [PATCH 09/10] clippy --- sled-agent/config-reconciler/src/handle.rs | 4 ++-- sled-agent/config-reconciler/src/internal_disks.rs | 4 ++-- sled-agent/src/services.rs | 2 ++ sled-agent/src/sled_agent.rs | 3 +-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/sled-agent/config-reconciler/src/handle.rs b/sled-agent/config-reconciler/src/handle.rs index e33c72f3f97..e70fd7fab59 100644 --- a/sled-agent/config-reconciler/src/handle.rs +++ b/sled-agent/config-reconciler/src/handle.rs @@ -475,7 +475,7 @@ impl AvailableDatasetsReceiver { AvailableDatasetsReceiverInner::FakeStatic(pools) => pools .iter() .map(|(pool, path)| PathInPool { - pool: ZpoolOrRamdisk::Zpool(pool.clone()), + pool: ZpoolOrRamdisk::Zpool(*pool), path: path.join(U2_DEBUG_DATASET), }) .collect(), @@ -498,7 +498,7 @@ impl AvailableDatasetsReceiver { AvailableDatasetsReceiverInner::FakeStatic(pools) => pools .iter() .map(|(pool, path)| PathInPool { - pool: ZpoolOrRamdisk::Zpool(pool.clone()), + pool: ZpoolOrRamdisk::Zpool(*pool), path: path.join(ZONE_DATASET), }) .collect(), diff --git a/sled-agent/config-reconciler/src/internal_disks.rs b/sled-agent/config-reconciler/src/internal_disks.rs index 96e2ea82c01..0d0102047d7 100644 --- a/sled-agent/config-reconciler/src/internal_disks.rs +++ b/sled-agent/config-reconciler/src/internal_disks.rs @@ -325,9 +325,9 @@ impl InternalDisks { &self.mount_config } - pub fn boot_disk_zpool(&self) -> Option<&ZpoolName> { + pub fn boot_disk_zpool(&self) -> Option { self.disks.iter().find_map(|d| { - if d.is_boot_disk() { Some(&d.zpool_name) } else { None } + if d.is_boot_disk() { Some(d.zpool_name) } else { None } }) } diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 98453b2f8f8..0400aff0b3f 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -848,7 +848,9 @@ impl ServiceManager { /// - `sidecar_revision`: Rev of attached sidecar, if present. /// - `switch_zone_maghemite_links`: List of physical links on which /// maghemite should listen. 
+ /// - `zone_image_resolver`: how to find Omicron zone images /// - `internal_disks_rx`: watch channel for changes to internal disks + #[allow(clippy::too_many_arguments)] pub(crate) fn new( log: &Logger, ddm_reconciler: DdmReconciler, diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index f4349a3c6c9..93c0b9e4730 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -428,8 +428,7 @@ impl SledAgent { .internal_disks_rx() .current() .boot_disk_zpool() - .ok_or_else(|| Error::BootDiskNotFound)? - .clone(); + .ok_or_else(|| Error::BootDiskNotFound)?; // Configure a swap device of the configured size before other system setup. match config.swap_device_size_gb { From 3ab3e791aec829d1d33f3a740203df56313670c2 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Thu, 22 May 2025 12:17:08 -0400 Subject: [PATCH 10/10] illumos test fix --- sled-agent/src/support_bundle/storage.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sled-agent/src/support_bundle/storage.rs b/sled-agent/src/support_bundle/storage.rs index 0c64b625d8b..34e6bf37480 100644 --- a/sled-agent/src/support_bundle/storage.rs +++ b/sled-agent/src/support_bundle/storage.rs @@ -981,7 +981,7 @@ mod tests { self.datasets_config_list().await.map_err(|err| err.into()) } - fn dyn_dataset_get( + async fn dyn_dataset_get( &self, dataset_name: &String, ) -> Result { @@ -990,6 +990,7 @@ mod tests { &[dataset_name.clone()], illumos_utils::zfs::WhichDatasets::SelfOnly, ) + .await .map_err(|err| Error::DatasetLookup(err))? .pop() else {