Skip to content

blueprint_planner background task #8287

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Jun 18, 2025
Merged
42 changes: 42 additions & 0 deletions dev-tools/omdb/src/bin/omdb/nexus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ use nexus_types::deployment::ClickhousePolicy;
use nexus_types::deployment::OximeterReadMode;
use nexus_types::deployment::OximeterReadPolicy;
use nexus_types::internal_api::background::AbandonedVmmReaperStatus;
use nexus_types::internal_api::background::BlueprintPlannerStatus;
use nexus_types::internal_api::background::BlueprintRendezvousStatus;
use nexus_types::internal_api::background::InstanceReincarnationStatus;
use nexus_types::internal_api::background::InstanceUpdaterStatus;
Expand Down Expand Up @@ -1056,6 +1057,9 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) {
"abandoned_vmm_reaper" => {
print_task_abandoned_vmm_reaper(details);
}
"blueprint_planner" => {
print_task_blueprint_planner(details);
}
"blueprint_executor" => {
print_task_blueprint_executor(details);
}
Expand Down Expand Up @@ -1204,6 +1208,44 @@ fn print_task_abandoned_vmm_reaper(details: &serde_json::Value) {
};
}

fn print_task_blueprint_planner(details: &serde_json::Value) {
let status =
match serde_json::from_value::<BlueprintPlannerStatus>(details.clone())
{
Ok(status) => status,
Err(error) => {
eprintln!(
"warning: failed to interpret task details: {:?}: {:?}",
error, details
);
return;
}
};
match status {
BlueprintPlannerStatus::Disabled => {
println!(" blueprint planning explicitly disabled by config!");
}
BlueprintPlannerStatus::Error(error) => {
println!(" task did not complete successfully: {error}");
}
BlueprintPlannerStatus::Unchanged { parent_blueprint_id } => {
println!(" plan unchanged from parent {parent_blueprint_id}");
}
BlueprintPlannerStatus::Planned { parent_blueprint_id, error } => {
println!(
" planned new blueprint from parent {parent_blueprint_id}, \
but could not make it the target: {error}"
);
}
BlueprintPlannerStatus::Targeted { blueprint_id, .. } => {
println!(
" planned new blueprint {blueprint_id}, \
and made it the current target"
);
}
}
}

fn print_task_blueprint_executor(details: &serde_json::Value) {
let mut value = details.clone();
// Extract and remove the event report. (If we don't do this, the
Expand Down
12 changes: 12 additions & 0 deletions dev-tools/omdb/tests/env.out
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ task: "blueprint_loader"
Loads the current target blueprint from the DB


task: "blueprint_planner"
Updates the target blueprint


task: "blueprint_rendezvous"
reconciles blueprints and inventory collection, updating Reconfigurator-
owned rendezvous tables that other subsystems consume
Expand Down Expand Up @@ -243,6 +247,10 @@ task: "blueprint_loader"
Loads the current target blueprint from the DB


task: "blueprint_planner"
Updates the target blueprint


task: "blueprint_rendezvous"
reconciles blueprints and inventory collection, updating Reconfigurator-
owned rendezvous tables that other subsystems consume
Expand Down Expand Up @@ -426,6 +434,10 @@ task: "blueprint_loader"
Loads the current target blueprint from the DB


task: "blueprint_planner"
Updates the target blueprint


task: "blueprint_rendezvous"
reconciles blueprints and inventory collection, updating Reconfigurator-
owned rendezvous tables that other subsystems consume
Expand Down
22 changes: 22 additions & 0 deletions dev-tools/omdb/tests/successes.out
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,10 @@ task: "blueprint_loader"
Loads the current target blueprint from the DB


task: "blueprint_planner"
Updates the target blueprint


task: "blueprint_rendezvous"
reconciles blueprints and inventory collection, updating Reconfigurator-
owned rendezvous tables that other subsystems consume
Expand Down Expand Up @@ -521,6 +525,13 @@ task: "bfd_manager"
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
last completion reported error: failed to resolve addresses for Dendrite services: no record found for Query { name: Name("_dendrite._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN }

task: "blueprint_planner"
configured period: every <REDACTED_DURATION>m
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a dependent task completing
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
blueprint planning explicitly disabled by config!

task: "blueprint_rendezvous"
configured period: every <REDACTED_DURATION>m
currently executing: no
Expand Down Expand Up @@ -1040,6 +1051,13 @@ task: "bfd_manager"
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
last completion reported error: failed to resolve addresses for Dendrite services: no record found for Query { name: Name("_dendrite._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN }

task: "blueprint_planner"
configured period: every <REDACTED_DURATION>m
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a dependent task completing
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
blueprint planning explicitly disabled by config!

task: "blueprint_rendezvous"
configured period: every <REDACTED_DURATION>m
currently executing: no
Expand Down Expand Up @@ -1450,12 +1468,14 @@ parent: <none>
oxp_..........<REDACTED_UUID>.........../crypt/zone/oxz_external_dns_..........<REDACTED_UUID>........... ..........<REDACTED_UUID>........... in service none none off
oxp_..........<REDACTED_UUID>.........../crypt/zone/oxz_internal_dns_..........<REDACTED_UUID>........... ..........<REDACTED_UUID>........... in service none none off
oxp_..........<REDACTED_UUID>.........../crypt/zone/oxz_nexus_..........<REDACTED_UUID>........... ..........<REDACTED_UUID>........... in service none none off
oxp_..........<REDACTED_UUID>.........../crypt/zone/oxz_ntp_..........<REDACTED_UUID>........... ..........<REDACTED_UUID>........... in service none none off


omicron zones:
---------------------------------------------------------------------------------------------------------
zone type zone id image source disposition underlay IP
---------------------------------------------------------------------------------------------------------
boundary_ntp ..........<REDACTED_UUID>........... install dataset in service ::1
clickhouse ..........<REDACTED_UUID>........... install dataset in service ::1
cockroach_db ..........<REDACTED_UUID>........... install dataset in service ::1
crucible_pantry ..........<REDACTED_UUID>........... install dataset in service ::1
Expand Down Expand Up @@ -1552,12 +1572,14 @@ parent: <none>
oxp_..........<REDACTED_UUID>.........../crypt/zone/oxz_external_dns_..........<REDACTED_UUID>........... ..........<REDACTED_UUID>........... in service none none off
oxp_..........<REDACTED_UUID>.........../crypt/zone/oxz_internal_dns_..........<REDACTED_UUID>........... ..........<REDACTED_UUID>........... in service none none off
oxp_..........<REDACTED_UUID>.........../crypt/zone/oxz_nexus_..........<REDACTED_UUID>........... ..........<REDACTED_UUID>........... in service none none off
oxp_..........<REDACTED_UUID>.........../crypt/zone/oxz_ntp_..........<REDACTED_UUID>........... ..........<REDACTED_UUID>........... in service none none off


omicron zones:
---------------------------------------------------------------------------------------------------------
zone type zone id image source disposition underlay IP
---------------------------------------------------------------------------------------------------------
boundary_ntp ..........<REDACTED_UUID>........... install dataset in service ::1
clickhouse ..........<REDACTED_UUID>........... install dataset in service ::1
cockroach_db ..........<REDACTED_UUID>........... install dataset in service ::1
crucible_pantry ..........<REDACTED_UUID>........... install dataset in service ::1
Expand Down
12 changes: 5 additions & 7 deletions docs/reconfigurator.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -124,18 +124,16 @@ The Planner
+----------+ | +----------/
| | |
v v v

"planner"
(eventually a background task)
|
v no
is a new blueprint necessary? ------> done
planner background task
|
| yes
v
generate a new blueprint
|
|
v no
is the new blueprint different from the current target? ------> done
|
| yes
v
commit blueprint to database
|
Expand Down
16 changes: 15 additions & 1 deletion nexus-config/src/nexus_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -592,19 +592,27 @@ pub struct PhantomDiskConfig {
#[serde_as]
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct BlueprintTasksConfig {
/// background planner chicken switch
pub disable_planner: bool,

/// period (in seconds) for periodic activations of the background task that
/// reads the latest target blueprint from the database
#[serde_as(as = "DurationSeconds<u64>")]
pub period_secs_load: Duration,

/// period (in seconds) for periodic activations of the background task that
/// plans and updates the target blueprint
#[serde_as(as = "DurationSeconds<u64>")]
pub period_secs_plan: Duration,

/// period (in seconds) for periodic activations of the background task that
/// executes the latest target blueprint
#[serde_as(as = "DurationSeconds<u64>")]
pub period_secs_execute: Duration,

/// period (in seconds) for periodic activations of the background task that
/// reconciles the latest blueprint and latest inventory collection into
/// Reconfigurator rendezvous tables
#[serde_as(as = "DurationSeconds<u64>")]
pub period_secs_rendezvous: Duration,

Expand Down Expand Up @@ -1055,7 +1063,9 @@ mod test {
physical_disk_adoption.period_secs = 30
decommissioned_disk_cleaner.period_secs = 30
phantom_disks.period_secs = 30
blueprints.disable_planner = true
blueprints.period_secs_load = 10
blueprints.period_secs_plan = 60
blueprints.period_secs_execute = 60
blueprints.period_secs_rendezvous = 300
blueprints.period_secs_collect_crdb_node_ids = 180
Expand Down Expand Up @@ -1220,7 +1230,9 @@ mod test {
period_secs: Duration::from_secs(30),
},
blueprints: BlueprintTasksConfig {
disable_planner: true,
period_secs_load: Duration::from_secs(10),
period_secs_plan: Duration::from_secs(60),
period_secs_execute: Duration::from_secs(60),
period_secs_collect_crdb_node_ids:
Duration::from_secs(180),
Expand Down Expand Up @@ -1364,7 +1376,9 @@ mod test {
physical_disk_adoption.period_secs = 30
decommissioned_disk_cleaner.period_secs = 30
phantom_disks.period_secs = 30
blueprints.disable_planner = true
blueprints.period_secs_load = 10
blueprints.period_secs_plan = 60
blueprints.period_secs_execute = 60
blueprints.period_secs_rendezvous = 300
blueprints.period_secs_collect_crdb_node_ids = 180
Expand Down
1 change: 1 addition & 0 deletions nexus/background-task-interface/src/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ pub struct BackgroundTasks {
pub task_decommissioned_disk_cleaner: Activator,
pub task_phantom_disks: Activator,
pub task_blueprint_loader: Activator,
pub task_blueprint_planner: Activator,
pub task_blueprint_executor: Activator,
pub task_blueprint_rendezvous: Activator,
pub task_crdb_node_id_collector: Activator,
Expand Down
2 changes: 2 additions & 0 deletions nexus/examples/config-second.toml
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,9 @@ phantom_disks.period_secs = 30
physical_disk_adoption.period_secs = 30
support_bundle_collector.period_secs = 30
decommissioned_disk_cleaner.period_secs = 60
blueprints.disable_planner = true
blueprints.period_secs_load = 10
blueprints.period_secs_plan = 60
blueprints.period_secs_execute = 60
blueprints.period_secs_rendezvous = 300
blueprints.period_secs_collect_crdb_node_ids = 180
Expand Down
2 changes: 2 additions & 0 deletions nexus/examples/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,9 @@ phantom_disks.period_secs = 30
physical_disk_adoption.period_secs = 30
support_bundle_collector.period_secs = 30
decommissioned_disk_cleaner.period_secs = 60
blueprints.disable_planner = true
blueprints.period_secs_load = 10
blueprints.period_secs_plan = 60
blueprints.period_secs_execute = 60
blueprints.period_secs_rendezvous = 300
blueprints.period_secs_collect_crdb_node_ids = 180
Expand Down
Loading
Loading