Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
291 changes: 291 additions & 0 deletions dev-tools/omdb/src/bin/omdb/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,8 @@ enum DbCommands {
Sitreps(sitrep::SitrepHistoryArgs),
/// Print information about sleds
Sleds(SledsArgs),
/// Show instances grouped by the sled they are running on
SledInstances(SledInstancesArgs),
/// Print information about customer instances.
Instance(InstanceArgs),
/// Alias to `omdb instance list`.
Expand Down Expand Up @@ -736,6 +738,72 @@ struct SledsArgs {
filter: Option<SledFilter>,
}

// Command-line arguments for `omdb db sled-instances`.
//
// All three filters are optional. When more than one is supplied,
// `cmd_db_sled_instances` keeps a sled if it matches ANY of them (the
// filters are OR-ed, not AND-ed).
//
// NOTE: plain `//` comments are used for these notes on purpose; clap's
// derive turns `///` comments into user-visible help text.
#[derive(Debug, Args, Clone)]
struct SledInstancesArgs {
/// Filter by sled number(s). Comma-separated, ranges allowed
/// (e.g. "0,3,14-16")
// The whole argument string is handed to `SledNumbers::from_str`, which
// splits on commas, expands ranges, and rejects values above
// `MAX_SLED_NUMBER`.
#[clap(long = "sled")]
sled_numbers: Option<SledNumbers>,

/// Filter by sled serial number(s). Comma-separated
/// (e.g. "BRM44220010,BRM44220022")
// Serials are compared for exact string equality against the serial
// recorded in inventory.
#[clap(long = "serial", use_value_delimiter = true)]
serials: Option<Vec<String>>,

/// Filter by sled UUID(s). Comma-separated
#[clap(long = "sled-id", use_value_delimiter = true)]
sled_ids: Option<Vec<SledUuid>>,
}

/// Sled numbers parsed from a comma-separated list whose entries are either
/// a single number or an inclusive `start-end` range
/// (e.g. "0,3,14-16" -> [0, 3, 14, 15, 16]).
///
/// Values are stored in the order they were written; duplicates are kept.
#[derive(Debug, Clone)]
struct SledNumbers(Vec<u16>);

/// Largest sled number accepted when parsing a `SledNumbers` list.
const MAX_SLED_NUMBER: u16 = 31;

impl FromStr for SledNumbers {
    type Err = String;

    /// Parse a list such as `"0,3,14-16"`.
    ///
    /// Whitespace around each entry and around each range endpoint is
    /// ignored. Parsing stops at the first bad entry and returns a
    /// human-readable message for it: a non-numeric value, a range whose
    /// end is below its start, or a number above `MAX_SLED_NUMBER`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Both the single-number and the range branch report an
        // out-of-bounds value with the same wording.
        let too_large = |n: u16| {
            format!("sled number {n} exceeds maximum ({MAX_SLED_NUMBER})")
        };

        let mut numbers = Vec::new();
        for entry in s.split(',').map(str::trim) {
            match entry.split_once('-') {
                None => {
                    let single = entry.parse::<u16>().map_err(|e| {
                        format!("invalid sled number '{entry}': {e}")
                    })?;
                    if single > MAX_SLED_NUMBER {
                        return Err(too_large(single));
                    }
                    numbers.push(single);
                }
                Some((lo_text, hi_text)) => {
                    // Error messages quote the endpoint text exactly as
                    // written (before trimming).
                    let lo = lo_text.trim().parse::<u16>().map_err(|e| {
                        format!("invalid sled number '{lo_text}': {e}")
                    })?;
                    let hi = hi_text.trim().parse::<u16>().map_err(|e| {
                        format!("invalid sled number '{hi_text}': {e}")
                    })?;
                    if hi < lo {
                        return Err(format!(
                            "invalid range '{entry}': end < start"
                        ));
                    }
                    // `lo <= hi` holds here, so bounding `hi` bounds the
                    // whole range.
                    if hi > MAX_SLED_NUMBER {
                        return Err(too_large(hi));
                    }
                    numbers.extend(lo..=hi);
                }
            }
        }
        Ok(Self(numbers))
    }
}

#[derive(Debug, Args, Clone)]
struct RegionArgs {
#[command(subcommand)]
Expand Down Expand Up @@ -1377,6 +1445,15 @@ impl DbArgs {
DbCommands::Sleds(args) => {
cmd_db_sleds(&opctx, &datastore, &fetch_opts, args).await
}
DbCommands::SledInstances(args) => {
cmd_db_sled_instances(
&opctx,
&datastore,
&fetch_opts,
args,
)
.await
}
DbCommands::Instance(InstanceArgs {
command: InstanceCommands::List(args),
}) => {
Expand Down Expand Up @@ -4620,6 +4697,220 @@ async fn cmd_db_sleds(
Ok(())
}

/// Run `omdb db sled-instances`: show instances grouped by sled.
///
/// Sled slot numbers and serials are taken from the latest inventory
/// collection; the `--sled`, `--serial` and `--sled-id` filters in `args`
/// narrow that set of sleds. Non-deleted instances are then loaded (at most
/// `fetch_opts.fetch_limit` rows) together with their active VMMs and
/// printed to stdout as one table per sled. Warnings are written to stderr.
///
/// Returns an error if the inventory collection or the instance query
/// cannot be loaded.
async fn cmd_db_sled_instances(
opctx: &OpContext,
datastore: &DataStore,
fetch_opts: &DbFetchOptions,
args: &SledInstancesArgs,
) -> Result<(), anyhow::Error> {
use nexus_db_schema::schema::instance::dsl;
use nexus_db_schema::schema::vmm::dsl as vmm_dsl;

// Step 1: Fetch the latest inventory collection to get
// sled -> (MGS slot, serial) mappings.
let collection =
CollectionIdOrLatest::Latest.to_collection(opctx, datastore).await?;

// Build a map: sled_id -> (sp_slot, serial). We only care
// about SpType::Sled SPs since instances only run on sleds.
//
// `sp_slot` is `None` when inventory has no SP entry for the sled's
// baseboard, or no baseboard ID at all; in the latter case `serial` is the
// literal string "unknown".
struct SledInfo {
sp_slot: Option<u16>,
serial: String,
}

impl SledInfo {
// Heading text for this sled: "Sled <slot>", or "Sled ???" when the slot
// is not known.
fn slot_label(&self) -> String {
match self.sp_slot {
Some(sp_slot) => format!("Sled {}", sp_slot),
None => "Sled ???".to_string(),
}
}
}

let mut sled_info: BTreeMap<SledUuid, SledInfo> = BTreeMap::new();

for sled_agent in &collection.sled_agents {
let info = match &sled_agent.baseboard_id {
Some(baseboard_id) => {
let serial = baseboard_id.serial_number.clone();
match collection.sps.get(baseboard_id) {
Some(sp)
if sp.sp_type
== nexus_types::inventory::SpType::Sled =>
{
SledInfo { sp_slot: Some(sp.sp_slot), serial }
}
// `continue` bypasses the insert below, so this sled agent is
// left out of `sled_info` entirely and any instances on it are
// later classified as unmatched.
Some(_) => continue, // not a sled, skip
None => {
eprintln!(
"WARN: no SP found for baseboard \
{} (sled {})",
baseboard_id.serial_number, sled_agent.sled_id,
);
SledInfo { sp_slot: None, serial }
}
}
}
None => {
eprintln!(
"WARN: sled {} has no baseboard ID in \
inventory",
sled_agent.sled_id,
);
SledInfo { sp_slot: None, serial: "unknown".to_string() }
}
};
sled_info.insert(sled_agent.sled_id, info);
}

// Apply filters: keep only sleds matching the requested
// --sled, --serial, or --sled-id criteria.
//
// The filters are OR-ed: a sled is kept as soon as it matches any one of
// the supplied criteria. A sled whose slot is unknown can never match
// --sled, so it can only be selected by serial or UUID.
if args.sled_numbers.is_some()
|| args.serials.is_some()
|| args.sled_ids.is_some()
{
sled_info.retain(|sled_id, info| {
if let Some(ref nums) = args.sled_numbers {
if let Some(sp_slot) = info.sp_slot {
if nums.0.contains(&sp_slot) {
return true;
}
}
}
if let Some(ref serials) = args.serials {
if serials.iter().any(|s| s == &info.serial) {
return true;
}
}
if let Some(ref ids) = args.sled_ids {
if ids.contains(sled_id) {
return true;
}
}
false
});
}

// Step 2: Fetch all non-deleted instances joined with their
// active VMMs.
//
// This is a LEFT join, so an instance with no active VMM (or whose active
// VMM row is marked deleted) is still returned, paired with `None`.
// The query is not restricted by sled; sled filtering happens in Step 3.
let limit = fetch_opts.fetch_limit;
let instances: Vec<InstanceAndActiveVmm> = dsl::instance
.filter(dsl::time_deleted.is_null())
.left_join(
vmm_dsl::vmm.on(vmm_dsl::id
.nullable()
.eq(dsl::active_propolis_id)
.and(vmm_dsl::time_deleted.is_null())),
)
.limit(i64::from(u32::from(limit)))
.select((Instance::as_select(), Option::<Vmm>::as_select()))
.load_async(&*datastore.pool_connection_for_tests().await?)
.await
.context("loading instances")?
.into_iter()
.map(|i: (Instance, Option<Vmm>)| i.into())
.collect();

// NOTE(review): presumably reports when the number of rows reached `limit`
// and the listing may therefore be incomplete -- confirm against
// `check_limit`.
check_limit(&instances, limit, || "listing instances".to_string());

// Step 3: Group instances by sled_id (skip those with no
// active VMM / no sled).
//
// Instances whose sled is not in `sled_info` (filtered out above, skipped
// as a non-sled SP, or absent from inventory) are collected in `unmatched`
// instead of being dropped.
let mut instances_by_sled: BTreeMap<SledUuid, Vec<&InstanceAndActiveVmm>> =
BTreeMap::new();
let mut unmatched: Vec<&InstanceAndActiveVmm> = Vec::new();

for inst in &instances {
let sled_id = match inst.sled_id() {
Some(id) => id,
None => continue, // no active VMM, skip
};
if sled_info.contains_key(&sled_id) {
instances_by_sled.entry(sled_id).or_default().push(inst);
} else {
unmatched.push(inst);
}
}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it feels a bit odd to me that we do one big query to fetch every non-deleted instance, with one big fetch limit, and then we add them to a map of instances by sled, when we could just filter on the VMM record's sled UUID?

wouldn't it be nicer to implement this as:

  1. fetch all matching sleds and filter them as we do above
  2. sort sleds
  3. loop over the sorted sleds, query for the instances belonging to each sled, and print the table sled-by-sled

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've updated the loop, and yeah, it's much cleaner.

This new approach means we won't print any instances that don't have a matching sled. That situation would be an error anyway and is unlikely to happen, unless something is in flight between building the list of sleds and gathering the list of instances.


// Step 4: Sort sleds by slot number so that Sled 2 comes
// before Sled 10.
//
// The sort key is the `Option<u16>` slot, and `None` orders before any
// `Some`, so sleds with an unknown slot ("Sled ???") are printed first.
// `sort_by` is stable, so sleds with equal keys keep the sled-UUID order
// they had coming out of the `BTreeMap`.
//
// Every key in `instances_by_sled` was checked against `sled_info` when it
// was inserted, so the `filter_map` lookup is not expected to drop anything.
let mut sorted_sleds: Vec<_> = instances_by_sled
.iter()
.filter_map(|(sled_id, insts)| {
sled_info.get(sled_id).map(|info| (info, *sled_id, insts))
})
.collect();
sorted_sleds.sort_by(|(a, _, _), (b, _, _)| a.sp_slot.cmp(&b.sp_slot));

// One heading line plus one instance table per sled, separated by a blank
// line. Sleds with no instances never entered `instances_by_sled`, so they
// are not printed at all.
for (info, sled_id, insts) in &sorted_sleds {
println!(
"{} (serial: {}) sled_id: {}",
info.slot_label(),
info.serial,
sled_id,
);
print_instance_table(insts);
println!();
}

// Step 5: Show instances on sleds not in inventory, but only
// when no filters are active (if the user asked for specific
// sleds, everything else is irrelevant, not "unknown").
//
// This is the same "any filter supplied" condition that guarded the
// `retain` call earlier in the function.
let has_filter = args.sled_numbers.is_some()
|| args.serials.is_some()
|| args.sled_ids.is_some();
if !has_filter && !unmatched.is_empty() {
// Group unmatched by sled_id.
let mut unmatched_by_sled: BTreeMap<
SledUuid,
Vec<&InstanceAndActiveVmm>,
> = BTreeMap::new();
for inst in &unmatched {
// Only instances that had a sled ID were pushed onto `unmatched`, so
// this is expected to be `Some` for every entry.
if let Some(sled_id) = inst.sled_id() {
unmatched_by_sled.entry(sled_id).or_default().push(inst);
}
}
// The heading and table go to stdout; the explanatory NOTE goes to stderr
// so it can be silenced independently of the tables.
for (sled_id, insts) in &unmatched_by_sled {
println!("??? (serial: unknown) sled_id: {}", sled_id,);
eprintln!(
" NOTE: sled {} not found in latest \
inventory collection",
sled_id,
);
print_instance_table(insts);
println!();
}
}

Ok(())
}

/// Print `instances` to stdout as a borderless table with one row per
/// instance and the columns INSTANCE_ID, STATE and NAME.
fn print_instance_table(instances: &[&InstanceAndActiveVmm]) {
// Row type for the table. `rename_all` upper-cases the field names to form
// the column headers, and the field order here is the column order.
#[derive(Tabled)]
#[tabled(rename_all = "SCREAMING_SNAKE_CASE")]
struct SledInstanceRow {
instance_id: String,
state: String,
name: String,
Comment on lines +4836 to +4840
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I kind of feel like there is more data i would want to have in here; at minimum, I would hope that we also included the instance's Propolis UUID. Perhaps we could refactor this to share more code with the CustomerInstanceRow in omdb db instances? We could change this:

let cir = CustomerInstanceRow {
id: i.instance().id().to_string(),
name: i.instance().name().to_string(),
state: i.effective_state().to_string(),
intent: i.instance().intended_state,
propolis_id: (&i).into(),
?
so that the common fields are in a struct that we would also use here, and then have omdb db instance list embed that struct in one that also adds the host serial/ID?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here is how propolis IDs could look after making the change above:

root@oxz_switch0:~# /tmp/omdb db sled-instances --sled 15,16 2> /dev/null
Sled 15 (serial: BRM42220046)  sled_id: 20d857aa-043c-49bd-946e-b3a66f849f5b
INSTANCE_ID                          PROPOLIS_ID                          STATE   NAME         
0ac44fac-7d3d-4e98-996d-eaa7f130cdf7 1c7c3973-66eb-4ff0-b9d2-a22d2efc4569 running four-inst    
f486a222-871e-439e-9e5f-9593ec617aa1 2c66acdb-97a0-47dc-983d-74546a6723c2 running many-14-inst 
b5fcae99-f87c-420b-b2fa-2055c148f804 ad270ace-e1ee-431b-8180-7bf1b8be2769 running many-10-inst 
48d8364f-26bd-4ca5-a99b-70390998832f e2ce08c6-40d2-46d1-89e4-9e298d3dbb93 running many-18-inst 

Sled 16 (serial: BRM42220007)  sled_id: a0653160-81f7-4e78-97ef-721eae8e8c38
INSTANCE_ID                          PROPOLIS_ID                          STATE   NAME         
5cb00f31-1283-4db2-a97e-7276b3da9600 0a0cde5f-7a94-4205-b75d-0df48f0f6226 running many-19-inst 
750aeb06-8b6e-4f93-944f-73cff1f3e26a 0f4deb3d-d7c3-4bc0-8d77-76a1ebad9914 running focal        
c66a10d6-542d-4678-861e-563a0f59c19a 27ef10d5-8d3e-4c26-af31-70a2f928ebad running many-15-inst 
573efd67-c58b-4fca-b64e-d1cb7560b12a 2bb44274-2bcf-49ac-bd90-d2ff22ed1681 running many-5-inst  
428396d5-36e5-42c7-9f21-a891410be917 e6cd1fdb-afb7-41c3-8ffd-e23bc39adf44 running many-12-inst

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think that's great!

}

// Build one row per instance, in the order the caller supplied them. The
// STATE column comes from `effective_state()` on the combined
// instance/VMM record rather than from the instance record alone.
let rows: Vec<SledInstanceRow> = instances
.iter()
.map(|inst| SledInstanceRow {
instance_id: inst.instance().id().to_string(),
state: inst.effective_state().to_string(),
name: inst.instance().name().to_string(),
})
.collect();

// Render without borders. The padding arguments are
// (left, right, top, bottom): one space on the right of each cell keeps
// the columns apart.
let table = tabled::Table::new(rows)
.with(tabled::settings::Style::empty())
.with(tabled::settings::Padding::new(0, 1, 0, 0))
.to_string();

println!("{}", table);
}

// INSTANCES

/// Run `omdb db instance info`: show details about a customer VM.
Expand Down
2 changes: 2 additions & 0 deletions dev-tools/omdb/tests/usage_errors.out
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ Commands:
reports
sitreps Show the current history of fault management situation reports
sleds Print information about sleds
sled-instances Show instances grouped by the sled they are running on
instance Print information about customer instances
instances Alias to `omdb instance list`
network Print information about the network
Expand Down Expand Up @@ -204,6 +205,7 @@ Commands:
reports
sitreps Show the current history of fault management situation reports
sleds Print information about sleds
sled-instances Show instances grouped by the sled they are running on
instance Print information about customer instances
instances Alias to `omdb instance list`
network Print information about the network
Expand Down
Loading