Skip to content

Commit eda5793

Browse files
perf: Parallelise merging of gdb index scans (#2079)
1 parent 04e8a4b commit eda5793

1 file changed

Lines changed: 113 additions & 29 deletions

File tree

libwild/src/gdb_index.rs

Lines changed: 113 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
use crate::elf::Elf;
99
use crate::error::Context as _;
1010
use crate::error::Result;
11+
use crate::hash::PassThroughHashMap;
12+
use crate::hash::PreHashed;
1113
use crate::layout::FileLayout;
1214
use crate::layout::FileLayoutState;
1315
use crate::layout::GroupState;
@@ -28,6 +30,7 @@ use linker_utils::utils::u32_from_slice;
2830
use linker_utils::utils::u64_from_slice;
2931
use object::read::elf::SectionHeader as _;
3032
use rayon::iter::IntoParallelRefIterator as _;
33+
use rayon::iter::ParallelBridge as _;
3134
use rayon::iter::ParallelIterator as _;
3235
use rayon::slice::ParallelSliceMut as _;
3336
use std::borrow::Cow;
@@ -365,7 +368,8 @@ pub(crate) struct GdbIndexScanResult<'data> {
365368

366369
/// Result of scanning a single input object for GDB index data.
367370
struct PerObjectGdbScan<'data> {
368-
num_cus: usize,
371+
num_cus: u32,
372+
cu_base: u32,
369373
num_addr_entries: usize,
370374
/// `(name, local_cu_index, attrs)`. CU index is 0-based within this object.
371375
/// Names are owned because section data may have been decompressed.
@@ -428,42 +432,56 @@ fn scan_one_object<'data>(
428432
}
429433

430434
Ok(Some(PerObjectGdbScan {
431-
num_cus: boundaries.len(),
435+
num_cus: boundaries.len() as u32,
432436
num_addr_entries,
433437
symbol_entries,
438+
// Populated during merge.
439+
cu_base: 0,
434440
}))
435441
}
436442

443+
/// Per-chunk output of `bucket_names`: the symbol entries of one chunk of scans, grouped by
/// bucket number (`name hash % num_buckets`).
struct MergeState<'data> {
444+
/// One vector per bucket; `bucket_names` creates `num_buckets` of them and indexes this with
/// `hash as usize % num_buckets`.
buckets: Vec<Vec<NamedCuEntry<'data>>>,
445+
}
446+
447+
/// A symbol name paired with one encoded CU-vector entry, as produced by `bucket_names`.
#[derive(Debug, Clone, Copy)]
448+
struct NamedCuEntry<'data> {
449+
/// Symbol name bytes wrapped together with the hash computed by `crate::hash::hash_bytes`.
name: PreHashed<&'data [u8]>,
450+
/// Result of `encode_cu_vector_entry` for the global CU index (`cu_base` + object-local index)
/// and the symbol's attrs.
entry: u32,
451+
}
452+
437453
/// Merge per-object scan results into a single `GdbIndexScanResult`, assigning global CU indices.
438-
fn merge_gdb_index_scans(per_object: Vec<Option<PerObjectGdbScan>>) -> GdbIndexScanResult {
454+
fn merge_gdb_index_scans(mut per_object: Vec<Option<PerObjectGdbScan>>) -> GdbIndexScanResult {
439455
timing_phase!("Merge GDB index scans");
440456

441-
let mut total_cus = 0usize;
442-
let mut total_addr_entries = 0usize;
443-
let mut sym_map: HashMap<&[u8], SymData> = HashMap::new();
444-
let mut per_object_cu_counts = Vec::new();
457+
let mut total_cus = 0_usize;
458+
let mut total_addr_entries = 0_usize;
459+
460+
let per_object_cu_counts = {
461+
verbose_timing_phase!("Pre-merge processing");
462+
463+
per_object
464+
.iter_mut()
465+
.map(|scan| {
466+
scan.as_mut().map_or(0, |scan| {
467+
scan.cu_base = total_cus as u32;
468+
total_cus += scan.num_cus as usize;
469+
total_addr_entries += scan.num_addr_entries;
470+
scan.num_cus
471+
})
472+
})
473+
.collect_vec()
474+
};
445475

446-
for scan in per_object {
447-
let Some(scan) = scan else {
448-
per_object_cu_counts.push(0);
449-
continue;
450-
};
451-
let base = total_cus as u32;
452-
total_cus += scan.num_cus;
453-
total_addr_entries += scan.num_addr_entries;
454-
per_object_cu_counts.push(scan.num_cus as u32);
455-
456-
for (name, local_cu_idx, attrs) in scan.symbol_entries {
457-
let entry = encode_cu_vector_entry(base + local_cu_idx, attrs);
458-
let sd = sym_map.entry(name).or_insert_with_key(|name| SymData {
459-
cv_entries: Vec::with_capacity(4),
460-
hash: gdb_hash(name),
461-
});
462-
sd.cv_entries.push(entry);
463-
}
464-
}
476+
let all_scans = per_object.into_iter().flatten().collect_vec();
465477

466-
let mut sorted = sort_symbols(sym_map);
478+
let num_buckets = rayon::current_num_threads();
479+
480+
let states = bucket_names(&all_scans, num_buckets);
481+
482+
let buckets = build_name_maps(num_buckets, &states);
483+
484+
let mut sorted = sort_symbols(buckets);
467485

468486
sort_cv_entries(&mut sorted);
469487

@@ -485,10 +503,76 @@ fn merge_gdb_index_scans(per_object: Vec<Option<PerObjectGdbScan>>) -> GdbIndexS
485503
}
486504
}
487505

488-
fn sort_symbols(sym_map: HashMap<&[u8], SymData>) -> Vec<(&[u8], SymData)> {
506+
/// Hashes every symbol name in `all_scans` and distributes the resulting `NamedCuEntry`s into
/// `num_buckets` buckets, selected by `hash % num_buckets`.
///
/// The scans are split into chunks that are processed in parallel. Each chunk fills its own
/// `MergeState` containing `num_buckets` vectors, and one `MergeState` per chunk is returned.
/// An empty `all_scans` produces an empty result.
///
/// `num_buckets` must be non-zero because it is used as a modulus.
///
/// NOTE(review): `par_bridge` does not guarantee output order, so the order of the returned
/// states may vary between runs. The caller sorts the merged symbols and CU-vector entries
/// afterwards.
fn bucket_names<'data>(
507+
all_scans: &[PerObjectGdbScan<'data>],
508+
num_buckets: usize,
509+
) -> Vec<MergeState<'data>> {
510+
timing_phase!("Bucket names");
511+
512+
all_scans
513+
// Chunk size is a quarter of an even per-thread share, i.e. roughly four chunks per rayon
// thread. `.max(1)` keeps it non-zero, since `chunks` panics on a chunk size of 0.
.chunks((all_scans.len() / rayon::current_num_threads() / 4).max(1))
514+
.par_bridge()
515+
.map(|scans| {
516+
verbose_timing_phase!("Bucket names chunk");
517+
518+
let mut state = MergeState {
519+
buckets: vec![Vec::new(); num_buckets],
520+
};
521+
522+
for scan in scans {
523+
for (name, local_cu_idx, attrs) in &scan.symbol_entries {
524+
// Hash once: the value selects the bucket and is also stored alongside the name in
// `PreHashed` (presumably so later map insertion need not re-hash — confirm).
let hash = crate::hash::hash_bytes(name);
525+
// Turn the object-local CU index into a global one via the base assigned during merge.
let entry = encode_cu_vector_entry(scan.cu_base + local_cu_idx, *attrs);
526+
state.buckets[hash as usize % num_buckets].push(NamedCuEntry {
527+
name: PreHashed::new(name, hash),
528+
entry,
529+
});
530+
}
531+
}
532+
533+
state
534+
})
535+
.collect()
536+
}
537+
538+
/// Builds one name-to-`SymData` map per bucket, processing the buckets in parallel.
///
/// For each bucket number in `0..num_buckets`, the entries stored under that bucket in every
/// element of `states` are folded into a single map. The first time a name is seen, a `SymData`
/// is created with its `gdb_hash`. Every entry is then appended to that name's `cv_entries`.
///
/// Every element of `states` must hold at least `num_buckets` buckets, otherwise the bucket
/// indexing panics.
///
/// NOTE(review): because `par_bridge` is used, the returned maps are not guaranteed to be in
/// bucket-number order.
fn build_name_maps<'data>(
539+
num_buckets: usize,
540+
states: &[MergeState<'data>],
541+
) -> Vec<HashMap<PreHashed<&'data [u8]>, SymData, crate::hash::PassThroughHasher>> {
542+
timing_phase!("Build name maps");
543+
544+
(0..num_buckets)
545+
.par_bridge()
546+
.map(|bucket_num| {
547+
verbose_timing_phase!("Build name map from bucket");
548+
549+
// NOTE(review): keys are `PreHashed`, so this map presumably reuses the hash computed in
// `bucket_names` rather than hashing the name bytes again — confirm in `crate::hash`.
let mut map = PassThroughHashMap::default();
550+
551+
for state in states {
552+
for v in &state.buckets[bucket_num] {
553+
// The closure (and therefore `gdb_hash`) only runs when the name is not yet in the map.
let sd = map.entry(v.name).or_insert_with(|| SymData {
554+
cv_entries: Vec::with_capacity(4),
555+
hash: gdb_hash(*v.name),
556+
});
557+
sd.cv_entries.push(v.entry);
558+
}
559+
}
560+
561+
map
562+
})
563+
.collect()
564+
}
565+
566+
/// Flattens the per-bucket maps into a single list of `(name, SymData)` pairs and returns it
/// sorted by name bytes.
///
/// The sort is a parallel, unstable sort keyed on the name slice.
fn sort_symbols(buckets: Vec<PassThroughHashMap<&[u8], SymData>>) -> Vec<(&[u8], SymData)> {
489567
timing_phase!("Sort symbols");
490568

491-
let mut sorted: Vec<(&[u8], SymData)> = sym_map.into_iter().collect_vec();
569+
let mut sorted: Vec<(&[u8], SymData)> = buckets
570+
.into_iter()
571+
// Dereference each key so only the raw name bytes are kept, as the return type requires.
.flat_map(|map| map.into_iter().map(|(k, v)| (*k, v)))
572+
.collect_vec();
573+
574+
verbose_timing_phase!("Parallel sort");
575+
492576
sorted.par_sort_unstable_by_key(|(a, _)| *a);
493577
sorted
494578
}

0 commit comments

Comments
 (0)