Skip to content
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
345e058
perf(encode): add SoA node-price array to the optimal parser (infra)
polaz Jun 16, 2026
afc7d24
perf(encode): SIMD price-set compare for non-abort optimal modes
polaz Jun 16, 2026
aec7ab9
perf(encode): gate optimal-parser SIMD price-set to long ranges only
polaz Jun 16, 2026
a427d8a
perf(encode): macro-expand optimal-parser SIMD price-set per CPU tier
polaz Jun 16, 2026
2ce0b35
perf(encode): vector-load cached ml-prices in the optimal-parser pric…
polaz Jun 16, 2026
63bd6d5
perf(encode): cheaper in-lane deinterleave for the price-set vector load
polaz Jun 16, 2026
4fe3239
perf(encode): generic price-set vector body + NEON tier
polaz Jun 16, 2026
9f52869
perf(encode): wire SSE4.1 price-set tier + per-tier helper correctnes…
polaz Jun 16, 2026
1b0ffae
perf(encode): wire wasm simd128 price-set tier
polaz Jun 16, 2026
b8b17fc
fix(encode): hash the borrowed window in HashChain insert, not the ow…
polaz Jun 16, 2026
8357600
perf(encode): reuse HC dict tables on snapshot restore instead of rea…
polaz Jun 16, 2026
3e064f5
perf(hc): raw u32 MEM_read32 gate in chain walk
polaz Jun 16, 2026
45ee62d
perf(hc): relative-space chain walk, hoist loop invariants
polaz Jun 16, 2026
5df3782
perf(hc): lazy skipping over matchless runs (donor parity)
polaz Jun 16, 2026
a1da846
perf(hc): separate dictMatchState fused into the lazy chain walk
polaz Jun 16, 2026
8710803
perf(hc): monomorphise lazy parse over DICT (dms split)
polaz Jun 16, 2026
ba8dab6
perf(hc): inline the dms walk into the DICT=true monomorph
polaz Jun 16, 2026
d6f69f8
fix(opt): count node-price buffer in workspace estimate
polaz Jun 16, 2026
80ea26e
fix(dict): align HC attach cutoff to upstream 32 KiB
polaz Jun 16, 2026
939a16b
fix(dict): add BT/optimal dict copy path to match upstream shape
polaz Jun 16, 2026
d9b6365
fix(dict): tag dms layout + sync SoA prices in optimal seed
polaz Jun 16, 2026
6bfe828
fix(dict): gate lazy dispatcher on dms.is_primed()
polaz Jun 16, 2026
4f685a0
refactor(opt): make node_prices the single source of truth for price
polaz Jun 16, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 23 additions & 2 deletions zstd/src/encoding/bt/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,13 @@ pub(crate) struct BtMatcher {
/// boxed slice (no `cap` field, no in-parse `resize`/realloc) sized to
/// `HC_OPT_NODE_LEN`, mirroring upstream zstd's fixed `opt[ZSTD_OPT_NUM]`.
pub(crate) opt_nodes_scratch: alloc::boxed::Box<[HcOptimalNode]>,
/// SoA companion to `opt_nodes_scratch`: the running DP price for each
/// node, split out of `HcOptimalNode` into its own contiguous `u32`
/// array so the optimal-parser inner price-set loop can SIMD-compare a
/// run of consecutive node prices with a single vector load (the 28-byte
/// AoS node stride would otherwise force a strided gather). Same length
/// as `opt_nodes_scratch`; index `i` is the price of node `i`.
pub(crate) opt_node_prices_scratch: alloc::boxed::Box<[u32]>,
Comment thread
coderabbitai[bot] marked this conversation as resolved.
/// Per-frame scratch for collected match candidates.
pub(crate) opt_candidates_scratch: Vec<MatchCandidate>,
/// Per-frame scratch for the final emitted node stream.
Expand Down Expand Up @@ -104,7 +111,8 @@ impl BtMatcher {
/// payload (`HcOptState` cost tables, lit-price arrays) plus the
/// retained scratch arenas at their growth bounds — node frontier and
/// emitted store (`HC_OPT_NODE_LEN` nodes each, including the `+2`
/// lookahead slack), the consolidated price arena (two frontier-sized
/// lookahead slack), the SoA node-price companion (`HC_OPT_NODE_LEN`
/// `u32`s), the consolidated price arena (two frontier-sized
/// `[price, generation]` pair regions, LL and ML), the per-segment plan
/// buffers, and the candidate ladder (`MAX_HC_SEARCH_DEPTH`). LDM is
/// opt-in and excluded (`ldm_sequences` stays empty on every level
Expand All @@ -116,6 +124,7 @@ impl BtMatcher {
let frontier = HC_OPT_NUM + 1;
core::mem::size_of::<Self>()
+ 2 * HC_OPT_NODE_LEN * core::mem::size_of::<HcOptimalNode>()
+ HC_OPT_NODE_LEN * core::mem::size_of::<u32>()
+ 2 * frontier * core::mem::size_of::<[u32; 2]>()
+ 2 * frontier * core::mem::size_of::<HcOptimalSequence>()
+ MAX_HC_SEARCH_DEPTH * core::mem::size_of::<MatchCandidate>()
Expand Down Expand Up @@ -150,6 +159,7 @@ impl BtMatcher {
// first runs (non-BT strategies never touch these), matching
// the prior lazy `Vec::new()` + grow behaviour.
opt_nodes_scratch: alloc::boxed::Box::default(),
opt_node_prices_scratch: alloc::boxed::Box::default(),
opt_candidates_scratch: Vec::new(),
opt_store_scratch: Vec::new(),
opt_segment_plan_scratch: Vec::new(),
Expand All @@ -171,6 +181,7 @@ impl BtMatcher {
/// (counted by the owner's `size_of`), so only the heap-backed scratch fields (the `Vec`s and the boxed slices) contribute.
pub(crate) fn heap_size(&self) -> usize {
let scratch = self.opt_nodes_scratch.len() * core::mem::size_of::<HcOptimalNode>()
+ self.opt_node_prices_scratch.len() * core::mem::size_of::<u32>()
+ self.opt_candidates_scratch.capacity() * core::mem::size_of::<MatchCandidate>()
+ self.opt_store_scratch.capacity() * core::mem::size_of::<HcOptimalNode>()
+ (self.opt_segment_plan_scratch.capacity() + self.opt_seed_plan_scratch.capacity())
Expand Down Expand Up @@ -360,12 +371,14 @@ impl BtMatcher {
) -> (u32, [u32; 3], usize, usize) {
let HcOptimalPlanBuffers {
nodes,
node_prices,
mut candidates,
store,
price_arena,
} = buffers;
candidates.clear();
self.opt_nodes_scratch = nodes;
self.opt_node_prices_scratch = node_prices;
self.opt_candidates_scratch = candidates;
self.opt_store_scratch = store;
self.opt_price_arena = price_arena;
Expand Down Expand Up @@ -600,10 +613,18 @@ impl BtMatcher {
}

#[inline(always)]
pub(crate) fn reset_opt_nodes(nodes: &mut [HcOptimalNode], start: usize, end: usize) {
/// Resets the inclusive node range `start..=end` in both parallel arrays.
///
/// Each `HcOptimalNode` in `nodes[start..=end]` is handed to
/// `Self::reset_opt_node`, and then every entry of
/// `node_prices[start..=end]` is overwritten with `u32::MAX`.
///
/// # Panics
///
/// Panics if `start..=end` is not a valid range for `nodes` or for
/// `node_prices` (the usual slice-indexing rules apply to each).
pub(crate) fn reset_opt_nodes(
    nodes: &mut [HcOptimalNode],
    node_prices: &mut [u32],
    start: usize,
    end: usize,
) {
    // Nodes are reset first, so the order of the two bounds checks is the
    // same as a node loop followed by a price loop.
    let node_run = &mut nodes[start..=end];
    for slot in node_run.iter_mut() {
        Self::reset_opt_node(slot);
    }

    // NOTE(review): `u32::MAX` presumably marks a price as "not yet set";
    // confirm against the price-set loop that reads this array.
    node_prices[start..=end].fill(u32::MAX);
}

#[inline(always)]
Expand Down
Loading