structured-world
diff --git a/zstd/src/decoding/mod.rs b/zstd/src/decoding/mod.rs
Lines changed: 23 additions & 12 deletions
diff --git a/zstd/src/decoding/scratch.rs b/zstd/src/decoding/scratch.rs
Lines changed: 5 additions & 5 deletions
@@ -55,22 +55,33 @@ pub(crate) mod prefetch;
 mod ringbuffer;
 #[allow(dead_code)]
 pub(crate) mod scratch;
-// Per-kernel sequence-section decoder entry points. Each module owns
-// its full `#[target_feature]`-tagged decoder body generated by the
-// `define_x86_seq_decoder_tier!` macro in `seq_decoder_x86_kernel`.
-// Per-tier bodies carry the BMI2-direct `peek_bits_triple_bmi2` hot
-// site (#279 round 3 Phase 3); the AVX2 tier additionally routes
-// match-copy through `wildcopy_no_overlap_avx2` (32-byte ymm stride)
-// via `BufferBackend::exec_sequence_inline_avx2` (#279 round 3
-// Phase 4). The shared K-generic `decode_and_execute_sequences_impl`
-// remains for the Scalar arm and aarch64 dispatch only.
-#[cfg(target_arch = "x86_64")]
-#[macro_use]
-pub(crate) mod seq_decoder_x86_kernel;
+// Per-kernel monolithic sequence-section decoder entry points. Each
+// kernel has its own self-contained function with the full pipeline
+// (outer init, both arms, decode_one, execute_one) inlined inside one
+// `#[target_feature]`-scoped body. The dispatcher in
+// `sequence_section_decoder::decode_and_execute_sequences` selects the
+// kernel ONCE per call via cached `detect_cpu_kernel`. aarch64 Neon
+// and Sve still go through the K-generic
+// `decode_and_execute_sequences_impl` shared body until their own
+// monoliths land.
+//
+// The shared helpers (`decode_and_execute_sequences_impl`,
+// `run_pipelined_sequence_loop`, `decode_one_sequence_inline`, the
+// `execute_one_sequence_pipelined*` wrappers) remain in use on aarch64
+// (Neon/Sve dispatch arms in `decode_and_execute_sequences`) and in
+// tests, but are orphaned on x86_64 production builds, where the
+// per-kernel monoliths bypass them entirely. Each carries
+// `#[allow(dead_code)]` so the `-D warnings` clippy gate stays green
+// on x86_64 without losing the cross-arch reuse. The vestigial
+// `_bmi2`/`_avx2`/`_vbmi2` variants are pre-R12 macro-dispatch
+// helpers with no remaining callers; they should be cleaned up in
+// a follow-up PR once the per-kernel monolithic shape is fully
+// settled.
 #[cfg(target_arch = "x86_64")]
 pub(crate) mod seq_decoder_avx2;
 #[cfg(target_arch = "x86_64")]
 pub(crate) mod seq_decoder_bmi2;
+pub(crate) mod seq_decoder_scalar;
 #[cfg(target_arch = "x86_64")]
 pub(crate) mod seq_decoder_vbmi2;
 pub(crate) mod sequence_execution;
 
@@ -5,7 +5,7 @@ use super::buffer_backend::BufferBackend;
 use super::decode_buffer::DecodeBuffer;
 use super::ringbuffer::RingBuffer;
 use crate::decoding::dictionary::Dictionary;
-use crate::fse::FSETable;
+use crate::fse::SeqFSETable;
 use crate::huff0::HuffmanTable;
 use alloc::vec::Vec;
 use core::ops::{Deref, DerefMut};
@@ -348,19 +348,19 @@ impl Default for FSEScratch {
 
 // Keep LL/ML/OF table *objects* cache-line aligned to avoid cross-table placement
 // effects in DecoderScratch when they are accessed in the same decode hot loop.
-// Note: this aligns the table containers, not the `Vec<Entry>` backing allocations.
+// Note: this aligns the table containers, not the `Vec<SeqSymbol>` backing allocations.
 #[cfg_attr(target_arch = "aarch64", repr(align(128)))]
 #[cfg_attr(not(target_arch = "aarch64"), repr(align(64)))]
-pub struct AlignedFSETable(FSETable);
+pub struct AlignedFSETable(SeqFSETable);
 
 impl AlignedFSETable {
     fn new(max_symbol: u8) -> Self {
-        Self(FSETable::new(max_symbol))
+        Self(SeqFSETable::new(max_symbol))
     }
 }
 
 impl Deref for AlignedFSETable {
-    type Target = FSETable;
+    type Target = SeqFSETable;
 
     fn deref(&self) -> &Self::Target {
         &self.0