Skip to content

Commit c0f99c0

Browse files
committed
Optimizations.
1 parent 2047753 commit c0f99c0

2 files changed

Lines changed: 74 additions & 46 deletions

File tree

src/derive/variant.rs

Lines changed: 69 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ impl<T: Int, const N: usize> Encoder<T> for VariantEncoder<T, N> {
2020
impl<T: Int, const N: usize> Buffer for VariantEncoder<T, N> {
2121
fn collect_into(&mut self, out: &mut Vec<u8>) {
2222
assert!(N >= 2);
23-
if core::mem::size_of::<T>() > 1 {
23+
if TypeId::of::<T>() != TypeId::of::<u8>() {
2424
pack_ints(self.0.as_mut_slice(), out);
2525
} else {
2626
pack_bytes_less_than::<N>(bytemuck::must_cast_slice::<T, u8>(self.0.as_slice()), out);
@@ -62,20 +62,30 @@ impl<'a, T: Int + Into<usize>, const N: usize, const C_STYLE: bool> View<'a>
6262
assert!(N >= 2);
6363
if TypeId::of::<T>() != TypeId::of::<u8>() {
6464
unpack_ints::<T>(input, length, &mut self.variants)?;
65-
// TODO: this uses extra memory bandwidth to rescan.
66-
for int in unsafe { self.variants.as_slice(length) } {
67-
if T::from_unaligned(*int).into() >= N {
65+
66+
/// Checks that `unpacked` ints are less than `N`, hopefully
67+
/// without a branch instruction for every int.
68+
fn check_less_than<T: Int + Into<usize>, const N: usize>(
69+
unpacked: &[T::Une],
70+
) -> Result<()> {
71+
if 2u64.pow(std::mem::size_of::<T>() as u32 * 8) - 1 > N as u64
72+
&& unpacked
73+
.iter()
74+
.copied()
75+
.map(T::from_unaligned)
76+
.max()
77+
.map(Into::into)
78+
.unwrap_or(0)
79+
>= N
80+
{
6881
return err("invalid enum variant index");
6982
}
83+
Ok(())
7084
}
85+
86+
check_less_than::<T, N>(unsafe { self.variants.as_slice(length) })?;
7187
} else {
72-
// SAFETY: Checked the type above and [u8; 1] has the
73-
// same memory layout as `u8`.
74-
let out = unsafe {
75-
core::mem::transmute::<&mut CowSlice<'a, T::Une>, &mut CowSlice<'a, u8>>(
76-
&mut self.variants,
77-
)
78-
};
88+
let out = self.variants.cast_mut::<u8>();
7989
if C_STYLE {
8090
unpack_bytes_less_than::<N, 0>(input, length, out)?;
8191
} else {
@@ -130,41 +140,6 @@ mod tests {
130140
#[allow(unused)]
131141
#[test]
132142
fn test_large_c_style_enum() {
133-
#[cfg_attr(not(test), rustfmt::skip)]
134-
#[derive(Encode, Decode)]
135-
enum Enum300 {
136-
V1, V2, V3, V4, V5, V6, V7, V8, V9, V10,
137-
V11, V12, V13, V14, V15, V16, V17, V18, V19, V20,
138-
V21, V22, V23, V24, V25, V26, V27, V28, V29, V30,
139-
V31, V32, V33, V34, V35, V36, V37, V38, V39, V40,
140-
V41, V42, V43, V44, V45, V46, V47, V48, V49, V50,
141-
V51, V52, V53, V54, V55, V56, V57, V58, V59, V60,
142-
V61, V62, V63, V64, V65, V66, V67, V68, V69, V70,
143-
V71, V72, V73, V74, V75, V76, V77, V78, V79, V80,
144-
V81, V82, V83, V84, V85, V86, V87, V88, V89, V90,
145-
V91, V92, V93, V94, V95, V96, V97, V98, V99, V100,
146-
V101, V102, V103, V104, V105, V106, V107, V108, V109, V110,
147-
V111, V112, V113, V114, V115, V116, V117, V118, V119, V120,
148-
V121, V122, V123, V124, V125, V126, V127, V128, V129, V130,
149-
V131, V132, V133, V134, V135, V136, V137, V138, V139, V140,
150-
V141, V142, V143, V144, V145, V146, V147, V148, V149, V150,
151-
V151, V152, V153, V154, V155, V156, V157, V158, V159, V160,
152-
V161, V162, V163, V164, V165, V166, V167, V168, V169, V170,
153-
V171, V172, V173, V174, V175, V176, V177, V178, V179, V180,
154-
V181, V182, V183, V184, V185, V186, V187, V188, V189, V190,
155-
V191, V192, V193, V194, V195, V196, V197, V198, V199, V200,
156-
V201, V202, V203, V204, V205, V206, V207, V208, V209, V210,
157-
V211, V212, V213, V214, V215, V216, V217, V218, V219, V220,
158-
V221, V222, V223, V224, V225, V226, V227, V228, V229, V230,
159-
V231, V232, V233, V234, V235, V236, V237, V238, V239, V240,
160-
V241, V242, V243, V244, V245, V246, V247, V248, V249, V250,
161-
V251, V252, V253, V254, V255, V256, V257, V258, V259, V260,
162-
V261, V262, V263, V264, V265, V266, V267, V268, V269, V270,
163-
V271, V272, V273, V274, V275, V276, V277, V278, V279, V280,
164-
V281, V282, V283, V284, V285, V286, V287, V288, V289, V290,
165-
V291, V292, V293, V294, V295, V296, V297, V298, V299, V300,
166-
}
167-
168143
assert!(matches!(decode(&encode(&Enum300::V42)), Ok(Enum300::V42)));
169144
assert!(matches!(decode(&encode(&Enum300::V300)), Ok(Enum300::V300)));
170145
}
@@ -207,4 +182,52 @@ mod tests {
207182
.collect()
208183
}
209184
crate::bench_encode_decode!(bool_enum_vec: Vec<_>);
185+
186+
#[cfg_attr(not(test), rustfmt::skip)]
187+
#[derive(Encode, Decode, Debug, PartialEq)]
188+
pub enum Enum300 {
189+
V1, V2, V3, V4, V5, V6, V7, V8, V9, V10,
190+
V11, V12, V13, V14, V15, V16, V17, V18, V19, V20,
191+
V21, V22, V23, V24, V25, V26, V27, V28, V29, V30,
192+
V31, V32, V33, V34, V35, V36, V37, V38, V39, V40,
193+
V41, V42, V43, V44, V45, V46, V47, V48, V49, V50,
194+
V51, V52, V53, V54, V55, V56, V57, V58, V59, V60,
195+
V61, V62, V63, V64, V65, V66, V67, V68, V69, V70,
196+
V71, V72, V73, V74, V75, V76, V77, V78, V79, V80,
197+
V81, V82, V83, V84, V85, V86, V87, V88, V89, V90,
198+
V91, V92, V93, V94, V95, V96, V97, V98, V99, V100,
199+
V101, V102, V103, V104, V105, V106, V107, V108, V109, V110,
200+
V111, V112, V113, V114, V115, V116, V117, V118, V119, V120,
201+
V121, V122, V123, V124, V125, V126, V127, V128, V129, V130,
202+
V131, V132, V133, V134, V135, V136, V137, V138, V139, V140,
203+
V141, V142, V143, V144, V145, V146, V147, V148, V149, V150,
204+
V151, V152, V153, V154, V155, V156, V157, V158, V159, V160,
205+
V161, V162, V163, V164, V165, V166, V167, V168, V169, V170,
206+
V171, V172, V173, V174, V175, V176, V177, V178, V179, V180,
207+
V181, V182, V183, V184, V185, V186, V187, V188, V189, V190,
208+
V191, V192, V193, V194, V195, V196, V197, V198, V199, V200,
209+
V201, V202, V203, V204, V205, V206, V207, V208, V209, V210,
210+
V211, V212, V213, V214, V215, V216, V217, V218, V219, V220,
211+
V221, V222, V223, V224, V225, V226, V227, V228, V229, V230,
212+
V231, V232, V233, V234, V235, V236, V237, V238, V239, V240,
213+
V241, V242, V243, V244, V245, V246, V247, V248, V249, V250,
214+
V251, V252, V253, V254, V255, V256, V257, V258, V259, V260,
215+
V261, V262, V263, V264, V265, V266, V267, V268, V269, V270,
216+
V271, V272, V273, V274, V275, V276, V277, V278, V279, V280,
217+
V281, V282, V283, V284, V285, V286, V287, V288, V289, V290,
218+
V291, V292, V293, V294, V295, V296, V297, V298, V299, V300,
219+
}
220+
}
221+
222+
#[cfg(test)]
223+
mod test2 {
224+
use crate::derive::variant::tests::Enum300;
225+
226+
fn bench_data() -> Vec<Enum300> {
227+
crate::random_data(1000)
228+
.into_iter()
229+
.map(|v: u16| unsafe { core::mem::transmute_copy::<_, Enum300>(&(v % 300)) })
230+
.collect()
231+
}
232+
crate::bench_encode_decode!(enum_300_variants_vec: Vec<_>);
210233
}

src/serde/ser.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,11 @@ impl<'a> EncoderWrapper<'a> {
240240
#[inline(always)]
241241
fn variant_index_u8(variant_index: u32) -> Result<u8> {
242242
if variant_index > u8::MAX as u32 {
243+
// Properly optimizing the size of large enums would
244+
// require `serde` to specify the variant count.
245+
//
246+
// Good news: the `derive` version of `bitcode` supports
247+
// arbitrary-sized fieldless enums!
243248
err("enums with more than 256 variants are unsupported")
244249
} else {
245250
Ok(variant_index as u8)

0 commit comments

Comments
 (0)