Skip to content

Commit 011b5ed

Browse files
ggevayjkosh44
authored andcommitted
Merge pull request #28256 from ggevay/fixpoint-hash
Save plan hashes in `Fixpoint` rather than full plans
1 parent cf4f0c4 commit 011b5ed

2 files changed

Lines changed: 76 additions & 22 deletions

File tree

src/expr/src/relation.rs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use std::cmp::{max, Ordering};
1313
use std::collections::{BTreeMap, BTreeSet};
1414
use std::fmt;
1515
use std::fmt::{Display, Formatter};
16+
use std::hash::{DefaultHasher, Hash, Hasher};
1617
use std::num::NonZeroU64;
1718

1819
use bytesize::ByteSize;
@@ -84,7 +85,14 @@ pub trait CollectionPlan {
8485
///
8586
/// The AST is meant to reflect the capabilities of the `differential_dataflow::Collection` type,
8687
/// written generically enough to avoid run-time compilation work.
87-
#[derive(Clone, Debug, Ord, PartialOrd, Serialize, Deserialize, MzReflect)]
88+
///
89+
/// `derived_hash_with_manual_eq` was complaining for the wrong reason: This lint exists because
90+
/// it's bad when `Eq` doesn't agree with `Hash`, which is often quite likely if one of them is
91+
/// implemented manually. However, our manual implementation of `Eq` _will_ agree with the derived
92+
/// one. This is because the reason for the manual implementation is not to change the semantics
93+
/// from the derived one, but to avoid stack overflows.
94+
#[allow(clippy::derived_hash_with_manual_eq)]
95+
#[derive(Clone, Debug, Ord, PartialOrd, Serialize, Deserialize, MzReflect, Hash)]
8896
pub enum MirRelationExpr {
8997
/// A constant relation containing specified rows.
9098
///
@@ -1917,6 +1925,14 @@ impl MirRelationExpr {
19171925
});
19181926
result
19191927
}
1928+
1929+
/// Hash to an u64 using Rust's default Hasher. (Which is a somewhat slower, but better Hasher
1930+
/// than what `Hashable::hashed` would give us.)
1931+
pub fn hash_to_u64(&self) -> u64 {
1932+
let mut h = DefaultHasher::new();
1933+
self.hash(&mut h);
1934+
h.finish()
1935+
}
19201936
}
19211937

19221938
// `LetRec` helpers

src/transform/src/lib.rs

Lines changed: 59 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@ pub struct TransformCtx<'a> {
101101
pub df_meta: &'a mut DataflowMetainfo,
102102
}
103103

104+
const FOLD_CONSTANTS_LIMIT: usize = 10000;
105+
104106
impl<'a> TransformCtx<'a> {
105107
/// Generates a [`TransformCtx`] instance for the local MIR optimization
106108
/// stage.
@@ -319,31 +321,59 @@ impl Transform for Fixpoint {
319321
// stable shape.
320322
let mut iter_no = 0;
321323
let mut seen = BTreeMap::new();
322-
seen.insert(relation.clone(), iter_no);
324+
seen.insert(relation.hash_to_u64(), iter_no);
325+
let original = relation.clone();
323326
loop {
324327
let prev_size = relation.size();
325328
for i in iter_no..iter_no + self.limit {
326-
let original = relation.clone();
329+
let prev = relation.clone();
327330
self.apply_transforms(relation, ctx, format!("{i:04}"))?;
328-
if *relation == original {
331+
if *relation == prev {
332+
if prev_size > 100000 {
333+
tracing::warn!(%prev_size, "Very big MIR plan");
334+
}
329335
mz_repr::explain::trace_plan(relation);
330336
return Ok(());
331337
}
332-
let seen_i = seen.insert(relation.clone(), i);
338+
let seen_i = seen.insert(relation.hash_to_u64(), i);
333339
if let Some(seen_i) = seen_i {
334-
// We got into an infinite loop (e.g., we are oscillating between two plans).
335-
// This is not catastrophic, because we can just say we are done now,
336-
// but it would be great to eventually find a way to prevent these loops from
337-
// happening in the first place. We have several relevant issues, see
338-
// https://github.com/MaterializeInc/materialize/issues/27954#issuecomment-2200172227
339-
mz_repr::explain::trace_plan(relation);
340-
soft_panic_or_log!(
341-
"Fixpoint {} detected a loop of length {} after {} iterations",
342-
self.name,
343-
i - seen_i,
344-
i
345-
);
346-
return Ok(());
340+
// Let's see whether this is just a hash collision, or a real loop: Run the
341+
// whole thing from the beginning up until `seen_i`, and compare all the plans
342+
// to the current plan from the outer `for`.
343+
// (It would not be enough to compare only the plan at `seen_i`, because
344+
// then we could miss a real loop if there is also a hash collision somewhere
345+
// in the middle of the loop, because then we'd compare the last plan of the
346+
// loop not with its actual match, but with the colliding plan.)
347+
let mut again = original.clone();
348+
// The `+2` is because:
349+
// - one `+1` is to finally get to the plan at `seen_i`,
350+
// - another `+1` is because we are comparing to `relation` only _before_
351+
// calling `apply_transforms`.
352+
for j in 0..(seen_i + 2) {
353+
if again == *relation {
354+
// We really got into an infinite loop (e.g., we are oscillating between
355+
// two plans). This is not catastrophic, because we can just say we are
356+
// done now, but it would be great to eventually find a way to prevent
357+
// these loops from happening in the first place. We have several
358+
// relevant issues, see
359+
// https://github.com/MaterializeInc/materialize/issues/27954#issuecomment-2200172227
360+
mz_repr::explain::trace_plan(relation);
361+
soft_panic_or_log!(
362+
"Fixpoint `{}` detected a loop of length {} after {} iterations",
363+
self.name,
364+
i - seen_i,
365+
i
366+
);
367+
return Ok(());
368+
}
369+
self.apply_transforms(
370+
&mut again,
371+
ctx,
372+
format!("collision detection {j:04}"),
373+
)?;
374+
}
375+
// If we got here, then this was just a hash collision! Just continue as if
376+
// nothing happened.
347377
}
348378
}
349379
let current_size = relation.size();
@@ -465,7 +495,9 @@ impl Default for FuseAndCollapse {
465495
// Some optimizations fight against this, and we want to be sure to end as a
466496
// `MirRelationExpr::Constant` if that is the case, so that subsequent use can
467497
// clearly see this.
468-
Box::new(fold_constants::FoldConstants { limit: Some(10000) }),
498+
Box::new(fold_constants::FoldConstants {
499+
limit: Some(FOLD_CONSTANTS_LIMIT),
500+
}),
469501
],
470502
}
471503
}
@@ -645,7 +677,9 @@ impl Optimizer {
645677
limit: 100,
646678
transforms: vec![
647679
Box::new(column_knowledge::ColumnKnowledge::default()),
648-
Box::new(fold_constants::FoldConstants { limit: Some(10000) }),
680+
Box::new(fold_constants::FoldConstants {
681+
limit: Some(FOLD_CONSTANTS_LIMIT),
682+
}),
649683
Box::new(demand::Demand::default()),
650684
Box::new(literal_lifting::LiteralLifting::default()),
651685
],
@@ -659,7 +693,9 @@ impl Optimizer {
659693
Box::new(canonicalize_mfp::CanonicalizeMfp),
660694
// Identifies common relation subexpressions.
661695
Box::new(cse::relation_cse::RelationCSE::new(false)),
662-
Box::new(fold_constants::FoldConstants { limit: Some(10000) }),
696+
Box::new(fold_constants::FoldConstants {
697+
limit: Some(FOLD_CONSTANTS_LIMIT),
698+
}),
663699
// Remove threshold operators which have no effect.
664700
// Must be done at the very end of the physical pass, because before
665701
// that (at least at the moment) we cannot be sure that all trees
@@ -717,7 +753,9 @@ impl Optimizer {
717753
// The last RelationCSE before JoinImplementation should be with
718754
// inline_mfp = true.
719755
Box::new(cse::relation_cse::RelationCSE::new(true)),
720-
Box::new(fold_constants::FoldConstants { limit: Some(10000) }),
756+
Box::new(fold_constants::FoldConstants {
757+
limit: Some(FOLD_CONSTANTS_LIMIT),
758+
}),
721759
],
722760
}),
723761
Box::new(

0 commit comments

Comments
 (0)