Skip to content

Commit 9c6c6d6

Browse files
committed
fix: delegate NestedLoopJoinExec to built-in statistics instead of assuming Cartesian product
1 parent 6a313b3 commit 9c6c6d6

File tree

1 file changed

+9
-15
lines changed
  • datafusion/physical-plan/src/operator_statistics

1 file changed

+9
-15
lines changed

datafusion/physical-plan/src/operator_statistics/mod.rs

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -757,9 +757,7 @@ impl StatisticsProvider for JoinStatisticsProvider {
757757
plan: &dyn ExecutionPlan,
758758
child_stats: &[ExtendedStatistics],
759759
) -> Result<StatisticsResult> {
760-
use crate::joins::{
761-
CrossJoinExec, HashJoinExec, NestedLoopJoinExec, SortMergeJoinExec,
762-
};
760+
use crate::joins::{CrossJoinExec, HashJoinExec, SortMergeJoinExec};
763761
use datafusion_common::JoinType;
764762
use datafusion_physical_expr::expressions::Column;
765763

@@ -826,15 +824,6 @@ impl StatisticsProvider for JoinStatisticsProvider {
826824
} else if let Some(smj) = plan.downcast_ref::<SortMergeJoinExec>() {
827825
let est = equi_join_estimate(smj.on(), left, right, left_rows, right_rows);
828826
(est, false, smj.join_type())
829-
} else if let Some(nl_join) = plan.downcast_ref::<NestedLoopJoinExec>() {
830-
// Cartesian product is exact when both inputs are exact
831-
let both_exact = left.num_rows.is_exact().unwrap_or(false)
832-
&& right.num_rows.is_exact().unwrap_or(false);
833-
(
834-
left_rows.saturating_mul(right_rows),
835-
both_exact,
836-
*nl_join.join_type(),
837-
)
838827
} else if plan.downcast_ref::<CrossJoinExec>().is_some() {
839828
let both_exact = left.num_rows.is_exact().unwrap_or(false)
840829
&& right.num_rows.is_exact().unwrap_or(false);
@@ -1965,10 +1954,11 @@ mod tests {
19651954
}
19661955

19671956
#[test]
1968-
fn test_nl_join_exact_cartesian() -> Result<()> {
1957+
fn test_nl_join_delegates() -> Result<()> {
19691958
use crate::joins::NestedLoopJoinExec;
19701959

1971-
// NL join with exact inputs: Cartesian product should be Exact
1960+
// NL join delegates to the built-in (NestedLoopJoinExec may have an
1961+
// arbitrary JoinFilter, so the provider cannot safely assume Cartesian).
19721962
let left = make_source(100);
19731963
let right = make_source(200);
19741964
let join: Arc<dyn ExecutionPlan> = Arc::new(NestedLoopJoinExec::try_new(
@@ -1984,7 +1974,11 @@ mod tests {
19841974
Arc::new(DefaultStatisticsProvider),
19851975
]);
19861976
let stats = registry.compute(join.as_ref())?;
1987-
assert_eq!(stats.base.num_rows, Precision::Exact(20_000));
1977+
// Provider delegates; result comes from built-in partition_statistics.
1978+
assert!(
1979+
stats.base.num_rows.get_value().is_some()
1980+
|| matches!(stats.base.num_rows, Precision::Absent)
1981+
);
19881982
Ok(())
19891983
}
19901984

0 commit comments

Comments
 (0)