Skip to content

Commit 051d557

Browse files
committed
refactor: extract computed_with_row_count helper to deduplicate partition_statistics + rescale pattern
1 parent 9c6c6d6 commit 051d557

File tree

1 file changed

+19
-29
lines changed
  • datafusion/physical-plan/src/operator_statistics

1 file changed

+19
-29
lines changed

datafusion/physical-plan/src/operator_statistics/mod.rs

Lines changed: 19 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,21 @@ fn rescale_byte_size(stats: &mut Statistics, new_num_rows: Precision<usize>) {
500500
};
501501
}
502502

503+
/// Fetches base statistics from the operator's built-in `partition_statistics`,
504+
/// overrides `num_rows` with the registry-computed estimate, and rescales
505+
/// `total_byte_size` proportionally.
506+
///
507+
/// Used by providers that compute a better row count but cannot yet propagate
508+
/// column-level stats (NDV, min/max) through the operator — pending #20184.
///
/// Column-level stats enriched by the registry walk are lost here because
/// `partition_statistics(None)` re-fetches raw child stats internally. Once
/// #20184 lands, enhanced child stats should be passed in so the operator's
/// built-in column mapping uses them instead.
///
/// # Errors
///
/// Propagates any error returned by `plan.partition_statistics(None)`.
509+
fn computed_with_row_count(
510+
plan: &dyn ExecutionPlan,
511+
num_rows: Precision<usize>,
512+
) -> Result<StatisticsResult> {
513+
// Obtain an owned, mutable copy of the operator's statistics: the `Arc` is
// unwrapped when possible and cloned otherwise.
let mut base = Arc::unwrap_or_clone(plan.partition_statistics(None)?);
514+
// Hand the caller-supplied row count to `rescale_byte_size` as its
// `new_num_rows` argument; `base` is updated in place.
rescale_byte_size(&mut base, num_rows);
515+
// Wrap the adjusted statistics as a computed result for the caller.
Ok(StatisticsResult::Computed(ExtendedStatistics::new(base)))
516+
}
517+
503518
/// Statistics provider for [`FilterExec`](crate::filter::FilterExec) that uses
504519
/// pre-computed enhanced child statistics from the registry walk.
505520
///
@@ -719,14 +734,7 @@ impl StatisticsProvider for AggregateStatisticsProvider {
719734

720735
let num_rows = Precision::Inexact(estimate);
721736

722-
// TODO: column-level stats (NDV, min/max) enriched by the registry walk
723-
// are lost here because partition_statistics(None) re-fetches raw child
724-
// stats internally. Once #20184 lands, pass enhanced child_stats so the
725-
// operator's built-in column mapping uses them instead.
726-
let mut base = Arc::unwrap_or_clone(plan.partition_statistics(None)?);
727-
rescale_byte_size(&mut base, num_rows);
728-
729-
Ok(StatisticsResult::Computed(ExtendedStatistics::new(base)))
737+
computed_with_row_count(plan, num_rows)
730738
}
731739
}
732740

@@ -866,13 +874,7 @@ impl StatisticsProvider for JoinStatisticsProvider {
866874
Precision::Inexact(estimated)
867875
};
868876

869-
// TODO: column-level stats (NDV, min/max) enriched by the registry walk
870-
// are lost here because partition_statistics(None) re-fetches raw child
871-
// stats internally. Once #20184 lands, pass enhanced child_stats so the
872-
// operator's built-in column mapping uses them instead.
873-
let mut base = Arc::unwrap_or_clone(plan.partition_statistics(None)?);
874-
rescale_byte_size(&mut base, num_rows);
875-
Ok(StatisticsResult::Computed(ExtendedStatistics::new(base)))
877+
computed_with_row_count(plan, num_rows)
876878
}
877879
}
878880

@@ -922,13 +924,7 @@ impl StatisticsProvider for LimitStatisticsProvider {
922924
},
923925
};
924926

925-
// TODO: column-level stats (NDV, min/max) enriched by the registry walk
926-
// are lost here because partition_statistics(None) re-fetches raw child
927-
// stats internally. Once #20184 lands, pass enhanced child_stats so the
928-
// operator's built-in column mapping uses them instead.
929-
let mut base = Arc::unwrap_or_clone(plan.partition_statistics(None)?);
930-
rescale_byte_size(&mut base, num_rows);
931-
Ok(StatisticsResult::Computed(ExtendedStatistics::new(base)))
927+
computed_with_row_count(plan, num_rows)
932928
}
933929
}
934930

@@ -967,13 +963,7 @@ impl StatisticsProvider for UnionStatisticsProvider {
967963
},
968964
)?;
969965

970-
// TODO: column-level stats (NDV, min/max) enriched by the registry walk
971-
// are lost here because partition_statistics(None) re-fetches raw child
972-
// stats internally. Once #20184 lands, pass enhanced child_stats so the
973-
// operator's built-in column mapping uses them instead.
974-
let mut base = Arc::unwrap_or_clone(plan.partition_statistics(None)?);
975-
rescale_byte_size(&mut base, total);
976-
Ok(StatisticsResult::Computed(ExtendedStatistics::new(base)))
966+
computed_with_row_count(plan, total)
977967
}
978968
}
979969

0 commit comments

Comments
 (0)