@@ -88,13 +88,17 @@ abstract class DeltaSuite extends WholeStageTransformerSuite {
8888 private def deltaInputFileCount (df : org.apache.spark.sql.DataFrame ): Int =
8989 df.inputFiles.length
9090
91- // Counts splits the native scan will execute. Reflects the post-Gluten-rewrite state, so it
92- // catches regressions where Gluten silently dropped pruning (the pre-fix behavior).
93- private def scanPartitionCount (df : org.apache.spark.sql.DataFrame ): Int = {
91+ // Counts the partition directories selected by the executed scan after Gluten's rewrite.
92+ // Backed by `selectedPartitions` -> `relation.location.listFiles(partitionFilters, dataFilters)`,
93+ // which is the exact call site of issue #10511: pre-fix, physical-named partition filters
94+ // could not match Delta's logical partition schema, so this returned all directories. We use
95+ // `getPartitionArray` rather than `getPartitions` because the latter reflects post-coalesce
96+ // splits (Velox may merge small files into one split, hiding the per-partition count).
97+ private def selectedPartitionCount (df : org.apache.spark.sql.DataFrame ): Int = {
9498 val scan = df.queryExecution.executedPlan.collect {
9599 case f : DeltaScanTransformer => f
96100 }.head
97- scan.getPartitions.size
101+ scan.getPartitionArray.length
98102 }
99103
100104 // Regression for issue #10511: with column mapping, a partition column filter must prune
@@ -121,27 +125,27 @@ abstract class DeltaSuite extends WholeStageTransformerSuite {
121125 checkLengthAndPlan(df1, 1 )
122126 checkAnswer(df1, Row (" v2" ) :: Nil )
123127 assert(deltaInputFileCount(df1) == 1 , " Delta should prune to 1 file" )
124- assert(scanPartitionCount (df1) == 1 , " native scan should see 1 split" )
128+ assert(selectedPartitionCount (df1) == 1 , " native scan should see 1 split" )
125129
126130 // Range on partition column (the exact case from the bug report).
127131 val df2 = runQueryAndCompare(" select name from delta_cm_part where id > 2" ) { _ => }
128132 checkLengthAndPlan(df2, 1 )
129133 checkAnswer(df2, Row (" v3" ) :: Nil )
130134 assert(deltaInputFileCount(df2) == 1 )
131- assert(scanPartitionCount (df2) == 1 )
135+ assert(selectedPartitionCount (df2) == 1 )
132136
133137 // IN list on partition column. 2 of 3 partitions match.
134138 val df3 =
135139 runQueryAndCompare(" select name from delta_cm_part where id in (1, 3)" ) { _ => }
136140 checkLengthAndPlan(df3, 2 )
137141 checkAnswer(df3, Row (" v1" ) :: Row (" v3" ) :: Nil )
138142 assert(deltaInputFileCount(df3) == 2 )
139- assert(scanPartitionCount (df3) == 2 )
143+ assert(selectedPartitionCount (df3) == 2 )
140144
141145 // No filter -- baseline: all 3 partitions read.
142146 val dfAll = runQueryAndCompare(" select name from delta_cm_part" ) { _ => }
143147 assert(deltaInputFileCount(dfAll) == 3 )
144- assert(scanPartitionCount (dfAll) == 3 )
148+ assert(selectedPartitionCount (dfAll) == 3 )
145149 }
146150 }
147151
@@ -165,15 +169,15 @@ abstract class DeltaSuite extends WholeStageTransformerSuite {
165169 checkLengthAndPlan(df, 1 )
166170 checkAnswer(df, Row (" v2" ) :: Nil )
167171 assert(deltaInputFileCount(df) == 1 , " Delta should prune to 1 file with both filters" )
168- assert(scanPartitionCount (df) == 1 )
172+ assert(selectedPartitionCount (df) == 1 )
169173
170174 // Filter on only one of two partition columns.
171175 val df2 = runQueryAndCompare(
172176 " select name from delta_cm_part_multi where region = 'eu'" ) { _ => }
173177 checkLengthAndPlan(df2, 2 )
174178 checkAnswer(df2, Row (" v3" ) :: Row (" v4" ) :: Nil )
175179 assert(deltaInputFileCount(df2) == 2 )
176- assert(scanPartitionCount (df2) == 2 )
180+ assert(selectedPartitionCount (df2) == 2 )
177181 }
178182 }
179183
@@ -229,13 +233,13 @@ abstract class DeltaSuite extends WholeStageTransformerSuite {
229233 " select name from delta_cm_part_null where id is null" ) { _ => }
230234 checkAnswer(df1, Row (" vn" ) :: Nil )
231235 assert(deltaInputFileCount(df1) == 1 )
232- assert(scanPartitionCount (df1) == 1 )
236+ assert(selectedPartitionCount (df1) == 1 )
233237
234238 val df2 = runQueryAndCompare(
235239 " select name from delta_cm_part_null where id is not null" ) { _ => }
236240 checkAnswer(df2, Row (" v1" ) :: Row (" v2" ) :: Nil )
237241 assert(deltaInputFileCount(df2) == 2 )
238- assert(scanPartitionCount (df2) == 2 )
242+ assert(selectedPartitionCount (df2) == 2 )
239243 }
240244 }
241245
@@ -260,7 +264,7 @@ abstract class DeltaSuite extends WholeStageTransformerSuite {
260264 checkLengthAndPlan(df, 2 )
261265 checkAnswer(df, Row (" v2" ) :: Row (" v3" ) :: Nil )
262266 assert(deltaInputFileCount(df) == 2 )
263- assert(scanPartitionCount (df) == 2 )
267+ assert(selectedPartitionCount (df) == 2 )
264268 }
265269 }
266270
0 commit comments