[GLUTEN-10511][VL][Delta] Apply scalafmt formatting

sezruby · claude · sezruby · commit 271302a28b6d · 2026-06-04T09:04:36.000-07:00
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/gluten-delta/src/main/scala/org/apache/gluten/extension/DeltaPostTransformRules.scala b/gluten-delta/src/main/scala/org/apache/gluten/extension/DeltaPostTransformRules.scala
@@ -168,12 +168,12 @@ object DeltaPostTransformRules {
    * transform the metadata of Delta into Parquet's, each plan should only be transformed once.
    *
    * Partition and data filters on the scan node stay LOGICAL so that Delta's
-   * `PreparedDeltaFileIndex` can do partition pruning and file-level data skipping (its
-   * partition schema and column-stats schema both use logical names). Reader-facing pieces
-   * (`output`, `dataSchema`, and the data fields of `requiredSchema`) become physical so the
-   * parquet reader and Velox find the right columns in the file. Filter binding to the native
-   * side is by exprId, not by name, so logical-named filter attributes still resolve correctly
-   * against the physical-named `output`.
+   * `PreparedDeltaFileIndex` can do partition pruning and file-level data skipping (its partition
+   * schema and column-stats schema both use logical names). Reader-facing pieces (`output`,
+   * `dataSchema`, and the data fields of `requiredSchema`) become physical so the parquet reader
+   * and Velox find the right columns in the file. Filter binding to the native side is by exprId,
+   * not by name, so logical-named filter attributes still resolve correctly against the
+   * physical-named `output`.
    */
   private def transformColumnMappingPlan(plan: SparkPlan): SparkPlan = plan match {
     case plan: DeltaScanTransformer =>
diff --git a/gluten-delta/src/test/scala/org/apache/gluten/execution/DeltaSuite.scala b/gluten-delta/src/test/scala/org/apache/gluten/execution/DeltaSuite.scala
@@ -85,165 +85,166 @@ abstract class DeltaSuite extends WholeStageTransformerSuite {
   // Regression for issue #10511: with column mapping, a partition column filter must prune
   // partitions correctly. Pre-fix, Gluten rewrote partition filters to physical names, which
   // broke `PreparedDeltaFileIndex.matchingFiles` and silently returned all files.
-  Seq("name", "id").foreach { mode =>
-    testWithMinSparkVersion(
-      s"column mapping mode = $mode with partition filter (single partition col)",
-      "3.2") {
-      withTable("delta_cm_part") {
-        spark.sql(s"""
-                     |create table delta_cm_part (id int, name string) using delta
-                     |partitioned by (id)
-                     |tblproperties ("delta.columnMapping.mode" = "$mode")
-                     |""".stripMargin)
-        // Use multiple inserts so each value lands in its own partition directory & file.
-        spark.sql("insert into delta_cm_part values (1, \"v1\")")
-        spark.sql("insert into delta_cm_part values (2, \"v2\")")
-        spark.sql("insert into delta_cm_part values (3, \"v3\")")
-
-        // Equality on partition column.
-        val df1 = runQueryAndCompare("select name from delta_cm_part where id = 2") { _ => }
-        checkLengthAndPlan(df1, 1)
-        checkAnswer(df1, Row("v2") :: Nil)
-
-        // Range on partition column (the exact case from the bug report).
-        val df2 = runQueryAndCompare("select name from delta_cm_part where id > 2") { _ => }
-        checkLengthAndPlan(df2, 1)
-        checkAnswer(df2, Row("v3") :: Nil)
-
-        // IN list on partition column.
-        val df3 =
-          runQueryAndCompare("select name from delta_cm_part where id in (1, 3)") { _ => }
-        checkLengthAndPlan(df3, 2)
-        checkAnswer(df3, Row("v1") :: Row("v3") :: Nil)
-
-        // Verify pruning actually reached the file index (only the matching partition's file
-        // should be selected).
-        val df4 = spark.sql("select name from delta_cm_part where id = 2")
-        df4.collect()
-        val scan = df4.queryExecution.executedPlan.collect {
-          case f: DeltaScanTransformer => f
-        }.head
-        assert(
-          scan.getPartitions.size == 1,
-          s"expected 1 partition after pruning, got ${scan.getPartitions.size}")
+  Seq("name", "id").foreach {
+    mode =>
+      testWithMinSparkVersion(
+        s"column mapping mode = $mode with partition filter (single partition col)",
+        "3.2") {
+        withTable("delta_cm_part") {
+          spark.sql(s"""
+                       |create table delta_cm_part (id int, name string) using delta
+                       |partitioned by (id)
+                       |tblproperties ("delta.columnMapping.mode" = "$mode")
+                       |""".stripMargin)
+          // Use multiple inserts so each value lands in its own partition directory & file.
+          spark.sql("insert into delta_cm_part values (1, \"v1\")")
+          spark.sql("insert into delta_cm_part values (2, \"v2\")")
+          spark.sql("insert into delta_cm_part values (3, \"v3\")")
+
+          // Equality on partition column.
+          val df1 = runQueryAndCompare("select name from delta_cm_part where id = 2") { _ => }
+          checkLengthAndPlan(df1, 1)
+          checkAnswer(df1, Row("v2") :: Nil)
+
+          // Range on partition column (the exact case from the bug report).
+          val df2 = runQueryAndCompare("select name from delta_cm_part where id > 2") { _ => }
+          checkLengthAndPlan(df2, 1)
+          checkAnswer(df2, Row("v3") :: Nil)
+
+          // IN list on partition column.
+          val df3 =
+            runQueryAndCompare("select name from delta_cm_part where id in (1, 3)") { _ => }
+          checkLengthAndPlan(df3, 2)
+          checkAnswer(df3, Row("v1") :: Row("v3") :: Nil)
+
+          // Verify pruning actually reached the file index (only the matching partition's file
+          // should be selected).
+          val df4 = spark.sql("select name from delta_cm_part where id = 2")
+          df4.collect()
+          val scan = df4.queryExecution.executedPlan.collect {
+            case f: DeltaScanTransformer => f
+          }.head
+          assert(
+            scan.getPartitions.size == 1,
+            s"expected 1 partition after pruning, got ${scan.getPartitions.size}")
+        }
       }
-    }
 
-    testWithMinSparkVersion(
-      s"column mapping mode = $mode with partition filter (multi partition col)",
-      "3.2") {
-      withTable("delta_cm_part_multi") {
-        spark.sql(s"""
-                     |create table delta_cm_part_multi
-                     |  (id int, region string, name string)
-                     |using delta partitioned by (region, id)
-                     |tblproperties ("delta.columnMapping.mode" = "$mode")
-                     |""".stripMargin)
-        spark.sql("insert into delta_cm_part_multi values (1, \"us\", \"v1\")")
-        spark.sql("insert into delta_cm_part_multi values (2, \"us\", \"v2\")")
-        spark.sql("insert into delta_cm_part_multi values (1, \"eu\", \"v3\")")
-        spark.sql("insert into delta_cm_part_multi values (2, \"eu\", \"v4\")")
-
-        val df = runQueryAndCompare(
-          "select name from delta_cm_part_multi where region = 'us' and id > 1") { _ => }
-        checkLengthAndPlan(df, 1)
-        checkAnswer(df, Row("v2") :: Nil)
+      testWithMinSparkVersion(
+        s"column mapping mode = $mode with partition filter (multi partition col)",
+        "3.2") {
+        withTable("delta_cm_part_multi") {
+          spark.sql(s"""
+                       |create table delta_cm_part_multi
+                       |  (id int, region string, name string)
+                       |using delta partitioned by (region, id)
+                       |tblproperties ("delta.columnMapping.mode" = "$mode")
+                       |""".stripMargin)
+          spark.sql("insert into delta_cm_part_multi values (1, \"us\", \"v1\")")
+          spark.sql("insert into delta_cm_part_multi values (2, \"us\", \"v2\")")
+          spark.sql("insert into delta_cm_part_multi values (1, \"eu\", \"v3\")")
+          spark.sql("insert into delta_cm_part_multi values (2, \"eu\", \"v4\")")
+
+          val df = runQueryAndCompare(
+            "select name from delta_cm_part_multi where region = 'us' and id > 1") { _ => }
+          checkLengthAndPlan(df, 1)
+          checkAnswer(df, Row("v2") :: Nil)
+        }
       }
-    }
-
-    testWithMinSparkVersion(
-      s"column mapping mode = $mode with partition + data filter",
-      "3.2") {
-      withTable("delta_cm_part_data") {
-        spark.sql(s"""
-                     |create table delta_cm_part_data (id int, name string, age int)
-                     |using delta partitioned by (id)
-                     |tblproperties ("delta.columnMapping.mode" = "$mode")
-                     |""".stripMargin)
-        spark.sql("insert into delta_cm_part_data values (1, \"a\", 10), (1, \"b\", 20)")
-        spark.sql("insert into delta_cm_part_data values (2, \"c\", 30), (2, \"d\", 40)")
-        spark.sql("insert into delta_cm_part_data values (3, \"e\", 50), (3, \"f\", 60)")
-
-        val df1 = runQueryAndCompare(
-          "select name from delta_cm_part_data where id > 1 and age >= 50") { _ => }
-        checkLengthAndPlan(df1, 2)
-        checkAnswer(df1, Row("e") :: Row("f") :: Nil)
 
-        // Data filter alone — file-level stats skipping should still resolve column names.
-        val df2 = runQueryAndCompare(
-          "select name from delta_cm_part_data where age = 30") { _ => }
-        checkLengthAndPlan(df2, 1)
-        checkAnswer(df2, Row("c") :: Nil)
+      testWithMinSparkVersion(
+        s"column mapping mode = $mode with partition + data filter",
+        "3.2") {
+        withTable("delta_cm_part_data") {
+          spark.sql(s"""
+                       |create table delta_cm_part_data (id int, name string, age int)
+                       |using delta partitioned by (id)
+                       |tblproperties ("delta.columnMapping.mode" = "$mode")
+                       |""".stripMargin)
+          spark.sql("insert into delta_cm_part_data values (1, \"a\", 10), (1, \"b\", 20)")
+          spark.sql("insert into delta_cm_part_data values (2, \"c\", 30), (2, \"d\", 40)")
+          spark.sql("insert into delta_cm_part_data values (3, \"e\", 50), (3, \"f\", 60)")
+
+          val df1 = runQueryAndCompare(
+            "select name from delta_cm_part_data where id > 1 and age >= 50") { _ => }
+          checkLengthAndPlan(df1, 2)
+          checkAnswer(df1, Row("e") :: Row("f") :: Nil)
+
+          // Data filter alone — file-level stats skipping should still resolve column names.
+          val df2 = runQueryAndCompare(
+            "select name from delta_cm_part_data where age = 30") { _ => }
+          checkLengthAndPlan(df2, 1)
+          checkAnswer(df2, Row("c") :: Nil)
+        }
       }
-    }
-
-    testWithMinSparkVersion(
-      s"column mapping mode = $mode with IS [NOT] NULL on partition col",
-      "3.2") {
-      withTable("delta_cm_part_null") {
-        spark.sql(s"""
-                     |create table delta_cm_part_null (id int, name string)
-                     |using delta partitioned by (id)
-                     |tblproperties ("delta.columnMapping.mode" = "$mode")
-                     |""".stripMargin)
-        spark.sql("insert into delta_cm_part_null values (1, \"v1\")")
-        spark.sql("insert into delta_cm_part_null values (2, \"v2\")")
-        spark.sql("insert into delta_cm_part_null values (cast(null as int), \"vn\")")
-
-        val df1 = runQueryAndCompare(
-          "select name from delta_cm_part_null where id is null") { _ => }
-        checkAnswer(df1, Row("vn") :: Nil)
 
-        val df2 = runQueryAndCompare(
-          "select name from delta_cm_part_null where id is not null") { _ => }
-        checkAnswer(df2, Row("v1") :: Row("v2") :: Nil)
+      testWithMinSparkVersion(
+        s"column mapping mode = $mode with IS [NOT] NULL on partition col",
+        "3.2") {
+        withTable("delta_cm_part_null") {
+          spark.sql(s"""
+                       |create table delta_cm_part_null (id int, name string)
+                       |using delta partitioned by (id)
+                       |tblproperties ("delta.columnMapping.mode" = "$mode")
+                       |""".stripMargin)
+          spark.sql("insert into delta_cm_part_null values (1, \"v1\")")
+          spark.sql("insert into delta_cm_part_null values (2, \"v2\")")
+          spark.sql("insert into delta_cm_part_null values (cast(null as int), \"vn\")")
+
+          val df1 = runQueryAndCompare(
+            "select name from delta_cm_part_null where id is null") { _ => }
+          checkAnswer(df1, Row("vn") :: Nil)
+
+          val df2 = runQueryAndCompare(
+            "select name from delta_cm_part_null where id is not null") { _ => }
+          checkAnswer(df2, Row("v1") :: Row("v2") :: Nil)
+        }
       }
-    }
 
-    testWithMinSparkVersion(
-      s"column mapping mode = $mode partition filter survives column rename",
-      "3.2") {
-      withTable("delta_cm_part_rename") {
-        spark.sql(s"""
-                     |create table delta_cm_part_rename (id int, name string)
-                     |using delta partitioned by (id)
-                     |tblproperties ("delta.columnMapping.mode" = "$mode")
-                     |""".stripMargin)
-        spark.sql("insert into delta_cm_part_rename values (1, \"v1\")")
-        spark.sql("insert into delta_cm_part_rename values (2, \"v2\")")
-        spark.sql("insert into delta_cm_part_rename values (3, \"v3\")")
-        // Rename the partition column. The physical name in storage stays the same; only the
-        // logical name changes, so the logical-name-based partition filter must still resolve.
-        spark.sql("alter table delta_cm_part_rename rename column id to pid")
-
-        val df = runQueryAndCompare(
-          "select name from delta_cm_part_rename where pid >= 2") { _ => }
-        checkLengthAndPlan(df, 2)
-        checkAnswer(df, Row("v2") :: Row("v3") :: Nil)
+      testWithMinSparkVersion(
+        s"column mapping mode = $mode partition filter survives column rename",
+        "3.2") {
+        withTable("delta_cm_part_rename") {
+          spark.sql(s"""
+                       |create table delta_cm_part_rename (id int, name string)
+                       |using delta partitioned by (id)
+                       |tblproperties ("delta.columnMapping.mode" = "$mode")
+                       |""".stripMargin)
+          spark.sql("insert into delta_cm_part_rename values (1, \"v1\")")
+          spark.sql("insert into delta_cm_part_rename values (2, \"v2\")")
+          spark.sql("insert into delta_cm_part_rename values (3, \"v3\")")
+          // Rename the partition column. The physical name in storage stays the same; only the
+          // logical name changes, so the logical-name-based partition filter must still resolve.
+          spark.sql("alter table delta_cm_part_rename rename column id to pid")
+
+          val df = runQueryAndCompare(
+            "select name from delta_cm_part_rename where pid >= 2") { _ => }
+          checkLengthAndPlan(df, 2)
+          checkAnswer(df, Row("v2") :: Row("v3") :: Nil)
+        }
       }
-    }
 
-    testWithMinSparkVersion(
-      s"column mapping mode = $mode data column rename + filter (file skipping)",
-      "3.2") {
-      withTable("delta_cm_data_rename") {
-        spark.sql(s"""
-                     |create table delta_cm_data_rename (id int, age int, name string)
-                     |using delta
-                     |tblproperties ("delta.columnMapping.mode" = "$mode")
-                     |""".stripMargin)
-        spark.sql("insert into delta_cm_data_rename values (1, 10, \"a\")")
-        spark.sql("insert into delta_cm_data_rename values (2, 20, \"b\")")
-        spark.sql("insert into delta_cm_data_rename values (3, 30, \"c\")")
-        // Rename a data column. Filter pushdown must still match physical column in parquet.
-        spark.sql("alter table delta_cm_data_rename rename column age to years")
-
-        val df = runQueryAndCompare(
-          "select name from delta_cm_data_rename where years = 20") { _ => }
-        checkLengthAndPlan(df, 1)
-        checkAnswer(df, Row("b") :: Nil)
+      testWithMinSparkVersion(
+        s"column mapping mode = $mode data column rename + filter (file skipping)",
+        "3.2") {
+        withTable("delta_cm_data_rename") {
+          spark.sql(s"""
+                       |create table delta_cm_data_rename (id int, age int, name string)
+                       |using delta
+                       |tblproperties ("delta.columnMapping.mode" = "$mode")
+                       |""".stripMargin)
+          spark.sql("insert into delta_cm_data_rename values (1, 10, \"a\")")
+          spark.sql("insert into delta_cm_data_rename values (2, 20, \"b\")")
+          spark.sql("insert into delta_cm_data_rename values (3, 30, \"c\")")
+          // Rename a data column. Filter pushdown must still match physical column in parquet.
+          spark.sql("alter table delta_cm_data_rename rename column age to years")
+
+          val df = runQueryAndCompare(
+            "select name from delta_cm_data_rename where years = 20") { _ => }
+          checkLengthAndPlan(df, 1)
+          checkAnswer(df, Row("b") :: Nil)
+        }
       }
-    }
   }
 
   test("delta: time travel") {