Skip to content

Commit 0dba913

Browse files
author
Mariam-Almesfer
committed
Enable VARCHAR ↔ timestamp_ntz cast native execution in Velox backend
1 parent 65182e0 commit 0dba913

3 files changed

Lines changed: 28 additions & 11 deletions

File tree

backends-velox/src/test/scala/org/apache/gluten/functions/DateFunctionsValidateSuite.scala

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -599,12 +599,14 @@ class DateFunctionsValidateSuite extends FunctionsValidateSuite {
599599
checkGlutenPlan[BatchScanExecTransformer]
600600
}
601601

602-
// Ensures the fallback of unsupported function works.
603-
runQueryAndCompare("select hour(ts) from view") {
604-
df =>
605-
assert(collect(df.queryExecution.executedPlan) {
606-
case p if p.isInstanceOf[ProjectExec] => p
607-
}.nonEmpty)
602+
// cast(string as timestamp_ntz) runs natively.
603+
// Must use a file-backed scan so Gluten has a native leaf to offload the Project above.
604+
val strPath = dir.getAbsolutePath + "/str_data"
605+
spark.createDataset(inputs).toDF("str")
606+
.coalesce(1).write.mode("overwrite").parquet(strPath)
607+
spark.read.parquet(strPath).createOrReplaceTempView("str_view")
608+
runQueryAndCompare("select cast(str as timestamp_ntz) from str_view") {
609+
checkGlutenPlan[ProjectExecTransformer]
608610
}
609611
}
610612
}

ep/build-velox/src/get-velox.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ RUN_SETUP_SCRIPT=ON
2525
ENABLE_ENHANCED_FEATURES=OFF
2626

2727
# Developer use only for testing Velox PR.
28-
UPSTREAM_VELOX_PR_ID=""
28+
UPSTREAM_VELOX_PR_ID="17498"
2929

3030
OS=`uname -s`
3131

gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/validator/Validators.scala

Lines changed: 19 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ import org.apache.gluten.extension.columnar.offload.OffloadSingleNode
2525
import org.apache.gluten.sql.shims.SparkShimLoader
2626

2727
import org.apache.spark.internal.Logging
28+
import org.apache.spark.sql.catalyst.expressions.Cast
2829
import org.apache.spark.sql.execution._
2930
import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec, SortAggregateExec}
3031
import org.apache.spark.sql.execution.datasources.WriteFilesExec
@@ -33,7 +34,7 @@ import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ShuffleEx
3334
import org.apache.spark.sql.execution.joins._
3435
import org.apache.spark.sql.execution.window.WindowExec
3536
import org.apache.spark.sql.hive.HiveTableScanExecTransformer
36-
import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructType}
37+
import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StringType, StructType}
3738

3839
object Validators {
3940
implicit class ValidatorBuilderImplicits(builder: Validator.Builder) {
@@ -262,9 +263,23 @@ object Validators {
262263
case p if HiveTableScanExecTransformer.isHiveTableScan(p) => true
263264
case _ => false
264265
}
265-
val hasNTZ = plan.output.exists(a => containsNTZ(a.dataType)) ||
266-
plan.children.exists(_.output.exists(a => containsNTZ(a.dataType)))
267-
if (isScan || !hasNTZ) {
266+
// Allow: scans, operators with no NTZ output (e.g. hour(timestamp_ntz) -> int), and a
267+
// ProjectExec with no NTZ input whose NTZ-typed expressions are all Casts from StringType.
268+
// Fall back for anything else producing NTZ (literals, pass-through, other conversions).
269+
val inputHasNTZ = plan.children.exists(_.output.exists(a => containsNTZ(a.dataType)))
270+
val outputHasNTZ = plan.output.exists(a => containsNTZ(a.dataType))
271+
val isVarcharToNtzCast = !inputHasNTZ && outputHasNTZ && (plan match {
272+
case p: ProjectExec =>
273+
p.projectList.forall {
274+
expr =>
275+
!containsNTZ(expr.dataType) || (expr match {
276+
case c: Cast => c.child.dataType == StringType
277+
case _ => false
278+
})
279+
}
280+
case _ => false
281+
})
282+
if (isScan || !outputHasNTZ || isVarcharToNtzCast) {
268283
return pass()
269284
}
270285
}

0 commit comments

Comments
 (0)