Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 158 additions & 0 deletions .github/workflows/bolt_gluten_ut.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
# Run the full Gluten UT matrix against the Bolt backend via
# scripts/gluten_ut/run.sh (parallel dispatcher with bwrap per-suite isolation,
# slow-suites priority dispatch, case-level blacklist).

name: Bolt Gluten UT

# Triggers: every pull request targeting main, plus a manual dispatch that can
# optionally override which Gluten repository/ref is tested.
on:
  pull_request:
    branches:
      - main
  workflow_dispatch:
    inputs:
      gluten_repo:
        description: 'Gluten repo to check out; leave blank to use the hardcoded default below.'
        required: false
        default: ''
      gluten_ref:
        description: 'Gluten branch/tag/sha to check out; leave blank to use the hardcoded default below.'
        required: false
        default: ''

env:
  # Gluten checkout target: a non-blank workflow_dispatch input wins; a blank
  # input (or a run with no inputs, e.g. pull_request) falls back to the
  # literal on the right-hand side of `||`.
  GLUTEN_REPO: ${{ inputs.gluten_repo || 'zhangxffff/gluten' }}
  GLUTEN_REF: ${{ inputs.gluten_ref || 'chore/run_gluten_ut' }}
  # Matches build-test.yml — ccache + conan cache live under /data on the host.
  CCACHE_DIR: /data/ccache-data
  CCACHE_MAX_SIZE: '100G'
  CI_NUM_THREADS: '16'
  IN_CI: '1'

# At most one in-flight run per workflow + ref: a newer run for the same ref
# cancels the one still in progress.
concurrency:
  group: '${{ github.workflow }}-${{ github.ref }}'
  cancel-in-progress: true

jobs:
  # Builds bolt and gluten inside the CI container, prepares the cached Arrow
  # jars and the Spark distribution on the shared /data volumes, then runs the
  # Gluten UT matrix and uploads its logs/reports.
  spark-ut:
    runs-on: [ self-hosted, medium ]
    container:
      image: bolt-registry:5000/bolt-ci:20260114
      # bwrap (per-suite isolation in scripts/gluten_ut/run.sh) must create a
      # namespace. The runner host disables unprivileged user namespaces, so
      # seccomp/apparmor unconfined alone is NOT enough — bwrap needs real
      # CAP_SYS_ADMIN to take the privileged path (no user namespace) for its
      # mounts. cap-add SYS_ADMIN + unconfined seccomp/apparmor grants exactly
      # that without the full host exposure of --privileged (no host devices,
      # no CAP_SYS_MODULE).
      options: --user root --init --cap-add=SYS_ADMIN --security-opt seccomp=unconfined --security-opt apparmor=unconfined
      volumes:
        - /data/ccache-data:/data/ccache-data
        - /data/bolt-gluten-ut-arrow:/root/.m2/repository/org/apache/arrow
        - /data/bolt-gluten-ut-spark:/data/bolt-gluten-ut-spark
    services:
      conanserver:
        image: bolt-registry:5000/conan-server:latest
        volumes:
          - /data/conan-server-data:/var/conan/data
    timeout-minutes: 240
    steps:
      - name: Checkout bolt (this repo)
        uses: actions/checkout@v6

      - name: Checkout gluten at ${{ env.GLUTEN_REF }}
        uses: actions/checkout@v6
        with:
          repository: ${{ env.GLUTEN_REPO }}
          ref: ${{ env.GLUTEN_REF }}
          path: gluten

      - name: Install JDK 17 + bubblewrap
        # Full JDK (not -headless) is required: arrow's cmake JNI detection
        # needs AWT, which is only present in the full openjdk-17-jdk package.
        # bubblewrap is used to run each suite in an isolated environment.
        run: |
          apt-get update
          apt-get install -y --no-install-recommends openjdk-17-jdk bubblewrap
          bwrap --version
          # The Debian/Ubuntu package installs under java-17-openjdk-<dpkg arch>;
          # derive the suffix instead of hard-coding amd64 so the step also
          # works on non-amd64 runners (same value as before on amd64).
          echo "JAVA_HOME=/usr/lib/jvm/java-17-openjdk-$(dpkg --print-architecture)" >> "$GITHUB_ENV"

      - name: Set up bolt build environment (conan)
        uses: ./.github/actions/bolt-build-base

      - name: Align conan default profile with bolt.profile
        # bolt's Makefile installs with `-pr default -pr scripts/conan/bolt.profile`,
        # but gluten's Makefile only uses `-pr default`. Append bolt.profile to
        # the default profile so that gluten builds with the same profile as bolt.
        run: cat scripts/conan/bolt.profile >> ~/.conan2/profiles/default

      - name: Build local bolt (make release_spark)
        run: make release_spark && make export_release

      - name: Build gluten native libs (make release)
        working-directory: ${{ github.workspace }}/gluten
        run: make release

      - name: Build gluten-arrow if jars missing or patches changed
        working-directory: ${{ github.workspace }}/gluten
        run: |
          ARROW_DIR=/root/.m2/repository/org/apache/arrow
          STAMP="$ARROW_DIR/.gluten-15.0.0.stamp"
          # Cache key = hash over the per-file hashes of the arrow patches and
          # the build script. stderr is silenced, so an input that does not
          # exist is simply left out of the key rather than failing the step.
          KEY=$(sha256sum ep/build-velox/src/modify_arrow*.patch \
                          ep/build-velox/src/cmake-compatibility.patch \
                          dev/build_arrow.sh 2>/dev/null \
                | sha256sum | cut -d' ' -f1)
          mkdir -p "$ARROW_DIR"
          # Serialize on fd 200 so concurrent jobs sharing the mounted Arrow
          # directory do not rebuild at the same time.
          (
            flock -x 200
            if [[ -f "$STAMP" && "$(cat "$STAMP")" == "$KEY" ]] \
               && ls "$ARROW_DIR"/arrow-dataset/15.0.0-gluten/*.jar > /dev/null 2>&1; then
              echo "arrow jars match stamp $KEY — skip rebuild"
            else
              bash dev/build_arrow.sh
              # Stamp is written only after the build command has run.
              echo "$KEY" > "$STAMP"
            fi
          ) 200> "$ARROW_DIR/.lock"

      - name: Populate Spark binary + source SQL test resources on /data
        # Layout under /data/bolt-gluten-ut-spark/:
        #   spark_home/  — Spark 3.5.5 binary dist. SPARK_HOME points here.
        #   spark_src/   — full Spark 3.5.5 source tree.
        #   spark_home/sql → ../spark_src/sql — gluten tests read source sql/ from here.
        run: |
          set -e
          BASE=/data/bolt-gluten-ut-spark
          # True once the whole layout exists. The symlink is created after
          # both archives are extracted, so it doubles as a completion marker.
          spark_ready() {
            [[ -d "$BASE/spark_home/jars" && -d "$BASE/spark_src/sql" && -L "$BASE/spark_home/sql" ]]
          }
          # Fast path: nothing to do, skip taking the lock.
          if spark_ready; then exit 0; fi
          mkdir -p "$BASE"
          # flock guards against concurrent CI jobs
          (
            flock -x 200
            # Re-check under the lock: another job may have finished populating
            # the directory while this one was waiting.
            if spark_ready; then exit 0; fi
            command -v aria2c > /dev/null \
              || { apt-get update -qq && apt-get install -y --no-install-recommends aria2; }
            URL=https://archive.apache.org/dist/spark/spark-3.5.5
            cd "$BASE"
            # Remove archives left behind by an interrupted earlier attempt.
            # With aria2's defaults an existing bin.tgz is not overwritten; the
            # new download is auto-renamed (bin.1.tgz) and tar below would then
            # unpack the stale, possibly truncated file.
            rm -f bin.tgz bin.tgz.aria2 src.tgz src.tgz.aria2
            aria2c --quiet -x16 -s16 -k1M -o bin.tgz "$URL/spark-3.5.5-bin-hadoop3.tgz"
            aria2c --quiet -x16 -s16 -k1M -o src.tgz "$URL/spark-3.5.5.tgz"
            mkdir -p spark_home spark_src
            tar -xzf bin.tgz --strip-components=1 -C spark_home
            tar -xzf src.tgz --strip-components=1 -C spark_src
            ln -sfn ../spark_src/sql spark_home/sql
            rm -f bin.tgz src.tgz
          ) 200> "$BASE/.lock"

      - name: Run Gluten UT (parallel, blacklist-aware)
        env:
          GLUTEN_HOME: ${{ github.workspace }}/gluten
          SPARK_HOME: /data/bolt-gluten-ut-spark/spark_home/
          JOBS: '8'
        run: bash scripts/gluten_ut/run.sh

      - name: Upload test reports
        # Runs even when an earlier step failed so the logs are still collected.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: bolt-gluten-ut-reports
          path: |
            scripts/gluten_ut/logs/*.log
            scripts/gluten_ut/logs/*.tsv
            scripts/gluten_ut/logs/reports/**/TEST-*.xml
          if-no-files-found: warn
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -337,3 +337,7 @@ files.txt
log.txt

bolt/version/version.h

# gluten UT runner output (parallel dispatch logs + reports).
# The leading slash anchors both patterns to the repository root; the trailing
# slash on logs/ makes that pattern match a directory only.
/scripts/gluten_ut/logs/
/scripts/gluten_ut/stdout
19 changes: 19 additions & 0 deletions scripts/gluten_ut/blacklist.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
org.apache.gluten.config.AllBoltConfiguration#Check bolt backend configs
org.apache.gluten.execution.BoltExplodeExpressionSuite#(aborted)
org.apache.gluten.execution.BoltScanSuite#Test file scheme validation
org.apache.gluten.execution.BoltScanSuite#parquet index based schema evolution
org.apache.gluten.execution.python.ArrowEvalPythonExecSuite#arrow_udf test: with preprojection
org.apache.gluten.execution.python.ArrowEvalPythonExecSuite#arrow_udf test: with unrelated projection
org.apache.gluten.execution.python.ArrowEvalPythonExecSuite#arrow_udf test: without projection
org.apache.gluten.extension.columnar.transition.BoltTransitionSuite#(aborted)
org.apache.gluten.functions.JsonFunctionsValidateSuite#json_object_keys
org.apache.spark.sql.GlutenJsonFunctionsSuite#roundtrip in to_json and from_json - array
org.apache.spark.sql.GlutenSQLQueryTestSuite#datetime-parsing-invalid.sql
org.apache.spark.sql.catalyst.expressions.GlutenCastSuite#Gluten - cast string to timestamp
org.apache.spark.sql.catalyst.expressions.GlutenDateExpressionsSuite#Gluten - to_unix_timestamp
org.apache.spark.sql.catalyst.expressions.GlutenDateExpressionsSuite#Gluten - unix_timestamp
org.apache.spark.sql.catalyst.expressions.GlutenTryCastSuite#Gluten - cast string to timestamp
org.apache.spark.sql.execution.BoltLocalCacheSuite#(aborted)
org.apache.spark.sql.execution.GlutenCoalesceShufflePartitionsSuite#Gluten - determining the number of reducers: plan already partitioned
org.apache.spark.sql.execution.GlutenCoalesceShufflePartitionsSuite#Gluten - determining the number of reducers: plan already partitioned(minNumPostShufflePartitions: 5)
org.apache.spark.sql.execution.datasources.parquet.GlutenParquetIOSuite#SPARK-34817: Read UINT_64 as Decimal from parquet
Loading
Loading