From 2fa93779d0eb238f31850fa002e642be00157571 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Wed, 11 Feb 2026 07:01:12 +0800 Subject: [PATCH 01/10] Enhance CI workflow and optimize release profile settings --- .github/workflows/extended.yml | 12 +++++++++--- Cargo.toml | 6 +++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/.github/workflows/extended.yml b/.github/workflows/extended.yml index e9eb27dd96527..c7216b5628a4d 100644 --- a/.github/workflows/extended.yml +++ b/.github/workflows/extended.yml @@ -167,11 +167,17 @@ jobs: uses: ./.github/actions/setup-builder with: rust-version: stable + - name: Build sqllogictest binary + run: | + TEST_BIN=$(cargo test --features backtrace,parquet_encryption --profile release-nonlto --test sqllogictests --no-run --message-format=json | sed -n 's/.*"executable":"\([^"]*\)".*/\1/p' | head -n 1) + if [ -z "$TEST_BIN" ]; then + echo "Could not find sqllogictests test binary" + exit 1 + fi + echo "TEST_BIN=$TEST_BIN" >> "$GITHUB_ENV" - name: Run sqllogictest run: | - cargo test --features backtrace,parquet_encryption --profile release-nonlto --test sqllogictests -- --include-sqlite + "$TEST_BIN" --include-sqlite cargo clean - - diff --git a/Cargo.toml b/Cargo.toml index 157dd68b9cdb0..f0f990818b6c9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -249,13 +249,13 @@ strip = true # Eliminate debug information to minimize binary size [profile.release-nonlto] codegen-units = 16 debug-assertions = false -incremental = false +debug = false +incremental = true inherits = "release" lto = false -opt-level = 3 +opt-level = 2 overflow-checks = false rpath = false -strip = false # Retain debug info for flamegraphs [profile.ci] debug = false From 1b4dbee23963fdbb6f8ded0336a12b813d9b4da6 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Wed, 11 Feb 2026 08:24:02 +0800 Subject: [PATCH 02/10] fix: Include extended.yml and Cargo.toml in workflow trigger paths --- .github/workflows/extended.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/extended.yml b/.github/workflows/extended.yml index c7216b5628a4d..5c80daf20699e 100644 --- a/.github/workflows/extended.yml +++ b/.github/workflows/extended.yml @@ -45,6 +45,8 @@ on: - 'datafusion/expr*/**/*.rs' - 'datafusion/optimizer/**/*.rs' - 'datafusion-testing' + - '.github/workflows/extended.yml' + - 'Cargo.toml' workflow_dispatch: inputs: pr_number: From be84d47943b22f290d5ef46b40e417d621073870 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Wed, 11 Feb 2026 08:48:24 +0800 Subject: [PATCH 03/10] ci: run sqllogictest binary from crate dir --- .github/workflows/extended.yml | 40 +++++++++++++++++----------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/.github/workflows/extended.yml b/.github/workflows/extended.yml index 5c80daf20699e..3ac88fb1d7aa7 100644 --- a/.github/workflows/extended.yml +++ b/.github/workflows/extended.yml @@ -35,28 +35,28 @@ on: # support extended test suite for release candidate branches, # it is not expected to have many changes in these branches, # so running extended tests is not a burden - - 'branch-*' + - "branch-*" # Also run for changes to some critical areas that are most likely # to trigger errors in extended tests pull_request: - branches: [ '**' ] + branches: ["**"] paths: - - 'datafusion/physical*/**/*.rs' - - 'datafusion/expr*/**/*.rs' - - 'datafusion/optimizer/**/*.rs' - - 'datafusion-testing' - - '.github/workflows/extended.yml' - - 'Cargo.toml' + - "datafusion/physical*/**/*.rs" + - "datafusion/expr*/**/*.rs" + - "datafusion/optimizer/**/*.rs" + - "datafusion-testing" + - ".github/workflows/extended.yml" + - "Cargo.toml" workflow_dispatch: inputs: pr_number: - description: 'Pull request number' + description: "Pull request number" type: string check_run_id: - description: 'Check run ID for status updates' + description: "Check run ID for status updates" type: string pr_head_sha: - description: 'PR head SHA' + description: "PR head SHA" type: string permissions: @@ -64,14 +64,13 @@ permissions: checks: write jobs: - # Check crate compiles and base cargo check passes linux-build-lib: name: linux build test runs-on: ubuntu-latest # note: do not use amd/rust container to preserve disk space steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push submodules: true @@ -95,13 +94,13 @@ jobs: runs-on: ubuntu-latest # note: do not use amd/rust container to preserve disk space steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push submodules: true fetch-depth: 1 - name: Free Disk Space (Ubuntu) - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - name: Install Rust run: | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y @@ -139,7 +138,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push submodules: true @@ -160,7 +159,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push submodules: true @@ -179,7 +178,8 @@ jobs: echo "TEST_BIN=$TEST_BIN" >> "$GITHUB_ENV" - name: Run sqllogictest run: | - "$TEST_BIN" --include-sqlite + ( + cd datafusion/sqllogictest + "$TEST_BIN" --include-sqlite + ) cargo clean - - From 304d28d17460ac938a06a99bd0115c1695e4bd8c Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Wed, 11 Feb 2026 10:04:09 +0800 Subject: [PATCH 04/10] revert fix: Include extended.yml and Cargo.toml in workflow trigger paths --- .github/workflows/extended.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/extended.yml b/.github/workflows/extended.yml index 3ac88fb1d7aa7..a03c75b553821 100644 --- a/.github/workflows/extended.yml +++ b/.github/workflows/extended.yml @@ -45,8 +45,6 @@ on: - "datafusion/expr*/**/*.rs" - "datafusion/optimizer/**/*.rs" - "datafusion-testing" - - ".github/workflows/extended.yml" - - "Cargo.toml" workflow_dispatch: inputs: pr_number: From 7b5b2a90ae0c83b57d67cba5ff583c92cc47049c Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Wed, 11 Feb 2026 10:10:29 +0800 Subject: [PATCH 05/10] feat: update release profile settings in Cargo.toml - Set incremental to false for release-nonlto profile - Upgrade optimization level to 3 - Retain debug information for flamegraphs by setting strip to false --- Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f0f990818b6c9..157dd68b9cdb0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -249,13 +249,13 @@ strip = true # Eliminate debug information to minimize binary size [profile.release-nonlto] codegen-units = 16 debug-assertions = false -debug = false -incremental = true +incremental = false inherits = "release" lto = false -opt-level = 2 +opt-level = 3 overflow-checks = false rpath = false +strip = false # Retain debug info for flamegraphs [profile.ci] debug = false From 8d9bcb8f2079f6e75264d5f264c831c121e8d3a9 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Mon, 9 Feb 2026 15:39:34 +0800 Subject: [PATCH 06/10] trigger CI extended tests --- .../simplify_expressions/expr_simplifier.rs | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index c6644e008645a..feb09db1d688a 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -648,6 +648,12 @@ impl ConstEvaluator { } Expr::Cast(Cast { expr, data_type }) | Expr::TryCast(TryCast { expr, data_type }) => { + // Fast path: only struct targets need struct-specific foldability checks. + // For non-struct casts, avoid deriving the source type from an empty schema. + if !matches!(data_type, DataType::Struct(_)) { + return true; + } + if let ( Ok(DataType::Struct(source_fields)), DataType::Struct(target_fields), @@ -5339,4 +5345,27 @@ mod tests { "Struct cast with empty (0-row) array should remain unchanged" ); } + + #[test] + fn test_cast_heavy_non_struct_chain_foldable() { + // Exercise a cast-heavy, non-struct simplification path to protect against + // planner regressions in cast eligibility checks. + let expr = (0..64).fold( + Expr::Literal(ScalarValue::Int32(Some(7)), None), + |acc, i| { + let target_type = if i % 2 == 0 { + DataType::Int64 + } else { + DataType::Int32 + }; + Expr::Cast(Cast::new(Box::new(acc), target_type)) + }, + ); + + let simplifier = + ExprSimplifier::new(SimplifyContext::default().with_schema(test_schema())); + let result = simplifier.simplify(expr).unwrap(); + + assert_eq!(result, Expr::Literal(ScalarValue::Int32(Some(7)), None)); + } } From 3e64ee22016a75e3efa450cdd9f866a00cdff5f2 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Wed, 11 Feb 2026 10:34:47 +0800 Subject: [PATCH 07/10] Revert "trigger CI extended tests" This reverts commit 8d9bcb8f2079f6e75264d5f264c831c121e8d3a9. --- .../simplify_expressions/expr_simplifier.rs | 29 ------------------- 1 file changed, 29 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index feb09db1d688a..c6644e008645a 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -648,12 +648,6 @@ impl ConstEvaluator { } Expr::Cast(Cast { expr, data_type }) | Expr::TryCast(TryCast { expr, data_type }) => { - // Fast path: only struct targets need struct-specific foldability checks. - // For non-struct casts, avoid deriving the source type from an empty schema. - if !matches!(data_type, DataType::Struct(_)) { - return true; - } - if let ( Ok(DataType::Struct(source_fields)), DataType::Struct(target_fields), @@ -5345,27 +5339,4 @@ mod tests { "Struct cast with empty (0-row) array should remain unchanged" ); } - - #[test] - fn test_cast_heavy_non_struct_chain_foldable() { - // Exercise a cast-heavy, non-struct simplification path to protect against - // planner regressions in cast eligibility checks. - let expr = (0..64).fold( - Expr::Literal(ScalarValue::Int32(Some(7)), None), - |acc, i| { - let target_type = if i % 2 == 0 { - DataType::Int64 - } else { - DataType::Int32 - }; - Expr::Cast(Cast::new(Box::new(acc), target_type)) - }, - ); - - let simplifier = - ExprSimplifier::new(SimplifyContext::default().with_schema(test_schema())); - let result = simplifier.simplify(expr).unwrap(); - - assert_eq!(result, Expr::Literal(ScalarValue::Int32(Some(7)), None)); - } } From e0ae244590312fcc45773bcd0a2986e944ed7bb4 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Wed, 11 Feb 2026 12:09:54 +0800 Subject: [PATCH 08/10] formatting --- .github/workflows/extended.yml | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/.github/workflows/extended.yml b/.github/workflows/extended.yml index a03c75b553821..390f14287ca8b 100644 --- a/.github/workflows/extended.yml +++ b/.github/workflows/extended.yml @@ -35,26 +35,26 @@ on: # support extended test suite for release candidate branches, # it is not expected to have many changes in these branches, # so running extended tests is not a burden - - "branch-*" + - 'branch-*' # Also run for changes to some critical areas that are most likely # to trigger errors in extended tests pull_request: - branches: ["**"] + branches: [ '**' ] paths: - - "datafusion/physical*/**/*.rs" - - "datafusion/expr*/**/*.rs" - - "datafusion/optimizer/**/*.rs" - - "datafusion-testing" + - 'datafusion/physical*/**/*.rs' + - 'datafusion/expr*/**/*.rs' + - 'datafusion/optimizer/**/*.rs' + - 'datafusion-testing' workflow_dispatch: inputs: pr_number: - description: "Pull request number" + description: 'Pull request number' type: string check_run_id: - description: "Check run ID for status updates" + description: 'Check run ID for status updates' type: string pr_head_sha: - description: "PR head SHA" + description: 'PR head SHA' type: string permissions: @@ -62,13 +62,14 @@ permissions: checks: write jobs: + # Check crate compiles and base cargo check passes linux-build-lib: name: linux build test runs-on: ubuntu-latest # note: do not use amd/rust container to preserve disk space steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push submodules: true @@ -92,13 +93,13 @@ jobs: runs-on: ubuntu-latest # note: do not use amd/rust container to preserve disk space steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push submodules: true fetch-depth: 1 - name: Free Disk Space (Ubuntu) - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - name: Install Rust run: | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y @@ -136,7 +137,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push submodules: true @@ -157,7 +158,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: ref: ${{ github.event.inputs.pr_head_sha }} # will be empty if triggered by push submodules: true From ceb7d7645e0deae691eb9ceafc64c5be4d2bf355 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Thu, 12 Feb 2026 12:21:23 +0800 Subject: [PATCH 09/10] fix: Update sqllogictest binary build process and adjust working directory --- .github/workflows/extended.yml | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/.github/workflows/extended.yml b/.github/workflows/extended.yml index 390f14287ca8b..9957e135fe8e9 100644 --- a/.github/workflows/extended.yml +++ b/.github/workflows/extended.yml @@ -169,16 +169,34 @@ jobs: rust-version: stable - name: Build sqllogictest binary run: | - TEST_BIN=$(cargo test --features backtrace,parquet_encryption --profile release-nonlto --test sqllogictests --no-run --message-format=json | sed -n 's/.*"executable":"\([^"]*\)".*/\1/p' | head -n 1) + # Cargo emits test binaries with hashed filenames under target/.../deps. + TEST_BIN=$(cargo build --profile release-nonlto --features backtrace,parquet_encryption --package datafusion-sqllogictest --test sqllogictests --message-format=json | python - <<'PY' +import json +import sys + +for line in sys.stdin: + try: + payload = json.loads(line) + except json.JSONDecodeError: + continue + if payload.get("reason") != "compiler-artifact": + continue + target = payload.get("target") or {} + if target.get("name") == "sqllogictests" and "test" in target.get("kind", []): + executable = payload.get("executable") + if executable: + print(executable) + break +PY + ) if [ -z "$TEST_BIN" ]; then echo "Could not find sqllogictests test binary" exit 1 fi echo "TEST_BIN=$TEST_BIN" >> "$GITHUB_ENV" - name: Run sqllogictest + working-directory: datafusion/sqllogictest run: | - ( - cd datafusion/sqllogictest - "$TEST_BIN" --include-sqlite - ) + # sqllogictests expects crate-relative paths for test data. + "$TEST_BIN" --include-sqlite cargo clean From 882251a56c1e3e6a839cb7bddc0d1c69680c0b02 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Thu, 12 Feb 2026 12:30:22 +0800 Subject: [PATCH 10/10] fix: Simplify sqllogictest binary retrieval in extended tests workflow --- .github/workflows/extended.yml | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/.github/workflows/extended.yml b/.github/workflows/extended.yml index 9957e135fe8e9..a00b22edbd570 100644 --- a/.github/workflows/extended.yml +++ b/.github/workflows/extended.yml @@ -170,25 +170,8 @@ jobs: - name: Build sqllogictest binary run: | # Cargo emits test binaries with hashed filenames under target/.../deps. - TEST_BIN=$(cargo build --profile release-nonlto --features backtrace,parquet_encryption --package datafusion-sqllogictest --test sqllogictests --message-format=json | python - <<'PY' -import json -import sys - -for line in sys.stdin: - try: - payload = json.loads(line) - except json.JSONDecodeError: - continue - if payload.get("reason") != "compiler-artifact": - continue - target = payload.get("target") or {} - if target.get("name") == "sqllogictests" and "test" in target.get("kind", []): - executable = payload.get("executable") - if executable: - print(executable) - break -PY - ) + # We use head to pick the first matching sqllogictests artifact from the JSON stream. + TEST_BIN=$(cargo build --profile release-nonlto --features backtrace,parquet_encryption --package datafusion-sqllogictest --test sqllogictests --message-format=json | sed -n 's/.*"executable":"\([^"]*\)".*/\1/p' | head -n 1) if [ -z "$TEST_BIN" ]; then echo "Could not find sqllogictests test binary" exit 1