diff --git a/.github/actions/build-wheel/action.yml b/.github/actions/build-wheel/action.yml new file mode 100644 index 000000000..25f75d1f8 --- /dev/null +++ b/.github/actions/build-wheel/action.yml @@ -0,0 +1,110 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Composite action that builds a datafusion-python wheel with maturin. +# Centralises the abi3-vs-free-threaded argument logic so platform jobs +# stay short and changes to wheel-build flags happen in one place. + +name: "Build wheel" +description: "Build datafusion-python wheel with maturin (abi3 or free-threaded)" + +inputs: + target: + description: "Rust target triple (e.g. x86_64-unknown-linux-gnu). Required when manylinux is set; ignored for native builds." + required: false + default: "" + python-tag: + description: "abi3 (covers 3.10..3.14 GIL builds) or a free-threaded interpreter such as 3.13t / 3.14t" + required: true + build-mode: + description: "release or debug" + required: true + features: + description: "Comma-separated extra features (in addition to those implied by the python-tag)" + required: false + default: "substrait" + manylinux: + description: "manylinux tag for maturin-action (e.g. 2_28). Leave empty to use uv-run maturin natively." 
+ required: false + default: "" + out-dir: + description: "Output directory for built wheels" + required: false + default: "dist" + +outputs: + args: + description: "Computed maturin args (for debugging)" + value: ${{ steps.args.outputs.args }} + +runs: + using: "composite" + steps: + - name: Compute maturin args + id: args + shell: bash + run: | + set -euo pipefail + FEATURES="${{ inputs.features }}" + TAG="${{ inputs.python-tag }}" + if [ "$TAG" = "abi3" ]; then + # Cargo default features (`mimalloc`, `abi3`) stay enabled on this path. + # One wheel covers Python 3.10..3.14 (GIL builds only). + BUILD_ARGS="--features ${FEATURES}" + else + # Free-threaded build: --no-default-features drops abi3 (and mimalloc), so re-add mimalloc and pin the interpreter. + if [ "${RUNNER_OS:-}" = "Windows" ]; then + # Windows free-threaded builds ship as `python.exe` (no `tN` + # suffix). Resolve sys.executable so the path is independent of + # PATH ordering, and assert the interpreter is actually + # free-threaded before we hand the wheel off. + INTERP=$(python -c 'import sys; print(sys.executable)') + python -c "import sysconfig, sys; \ + v = sysconfig.get_config_var('Py_GIL_DISABLED'); \ + sys.exit(0 if v == 1 else f'expected free-threaded interpreter, got Py_GIL_DISABLED={v!r} at {sys.executable}')" + # Backslashes in BUILD_ARGS would be parsed as escapes when the + # output is re-expanded in the next step; use forward slashes + # (maturin/Rust accept them on Windows). 
+ INTERP="${INTERP//\\//}" + else + INTERP="python${TAG}" + fi + BUILD_ARGS="--no-default-features --features mimalloc,${FEATURES} --interpreter ${INTERP}" + fi + if [ "${{ inputs.build-mode }}" = "release" ]; then + BUILD_ARGS="--release --strip ${BUILD_ARGS}" + fi + BUILD_ARGS="${BUILD_ARGS} --out ${{ inputs.out-dir }}" + echo "args=${BUILD_ARGS}" >> "$GITHUB_OUTPUT" + echo "maturin args: ${BUILD_ARGS}" + + - name: Build via maturin-action (manylinux container) + if: inputs.manylinux != '' + uses: PyO3/maturin-action@v1 + with: + target: ${{ inputs.target }} + manylinux: ${{ inputs.manylinux }} + maturin-version: "1.13.3" + args: ${{ steps.args.outputs.args }} + rustup-components: rust-std + + - name: Build via native maturin + if: inputs.manylinux == '' + shell: bash + # Use `uvx` so maturin is available even when `uv sync` was skipped + # (free-threaded matrix entries don't pre-populate the project venv). + run: uvx maturin@1.13.3 build ${{ steps.args.outputs.args }} diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 37a9dba03..593a343e1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -135,8 +135,12 @@ jobs: # ============================================ build-manylinux-x86_64: needs: [generate-license, lint-rust, lint-python] - name: ManyLinux x86_64 + name: Linux x86_64 (${{ matrix.python-tag }}) runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-tag: ["abi3", "3.13t", "3.14t"] steps: - uses: actions/checkout@v6 @@ -153,7 +157,7 @@ jobs: - name: Cache Cargo uses: Swatinem/rust-cache@v2 with: - key: ${{ inputs.build_mode }} + key: ${{ inputs.build_mode }}-${{ matrix.python-tag }} - uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b with: @@ -172,25 +176,18 @@ jobs: free -h swapon --show - - name: Build (release mode) - uses: PyO3/maturin-action@v1 - if: inputs.build_mode == 'release' - with: - target: x86_64-unknown-linux-gnu - manylinux: "2_28" - args: --release 
+ --strip --features protoc,substrait --out dist - rustup-components: rust-std - - - name: Build (debug mode) - uses: PyO3/maturin-action@v1 - if: inputs.build_mode == 'debug' + - name: Build wheel + uses: ./.github/actions/build-wheel with: target: x86_64-unknown-linux-gnu + python-tag: ${{ matrix.python-tag }} + build-mode: ${{ inputs.build_mode }} + features: "protoc,substrait" manylinux: "2_28" - args: --features protoc,substrait --out dist - rustup-components: rust-std + # FFI test wheel only needs to be built once per platform; gate to abi3. - name: Build FFI test library + if: matrix.python-tag == 'abi3' uses: PyO3/maturin-action@v1 with: target: x86_64-unknown-linux-gnu @@ -202,10 +199,11 @@ jobs: - name: Archive wheels uses: actions/upload-artifact@v7 with: - name: dist-manylinux-x86_64 + name: dist-manylinux-x86_64-${{ matrix.python-tag }} path: dist/* - name: Archive FFI test wheel + if: matrix.python-tag == 'abi3' uses: actions/upload-artifact@v7 with: name: test-ffi-manylinux-x86_64 @@ -216,8 +214,12 @@ jobs: # ============================================ build-manylinux-aarch64: needs: [generate-license, lint-rust, lint-python] - name: ManyLinux arm64 + name: Linux arm64 (${{ matrix.python-tag }}) runs-on: ubuntu-24.04-arm + strategy: + fail-fast: false + matrix: + python-tag: ["abi3", "3.13t", "3.14t"] steps: - uses: actions/checkout@v6 @@ -234,7 +236,7 @@ jobs: - name: Cache Cargo uses: Swatinem/rust-cache@v2 with: - key: ${{ inputs.build_mode }} + key: ${{ inputs.build_mode }}-${{ matrix.python-tag }} - uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b with: @@ -253,29 +255,20 @@ jobs: free -h swapon --show - - name: Build (release mode) - uses: PyO3/maturin-action@v1 - if: inputs.build_mode == 'release' - with: - target: aarch64-unknown-linux-gnu - manylinux: "2_28" - args: --release --strip --features protoc,substrait --out dist - rustup-components: rust-std - - - name: Build (debug mode) - uses: PyO3/maturin-action@v1 - if: 
+ inputs.build_mode == 'debug' + - name: Build wheel + uses: ./.github/actions/build-wheel with: target: aarch64-unknown-linux-gnu + python-tag: ${{ matrix.python-tag }} + build-mode: ${{ inputs.build_mode }} + features: "protoc,substrait" manylinux: "2_28" - args: --features protoc,substrait --out dist - rustup-components: rust-std - name: Archive wheels uses: actions/upload-artifact@v7 if: inputs.build_mode == 'release' with: - name: dist-manylinux-aarch64 + name: dist-manylinux-aarch64-${{ matrix.python-tag }} path: dist/* # ============================================ @@ -283,13 +276,13 @@ jobs: # ============================================ build-python-mac-win: needs: [generate-license, lint-rust, lint-python] - name: macOS arm64 & Windows + name: ${{ matrix.os == 'macos-latest' && 'macOS arm64' || 'Windows x86_64' }} (${{ matrix.python-tag }}) runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: - python-version: ["3.10"] os: [macos-latest, windows-latest] + python-tag: ["abi3", "3.13t", "3.14t"] steps: - uses: actions/checkout@v6 @@ -305,7 +298,14 @@ jobs: - name: Cache Cargo uses: Swatinem/rust-cache@v2 with: - key: ${{ inputs.build_mode }} + key: ${{ inputs.build_mode }}-${{ matrix.python-tag }} + + - name: Setup Python (free-threaded) + if: matrix.python-tag != 'abi3' + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-tag }} + freethreaded: true - uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b with: @@ -318,22 +318,22 @@ jobs: repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Install dependencies + if: matrix.python-tag == 'abi3' run: uv sync --dev --no-install-package datafusion - # Run clippy BEFORE maturin so we can avoid rebuilding. The features must match - # exactly the features used by maturin. Linux maturin builds need to happen in a - # container so only run this for our mac runner. 
+ # Clippy is interpreter-agnostic; run it once, on the macOS abi3 entry (the + # step is skipped on Windows), so the matrix doesn't pay the cost three times. - name: Run Clippy - if: matrix.os != 'windows-latest' + if: matrix.os != 'windows-latest' && matrix.python-tag == 'abi3' run: cargo clippy --no-deps --all-targets --features substrait -- -D warnings - - name: Build Python package (release mode) - if: inputs.build_mode == 'release' - run: uv run --no-project maturin build --release --strip --features substrait - - - name: Build Python package (debug mode) - if: inputs.build_mode != 'release' - run: uv run --no-project maturin build --features substrait + - name: Build wheel + uses: ./.github/actions/build-wheel + with: + python-tag: ${{ matrix.python-tag }} + build-mode: ${{ inputs.build_mode }} + features: "substrait" + out-dir: "target/wheels" - name: List Windows wheels if: matrix.os == 'windows-latest' @@ -350,7 +350,7 @@ jobs: uses: actions/upload-artifact@v7 if: inputs.build_mode == 'release' with: - name: dist-${{ matrix.os }} + name: dist-${{ matrix.os }}-${{ matrix.python-tag }} path: target/wheels/* # ============================================ @@ -359,11 +359,12 @@ jobs: build-macos-x86_64: if: inputs.build_mode == 'release' needs: [generate-license, lint-rust, lint-python] + name: macOS x86_64 (${{ matrix.python-tag }}) runs-on: macos-15-intel strategy: fail-fast: false matrix: - python-version: ["3.10"] + python-tag: ["abi3", "3.13t", "3.14t"] steps: - uses: actions/checkout@v6 @@ -379,7 +380,14 @@ jobs: - name: Cache Cargo uses: Swatinem/rust-cache@v2 with: - key: ${{ inputs.build_mode }} + key: ${{ inputs.build_mode }}-${{ matrix.python-tag }} + + - name: Setup Python (free-threaded) + if: matrix.python-tag != 'abi3' + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-tag }} + freethreaded: true - uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b with: @@ -392,11 +400,16 @@ jobs: repo-token: ${{ secrets.GITHUB_TOKEN }} - name: 
+ Install dependencies + if: matrix.python-tag == 'abi3' run: uv sync --dev --no-install-package datafusion - - name: Build (release mode) - run: | - uv run --no-project maturin build --release --strip --features substrait + - name: Build wheel + uses: ./.github/actions/build-wheel + with: + python-tag: ${{ matrix.python-tag }} + build-mode: ${{ inputs.build_mode }} + features: "substrait" + out-dir: "target/wheels" - name: List Mac wheels run: find target/wheels/ @@ -404,7 +417,7 @@ jobs: - name: Archive wheels uses: actions/upload-artifact@v7 with: - name: dist-macos-aarch64 + name: dist-macos-aarch64-${{ matrix.python-tag }} path: target/wheels/* # ============================================ @@ -509,11 +522,12 @@ jobs: with: enable-cache: true - # Download the Linux wheel built in the previous job + # Download the Linux wheel built in the previous job. + # Docs only need the abi3 wheel — interpreter doesn't matter for sphinx. - name: Download pre-built Linux wheel uses: actions/download-artifact@v8 with: - name: dist-manylinux-x86_64 + name: dist-manylinux-x86_64-abi3 path: wheels/ # Install from the pre-built wheels diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c597ab308..b13a3dc89 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -15,8 +15,9 @@ # specific language governing permissions and limitations # under the License. -# Reusable workflow for running tests -# This ensures the same tests run for both debug (PRs) and release (main/tags) builds +# Reusable workflow for running tests. +# Single matrix covers both GIL (abi3 wheel) and free-threaded +# (per-interpreter wheels) builds. name: Test @@ -32,15 +33,16 @@ jobs: strategy: fail-fast: false matrix: - python-version: - - "3.10" - - "3.11" - - "3.12" - - "3.13" - - "3.14" - toolchain: - - "stable" - + include: + # GIL builds — all share the same abi3 wheel. 
+ - { python-version: "3.10", wheel-tag: "abi3", freethreaded: false } + - { python-version: "3.11", wheel-tag: "abi3", freethreaded: false } + - { python-version: "3.12", wheel-tag: "abi3", freethreaded: false } + - { python-version: "3.13", wheel-tag: "abi3", freethreaded: false } + - { python-version: "3.14", wheel-tag: "abi3", freethreaded: false } + # Free-threaded builds — one wheel per interpreter. + - { python-version: "3.13t", wheel-tag: "3.13t", freethreaded: true } + - { python-version: "3.14t", wheel-tag: "3.14t", freethreaded: true } steps: - uses: actions/checkout@v6 @@ -48,40 +50,38 @@ jobs: uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} + freethreaded: ${{ matrix.freethreaded }} - name: Cache Cargo uses: actions/cache@v5 with: path: ~/.cargo - key: cargo-cache-${{ matrix.toolchain }}-${{ hashFiles('Cargo.lock') }} + key: cargo-cache-stable-${{ hashFiles('Cargo.lock') }} - name: Install dependencies uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b with: enable-cache: true - # Download the Linux wheel built in the build workflow - name: Download pre-built Linux wheel uses: actions/download-artifact@v8 with: - name: dist-manylinux-x86_64 + name: dist-manylinux-x86_64-${{ matrix.wheel-tag }} path: wheels/ - # Download the FFI test wheel + # FFI test wheel only built once (under the abi3 matrix entry in build.yml). 
+ - name: Download pre-built FFI test wheel + if: matrix.wheel-tag == 'abi3' uses: actions/download-artifact@v8 with: name: test-ffi-manylinux-x86_64 path: wheels/ - # Install from the pre-built wheels - name: Install from pre-built wheels run: | set -x - uv venv - # Install development dependencies + uv venv --python ${{ matrix.python-version }} uv sync --dev --no-install-package datafusion - # Install all pre-built wheels WHEELS=$(find wheels/ -name "*.whl") if [ -n "$WHEELS" ]; then echo "Installing wheels:" @@ -95,16 +95,22 @@ jobs: - name: Run tests env: RUST_BACKTRACE: 1 + # On free-threaded interpreters, PYTHON_GIL=0 forces the GIL to stay off + # even if an extension module would otherwise make CPython re-enable it. + PYTHON_GIL: ${{ matrix.freethreaded && '0' || '' }} run: | git submodule update --init uv run --no-project pytest -v --import-mode=importlib + # FFI + TPC-H examples run only on the GIL (abi3) matrix entries. - name: FFI unit tests + if: matrix.wheel-tag == 'abi3' run: | cd examples/datafusion-ffi-example uv run --no-project pytest python/tests/_test*.py - name: Run tpchgen-cli to create 1 Gb dataset + if: matrix.wheel-tag == 'abi3' run: | mkdir examples/tpch/data cd examples/tpch/data @@ -112,6 +118,7 @@ jobs: uv run --no-project tpchgen-cli -s 1 --format=parquet - name: Run TPC-H examples + if: matrix.wheel-tag == 'abi3' run: | cd examples/tpch uv run --no-project pytest _tests.py diff --git a/Cargo.lock b/Cargo.lock index 0c4b77582..3252346e9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2928,6 +2928,7 @@ version = "0.28.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8bf94ee265674bf76c09fa430b0e99c26e319c945d96ca0d5a8215f31bf81cf7" dependencies = [ + "python3-dll-a", "target-lexicon", ] @@ -2977,6 +2978,15 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "python3-dll-a" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d80ba7540edb18890d444c5aa8e1f1f99b1bdf26fb26ae383135325f4a36042b" +dependencies = [ 
+ "cc", +] + [[package]] name = "quick-xml" version = "0.39.2" diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index d714dc978..1f5b4e305 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -42,8 +42,7 @@ tokio = { workspace = true, features = [ ] } pyo3 = { workspace = true, features = [ "extension-module", - "abi3", - "abi3-py310", + "generate-import-lib", ] } pyo3-async-runtimes = { workspace = true, features = ["tokio-runtime"] } pyo3-log = { workspace = true } @@ -74,7 +73,11 @@ prost-types = { workspace = true } pyo3-build-config = { workspace = true } [features] -default = ["mimalloc"] +default = ["mimalloc", "abi3"] +# Stable ABI build — single wheel covers Python 3.10..3.14 (GIL builds only). +# Mutually exclusive with free-threaded interpreters (cp313t / cp314t); the +# free-threaded wheel build must pass --no-default-features. +abi3 = ["pyo3/abi3", "pyo3/abi3-py310"] protoc = ["datafusion-substrait/protoc"] substrait = ["dep:datafusion-substrait"] diff --git a/pyproject.toml b/pyproject.toml index a02f4608a..b25d37166 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,7 @@ classifiers = [ "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Programming Language :: Python :: 3.14", + "Programming Language :: Python :: Free Threading :: 2 - Beta", "Programming Language :: Python", "Programming Language :: Rust", ]