From cca31292cfed59388173b0eda74fb4093e8b34cb Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Sat, 16 May 2026 13:03:28 +0200 Subject: [PATCH 1/6] Run RISC-V tests with multiple RVV QEMU configurations Given RISC-V allows different hardware implementations to have different vector lengths (similar to ARM SVE), we want to make sure that we test on different configurations. Luckily, QEMU allows us to simply set a vlen=<128,256,512,...> parameter on QEMU_CPU to emulate different vector lengths. --- .github/workflows/_test_riscv.yml | 11 +++++++++-- .github/workflows/riscv64.yml | 14 ++++++++++++++ examples/riscv/run.sh | 4 ++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_test_riscv.yml b/.github/workflows/_test_riscv.yml index da19dfc9bda..e7a93a36d10 100644 --- a/.github/workflows/_test_riscv.yml +++ b/.github/workflows/_test_riscv.yml @@ -27,6 +27,10 @@ on: required: false type: boolean default: false + qemu-cpu: + description: 'Configuration(s) for the CPU to emulate with QEMU, expecting a JSON array' + required: true + type: string gcc-version: description: 'The version of GCC to use' required: false @@ -52,5 +56,8 @@ jobs: source .ci/scripts/utils.sh install_executorch "--use-pt-pinned-commit" - export GCC_VERSION=${{ inputs.gcc-version }} - bash .ci/scripts/test_riscv_qemu.sh --model="${{ inputs.model }}" ${{ inputs.xnnpack && '--xnnpack' || '' }} ${{ inputs.quantize && '--quantize' || '' }} + echo '${{ inputs.qemu-cpu }}' | jq -r '.[]' | while IFS= read -r qemu_cpu; do + export QEMU_CPU="${qemu_cpu}" + export GCC_VERSION=${{ inputs.gcc-version }} + bash .ci/scripts/test_riscv_qemu.sh --model="${{ inputs.model }}" ${{ inputs.xnnpack && '--xnnpack' || '' }} ${{ inputs.quantize && '--quantize' || '' }} + done diff --git a/.github/workflows/riscv64.yml b/.github/workflows/riscv64.yml index 9823db09cc1..d7beae7dc8e 100644 --- a/.github/workflows/riscv64.yml +++ b/.github/workflows/riscv64.yml @@ -50,6 +50,20 @@ jobs: model: ${{ 
matrix.model }} xnnpack: ${{ matrix.xnnpack }} quantize: ${{ matrix.quantize }} + # With XNNPACK, test multiple RVV vector lengths (vlen); otherwise RVV is disabled (v=false) + qemu-cpu: >- + ${{ + case( + matrix.xnnpack, '[ + "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=128,elen=64,vext_spec=v1.0", + "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=256,elen=64,vext_spec=v1.0", + "rv64,zba=true,zbb=true,zbs=true,v=true,vlen=512,elen=64,vext_spec=v1.0" + ]', + '[ + "rv64,zba=true,zbb=true,zbs=true,v=false" + ]' + ) + }} # XNNPACK requires GCC 14+ gcc-version: ${{ matrix.xnnpack && 14 || 11 }} docker-image: ${{ matrix.xnnpack && 'ci-image:executorch-ubuntu-24.04-gcc14' || 'ci-image:executorch-ubuntu-22.04-gcc11' }} diff --git a/examples/riscv/run.sh b/examples/riscv/run.sh index 644944ab8a4..d6e86031ac9 100755 --- a/examples/riscv/run.sh +++ b/examples/riscv/run.sh @@ -111,6 +111,10 @@ hash "${qemu}" 2>/dev/null || { # linker (ld-linux-riscv64-lp64d.so.1) referenced in the ELF resolves. export QEMU_LD_PREFIX="${QEMU_LD_PREFIX:-/usr/riscv64-linux-gnu}" +if [[ -n "${QEMU_CPU+x}" ]]; then + echo "[run.sh] QEMU_CPU=${QEMU_CPU}" +fi + log_file=$(mktemp) trap 'rm -f "${log_file}"' EXIT From 7eba60a0d4d28ba95b7c6d818f39d20e17a3bb04 Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Sat, 16 May 2026 13:29:15 +0200 Subject: [PATCH 2/6] Add XNNPACK coverage instrumentation for riscv64 --- .ci/scripts/test_riscv_qemu.sh | 15 +- .github/workflows/_test_riscv.yml | 28 ++- backends/xnnpack/CMakeLists.txt | 6 + examples/riscv/etdump_summary.py | 228 +++++++++++++++++++++++++ examples/riscv/run.sh | 25 ++- tools/cmake/preset/riscv64_linux.cmake | 12 ++ 6 files changed, 306 insertions(+), 8 deletions(-) create mode 100644 examples/riscv/etdump_summary.py diff --git a/.ci/scripts/test_riscv_qemu.sh b/.ci/scripts/test_riscv_qemu.sh index 0d8b2815f74..d1998561553 100755 --- a/.ci/scripts/test_riscv_qemu.sh +++ b/.ci/scripts/test_riscv_qemu.sh @@ -18,15 +18,18 @@ model="add" xnnpack=false quantize=false verbose=false 
+verbose_xnnpack=false usage() { cat < Which model to export and run (default: add) - --xnnpack Enable the XNNPACK backend (AOT partitioner + runtime) - --quantize Produce an 8-bit quantized model - -h, --help Show this help + --model= Which model to export and run (default: add) + --xnnpack Enable the XNNPACK backend (AOT partitioner + runtime) + --quantize Produce an 8-bit quantized model + --verbose Enable XNNPACK partitioner DEBUG logging and dump the lowered graph + --verbose-xnnpack Build XNNPACK with XNN_LOG_LEVEL=4 to log microkernel dispatch + -h, --help Show this help EOF } @@ -36,6 +39,7 @@ for arg in "$@"; do --xnnpack) xnnpack=true ;; --quantize) quantize=true ;; --verbose) verbose=true ;; + --verbose-xnnpack) verbose_xnnpack=true ;; -h|--help) usage; exit 0 ;; *) echo "Unknown option: $arg" >&2; usage; exit 1 ;; esac @@ -51,6 +55,9 @@ fi if ${verbose}; then run_extra_args+=(--verbose) fi +if ${verbose_xnnpack}; then + run_extra_args+=(--verbose-xnnpack) +fi bash "${et_root_dir}/examples/riscv/setup.sh" bash "${et_root_dir}/examples/riscv/run.sh" --model="${model}" "${run_extra_args[@]}" diff --git a/.github/workflows/_test_riscv.yml b/.github/workflows/_test_riscv.yml index e7a93a36d10..4be2732016e 100644 --- a/.github/workflows/_test_riscv.yml +++ b/.github/workflows/_test_riscv.yml @@ -59,5 +59,31 @@ jobs: echo '${{ inputs.qemu-cpu }}' | jq -r '.[]' | while IFS= read -r qemu_cpu; do export QEMU_CPU="${qemu_cpu}" export GCC_VERSION=${{ inputs.gcc-version }} - bash .ci/scripts/test_riscv_qemu.sh --model="${{ inputs.model }}" ${{ inputs.xnnpack && '--xnnpack' || '' }} ${{ inputs.quantize && '--quantize' || '' }} + bash .ci/scripts/test_riscv_qemu.sh \ + --model="${{ inputs.model }}" \ + ${{ inputs.xnnpack && '--xnnpack --verbose-xnnpack' || '' }} \ + ${{ inputs.quantize && '--quantize' || '' }} + + # Generate markdown table from riscv_test/${{ inputs.model }}_riscv.etdump.json, sorted by sum_ms + ( + etdump_json="riscv_test/${{ inputs.model 
}}_riscv.etdump.json" + echo "### Model=${{ inputs.model }} XNNPACK=${{ inputs.xnnpack }} Quantize=${{ inputs.quantize }} QEMU_CPU='${QEMU_CPU}'" + jq -r ' + def r3: (. * 1000 | round) / 1000; + ["Section","Op","Count","Sum (ms)","Avg (ms)","Max (ms)","Microkernels"], + ["---","---","---","---","---","---","---"], + ( [ (.ops[] | . + {section: "ops"}), + (.framework[] | . + {section: "framework"}) ] + | sort_by(-.sum_ms) | .[] + | [.section, .op, .count, (.sum_ms|r3), (.avg_ms|r3), (.max_ms|r3), ((.kernels // []) | join(", "))] ) + | "| " + (map(tostring) | join(" | ")) + " |" + ' "${etdump_json}" + echo + echo "
<details><summary>Registered XNNPACK microkernels</summary>" + echo + jq -r '.registered_kernels[] | "- `" + . + "`"' "${etdump_json}" + echo + echo "</details>
" + echo + ) >> $GITHUB_STEP_SUMMARY done diff --git a/backends/xnnpack/CMakeLists.txt b/backends/xnnpack/CMakeLists.txt index 1b46c993b17..cd0d945a84f 100644 --- a/backends/xnnpack/CMakeLists.txt +++ b/backends/xnnpack/CMakeLists.txt @@ -169,6 +169,12 @@ install( EXPORT ExecuTorchTargets DESTINATION ${CMAKE_INSTALL_LIBDIR} ) +if(DEFINED EXECUTORCH_XNNPACK_LOG_LEVEL) + target_compile_definitions( + xnnpack-logging PUBLIC XNN_LOG_LEVEL=${EXECUTORCH_XNNPACK_LOG_LEVEL} + ) +endif() + if(BUILD_TESTING) add_subdirectory(test) endif() diff --git a/examples/riscv/etdump_summary.py b/examples/riscv/etdump_summary.py new file mode 100644 index 00000000000..e4fc5a61d7e --- /dev/null +++ b/examples/riscv/etdump_summary.py @@ -0,0 +1,228 @@ +# Copyright 2026 The ExecuTorch Authors. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +"""Render a per-XNNPACK-op summary from an ETDump file.""" + +import argparse +import json +import re +import sys +from collections import defaultdict +from pathlib import Path + +from executorch.devtools import Inspector + + +# "Convolution (NHWC, F32) IGEMM #3" -> ("Convolution (NHWC, F32) IGEMM", 3) +_SEQ_RE = re.compile(r"^(.*?)\s+#(\d+)$") + +# Wrappers around per-op events; kept separate to avoid double-counting children. +FRAMEWORK_EVENTS = frozenset( + { + "Method::execute", + "Method::init", + "Program::load_method", + "DELEGATE_CALL", + "OPERATOR_CALL", + } +) + +_REG_LOG_RE = re.compile(r"Note \(XNNPACK\):.*microkernel '([^']+)'") + + +def parse_run_log(path: Path): + syms = set() + with open(path, errors="ignore") as f: + for line in f: + m = _REG_LOG_RE.search(line) + if m: + syms.add(m.group(1)) + return sorted(syms) + + +# Two-source mapping from an ETDump op name to a symbol-substring pattern. +# When the operator type uses xnn_microkernel_type_default, runtime.c does NOT +# append a category suffix, so we fall back to matching on the base op name. 
+_OP_NAME_RE = re.compile(r"^(.*?)\s*\(([^)]*)\)\s*(.*)$") +_DTYPE_TOKENS = frozenset( + { + "F32", + "F16", + "QS8", + "QU8", + "QC8", + "QC4", + "QD8", + "QC8W", + "QC4W", + "X8", + "X16", + "X24", + "X32", + "X64", + } +) +# Infix between the kind token and `_ukernel_`: zero or more `[a-z0-9]+_` +# segments (e.g. `_gemm_ukernel_`, `_gemm_minmax_ukernel_`, +# `_gemm_minmax_fp32_ukernel_`, ...). +_INFIX = r"(?:[a-z0-9]+_)*" +_KIND_PATTERN = { + # Microkernel categories appended by runtime.c (xnn_microkernel_type_to_string). + "GEMM": r"_gemm_" + _INFIX + r"ukernel_", + "IGEMM": r"_igemm_" + _INFIX + r"ukernel_", + "DWConv": r"_dwconv_" + _INFIX + r"ukernel_", + "Transpose": r"_transposec?_" + _INFIX + r"ukernel_", + "Reduce": r"_(?:rsum|rmax|rminmax|rdmax|rdsum)_" + _INFIX + r"ukernel_", + "Reduce2": r"_(?:rdmax|rdsum)_" + _INFIX + r"ukernel_", + "VMulCAddC": r"_vmulcaddc_" + _INFIX + r"ukernel_", + "Average Pooling": r"_(?:avgpool|gavgpool)_" + _INFIX + r"ukernel_", + "Pixelwise Average Pooling": r"_pavgpool_" + _INFIX + r"ukernel_", + "Conv2D HWC2CHW": r"_conv_hwc2chw_" + _INFIX + r"ukernel_", + "SPMM": r"_spmm_" + _INFIX + r"ukernel_", + "Subconv2D": r"_subconv2d_" + _INFIX + r"ukernel_", + # Base op names (default microkernel type, no category suffix in the ETDump name). 
+ "Add": r"_v(?:add|addc)_" + _INFIX + r"ukernel_", + "Subtract": r"_v(?:sub|subc|rsubc)_" + _INFIX + r"ukernel_", + "Multiply": r"_v(?:mul|mulc)_" + _INFIX + r"ukernel_", + "Divide": r"_v(?:div|divc|rdivc)_" + _INFIX + r"ukernel_", + "Maximum": r"_v(?:max|maxc)_" + _INFIX + r"ukernel_", + "Minimum": r"_v(?:min|minc)_" + _INFIX + r"ukernel_", + "Clamp": r"_vclamp_" + _INFIX + r"ukernel_", + "Sigmoid": r"_vsigmoid_" + _INFIX + r"ukernel_", + "Tanh": r"_vtanh_" + _INFIX + r"ukernel_", + "Negate": r"_vneg_" + _INFIX + r"ukernel_", + "Abs": r"_vabs_" + _INFIX + r"ukernel_", + "Square": r"_vsqr_" + _INFIX + r"ukernel_", + "Square Root": r"_vsqrt_" + _INFIX + r"ukernel_", + "Reciprocal Square Root": r"_vrsqrt_" + _INFIX + r"ukernel_", + "Convert": r"_vcvt_" + _INFIX + r"ukernel_", + "Copy": r"_(?:copy|memcpy)_" + _INFIX + r"ukernel_", + "Constant Pad": r"_xx_pad_" + _INFIX + r"ukernel_", + "Softmax": r"_(?:raddstoreexpminusmax|rmax)_" + _INFIX + r"ukernel_", + "Max Pooling": r"_maxpool_" + _INFIX + r"ukernel_", +} + + +def op_kernels(op_name, kernels): + m = _OP_NAME_RE.match(op_name) + if not m: + return [] + base, inside, tail = m.group(1).strip(), m.group(2), m.group(3).strip() + key = tail if tail in _KIND_PATTERN else (base if base in _KIND_PATTERN else None) + if key is None: + return [] + dtype_tokens = [ + s.strip().lower() for s in inside.split(",") if s.strip() in _DTYPE_TOKENS + ] + cat_re = re.compile(_KIND_PATTERN[key]) + return [ + sym + for sym in kernels + if cat_re.search(sym) and all(d in sym for d in dtype_tokens) + ] + + +def aggregate(etdump_path: Path): + insp = Inspector(etdump_path=str(etdump_path)) + per_op = defaultdict(lambda: {"count": 0, "raw": []}) + framework = defaultdict(lambda: {"count": 0, "raw": []}) + for block in insp.event_blocks: + for ev in block.events: + m = _SEQ_RE.match(ev.name or "") + base = m.group(1) if m else (ev.name or "") + bucket = framework if base in FRAMEWORK_EVENTS else per_op + bucket[base]["count"] += 1 + 
bucket[base]["raw"].extend(ev.perf_data.raw if ev.perf_data else []) + return per_op, framework + + +def render(per_op, framework, etdump_path, kernels): + def rows_of(d): + rows = [] + for name, v in d.items(): + raw = v["raw"] + s = sum(raw) + rows.append( + { + "op": name, + "count": v["count"], + "sum_ms": s, + "avg_ms": (s / len(raw)) if raw else 0.0, + "max_ms": max(raw) if raw else 0.0, + "kernels": op_kernels(name, kernels) if kernels else [], + } + ) + rows.sort(key=lambda r: r["sum_ms"], reverse=True) + return rows + + op_rows = rows_of(per_op) + fw_rows = rows_of(framework) + ops_total = sum(r["sum_ms"] for r in op_rows) + fw_total = sum(r["sum_ms"] for r in fw_rows) + + def fmt_table(label, rows, total): + print(f"\n[etdump_summary] {label} total={total:.3f} ms") + print( + f"{'%':>5} {'sum_ms':>10} {'count':>6} {'avg_ms':>10} {'max_ms':>10} op" + ) + for r in rows: + pct = (r["sum_ms"] / total * 100.0) if total else 0.0 + print( + f"{pct:5.1f} {r['sum_ms']:10.3f} {r['count']:6d} " + f"{r['avg_ms']:10.3f} {r['max_ms']:10.3f} {r['op']}" + ) + + print(f"[etdump_summary] {etdump_path}") + fmt_table(f"XNNPACK ops ({len(op_rows)} unique)", op_rows, ops_total) + fmt_table(f"Framework wrappers ({len(fw_rows)})", fw_rows, fw_total) + if kernels: + print(f"\n[etdump_summary] Registered XNNPACK microkernels ({len(kernels)}):") + for sym in kernels: + print(f" {sym}") + + return op_rows, fw_rows, ops_total + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("etdump", type=Path) + parser.add_argument("--run-log", type=Path, default=None) + parser.add_argument("--json", type=Path, default=None) + args = parser.parse_args() + + if not args.etdump.exists(): + print(f"[etdump_summary] missing {args.etdump}", file=sys.stderr) + sys.exit(1) + + kernels = [] + if args.run_log is not None: + if not args.run_log.exists(): + print(f"[etdump_summary] missing run log {args.run_log}", file=sys.stderr) + sys.exit(1) + kernels = 
parse_run_log(args.run_log) + + per_op, framework = aggregate(args.etdump) + op_rows, fw_rows, ops_total = render(per_op, framework, args.etdump, kernels) + + if args.json is not None: + args.json.parent.mkdir(parents=True, exist_ok=True) + args.json.write_text( + json.dumps( + { + "etdump": str(args.etdump), + "run_log": str(args.run_log) if args.run_log else None, + "ops_total_ms": ops_total, + "registered_kernels": kernels, + "ops": op_rows, + "framework": fw_rows, + }, + indent=2, + ) + ) + print(f"[etdump_summary] wrote {args.json}") + + +if __name__ == "__main__": + main() diff --git a/examples/riscv/run.sh b/examples/riscv/run.sh index d6e86031ac9..916284cb73c 100755 --- a/examples/riscv/run.sh +++ b/examples/riscv/run.sh @@ -24,6 +24,7 @@ model="add" xnnpack=false quantize=false verbose=false +verbose_xnnpack=false usage() { cat < CMake build directory (default: ${build_dir}) --output_dir= Directory for the exported .bpte (default: ${output_dir}) @@ -48,6 +50,7 @@ for arg in "$@"; do --xnnpack) xnnpack=true ;; --quantize) quantize=true ;; --verbose) verbose=true ;; + --verbose-xnnpack) verbose_xnnpack=true ;; --build_only) build_only=true ;; --build_dir=*) build_dir="${arg#*=}" ;; --output_dir=*) output_dir="${arg#*=}" ;; @@ -79,6 +82,9 @@ cmake_extra_args=() if ${xnnpack}; then cmake_extra_args+=(-DEXECUTORCH_BUILD_XNNPACK=ON) fi +if ${verbose_xnnpack}; then + cmake_extra_args+=(-DEXECUTORCH_XNNPACK_LOG_LEVEL=4 -DEXECUTORCH_BUILD_RISCV_ETDUMP=ON) +fi cmake -S "${et_root_dir}" -B "${build_dir}" \ --preset riscv64-linux \ "${cmake_extra_args[@]}" \ @@ -115,13 +121,20 @@ if [[ -n "${QEMU_CPU+x}" ]]; then echo "[run.sh] QEMU_CPU=${QEMU_CPU}" fi -log_file=$(mktemp) -trap 'rm -f "${log_file}"' EXIT - runner_extra_args=() if ${quantize}; then runner_extra_args+=(--bundleio_rtol=0.1 --bundleio_atol=0.25) fi +etdump_path="" +if ${verbose_xnnpack}; then + etdump_path="${output_dir}/${model}_riscv.etdump" + rm -f "${etdump_path}" + 
runner_extra_args+=(--etdump_path="${etdump_path}") +fi + +# etdump_summary.py reads the XNN_LOG_LEVEL=4 registrations. +log_file="${output_dir}/${model}_riscv.run.log" +rm -f "${log_file}" set +e timeout --signal=KILL "${qemu_timeout}" "${qemu}" "${runner}" \ @@ -133,6 +146,12 @@ set -e echo "[run.sh] qemu exit status: ${qemu_status}" +if [[ -n "${etdump_path}" && -f "${etdump_path}" ]]; then + python "${script_dir}/etdump_summary.py" "${etdump_path}" \ + --run-log "${log_file}" \ + --json "${etdump_path}.json" || true +fi + if grep -q "Test_result: PASS" "${log_file}"; then echo "[run.sh] Bundled I/O check PASSED" exit 0 diff --git a/tools/cmake/preset/riscv64_linux.cmake b/tools/cmake/preset/riscv64_linux.cmake index c094534b594..87894b63088 100644 --- a/tools/cmake/preset/riscv64_linux.cmake +++ b/tools/cmake/preset/riscv64_linux.cmake @@ -10,6 +10,18 @@ set_overridable_option(EXECUTORCH_BUILD_DEVTOOLS ON) set_overridable_option(EXECUTORCH_ENABLE_BUNDLE_IO ON) set_overridable_option(EXECUTORCH_ENABLE_LOGGING ON) +define_overridable_option( + EXECUTORCH_BUILD_RISCV_ETDUMP "Build etdump support for RISC-V" BOOL OFF +) + +if("${EXECUTORCH_BUILD_RISCV_ETDUMP}") + set(EXECUTORCH_BUILD_DEVTOOLS ON) + set(EXECUTORCH_ENABLE_EVENT_TRACER ON) + set(FLATCC_ALLOW_WERROR OFF) +else() + set(EXECUTORCH_ENABLE_EVENT_TRACER OFF) +endif() + if(EXECUTORCH_BUILD_XNNPACK) if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_C_COMPILER_VERSION VERSION_LESS 14) message(FATAL_ERROR "XNNPACK requires GCC 14+ on riscv64") From 2c8507d44c542756fbc80f5b684c981710627d43 Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Wed, 20 May 2026 20:49:21 +0200 Subject: [PATCH 3/6] Align RISC-V workflow display name to others --- .github/workflows/riscv64.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/riscv64.yml b/.github/workflows/riscv64.yml index d7beae7dc8e..ddb1955ece2 100644 --- a/.github/workflows/riscv64.yml +++ b/.github/workflows/riscv64.yml @@ -1,4 +1,4 @@ -name: 
RISC-V +name: Test RISC-V Backend on: push: From 4e1355dfb0bd7df5e5d096aa979fd45589196a95 Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Thu, 21 May 2026 09:33:08 +0200 Subject: [PATCH 4/6] Always use executorch-ubuntu-24.04-gcc14, newer QEMU is needed for RISC-V testing --- .github/workflows/_test_riscv.yml | 8 ++------ .github/workflows/riscv64.yml | 3 --- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/.github/workflows/_test_riscv.yml b/.github/workflows/_test_riscv.yml index 4be2732016e..163ede72ab2 100644 --- a/.github/workflows/_test_riscv.yml +++ b/.github/workflows/_test_riscv.yml @@ -31,10 +31,6 @@ on: description: 'Configuration(s) for the CPU to emulate with QEMU, expecting a JSON array' required: true type: string - gcc-version: - description: 'The version of GCC to use' - required: false - type: number docker-image: description: 'The docker image to use for this job' required: false @@ -45,7 +41,7 @@ jobs: uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: runner: linux.2xlarge - docker-image: ${{ inputs.docker-image || 'ci-image:executorch-ubuntu-22.04-gcc11' }} + docker-image: ci-image:executorch-ubuntu-24.04-gcc14 submodules: 'recursive' ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} timeout: ${{ inputs.timeout }} @@ -58,7 +54,7 @@ jobs: echo '${{ inputs.qemu-cpu }}' | jq -r '.[]' | while IFS= read -r qemu_cpu; do export QEMU_CPU="${qemu_cpu}" - export GCC_VERSION=${{ inputs.gcc-version }} + export GCC_VERSION=14 bash .ci/scripts/test_riscv_qemu.sh \ --model="${{ inputs.model }}" \ ${{ inputs.xnnpack && '--xnnpack --verbose-xnnpack' || '' }} \ diff --git a/.github/workflows/riscv64.yml b/.github/workflows/riscv64.yml index ddb1955ece2..14b9ad62047 100644 --- a/.github/workflows/riscv64.yml +++ b/.github/workflows/riscv64.yml @@ -64,6 +64,3 @@ jobs: ]' ) }} - # XNNPACK requires GCC 14+ - gcc-version: ${{ matrix.xnnpack && 14 || 11 }} - docker-image: ${{ 
matrix.xnnpack && 'ci-image:executorch-ubuntu-24.04-gcc14' || 'ci-image:executorch-ubuntu-22.04-gcc11' }} From 239fe1b942a181aa8d0d057d68ab89b2ba2e6eb5 Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Thu, 21 May 2026 09:36:19 +0200 Subject: [PATCH 5/6] Rename --verbose to --debug-xnnpack It's more aligned with the intent --- .ci/scripts/test_riscv_qemu.sh | 10 +++++----- examples/riscv/aot_riscv.py | 8 ++++---- examples/riscv/run.sh | 10 +++++----- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.ci/scripts/test_riscv_qemu.sh b/.ci/scripts/test_riscv_qemu.sh index d1998561553..2842542aa3a 100755 --- a/.ci/scripts/test_riscv_qemu.sh +++ b/.ci/scripts/test_riscv_qemu.sh @@ -17,8 +17,8 @@ et_root_dir=$(realpath "${script_dir}/../..") model="add" xnnpack=false quantize=false -verbose=false verbose_xnnpack=false +debug_xnnpack=false usage() { cat < Which model to export and run (default: add) --xnnpack Enable the XNNPACK backend (AOT partitioner + runtime) --quantize Produce an 8-bit quantized model - --verbose Enable XNNPACK partitioner DEBUG logging and dump the lowered graph --verbose-xnnpack Build XNNPACK with XNN_LOG_LEVEL=4 to log microkernel dispatch + --debug-xnnpack Enable XNNPACK partitioner DEBUG logging and dump the lowered graph -h, --help Show this help EOF } @@ -38,7 +38,7 @@ for arg in "$@"; do --model=*) model="${arg#*=}" ;; --xnnpack) xnnpack=true ;; --quantize) quantize=true ;; - --verbose) verbose=true ;; + --debug-xnnpack) debug_xnnpack=true ;; --verbose-xnnpack) verbose_xnnpack=true ;; -h|--help) usage; exit 0 ;; *) echo "Unknown option: $arg" >&2; usage; exit 1 ;; @@ -52,8 +52,8 @@ fi if ${quantize}; then run_extra_args+=(--quantize) fi -if ${verbose}; then - run_extra_args+=(--verbose) +if ${debug_xnnpack}; then + run_extra_args+=(--debug-xnnpack) fi if ${verbose_xnnpack}; then run_extra_args+=(--verbose-xnnpack) diff --git a/examples/riscv/aot_riscv.py b/examples/riscv/aot_riscv.py index 22e8b31df73..529e2b1e767 100644 --- 
a/examples/riscv/aot_riscv.py +++ b/examples/riscv/aot_riscv.py @@ -148,13 +148,13 @@ def main() -> None: help="Produce an 8-bit quantized model", ) parser.add_argument( - "--verbose", + "--debug-xnnpack", action="store_true", help="Enable XNNPACK partitioner DEBUG logging and dump the lowered graph", ) args = parser.parse_args() - if args.verbose: + if args.debug_xnnpack: logging.basicConfig(level=logging.DEBUG) if args.output is None: @@ -181,7 +181,7 @@ def main() -> None: XnnpackPartitioner, ) - partitioners.append(XnnpackPartitioner(verbose=args.verbose)) + partitioners.append(XnnpackPartitioner(verbose=args.debug_xnnpack)) compile_config = None if args.quantize: @@ -202,7 +202,7 @@ def main() -> None: f"quantize={args.quantize} delegated_nodes={delegated}" ) - if args.verbose: + if args.debug_xnnpack: from executorch.exir.backend.utils import print_delegated_graph print_delegated_graph(edge.exported_program().graph_module) diff --git a/examples/riscv/run.sh b/examples/riscv/run.sh index 916284cb73c..2c207816bfc 100755 --- a/examples/riscv/run.sh +++ b/examples/riscv/run.sh @@ -23,7 +23,7 @@ qemu_timeout="600" model="add" xnnpack=false quantize=false -verbose=false +debug_xnnpack=false verbose_xnnpack=false usage() { @@ -33,8 +33,8 @@ Options: --model= Which model to export and run (default: ${model}) --xnnpack Enable the XNNPACK backend (AOT partitioner + runtime) --quantize Produce an 8-bit quantized model - --verbose Enable XNNPACK partitioner DEBUG logging and dump the lowered graph --verbose-xnnpack Build XNNPACK with XNN_LOG_LEVEL=4 to log microkernel dispatch at runtime + --debug-xnnpack Enable XNNPACK partitioner DEBUG logging and dump the lowered graph --build_only Only export and cross-compile; do not invoke QEMU --build_dir= CMake build directory (default: ${build_dir}) --output_dir= Directory for the exported .bpte (default: ${output_dir}) @@ -49,7 +49,7 @@ for arg in "$@"; do --model=*) model="${arg#*=}" ;; --xnnpack) xnnpack=true ;; --quantize) 
quantize=true ;; - --verbose) verbose=true ;; + --debug-xnnpack) debug_xnnpack=true ;; --verbose-xnnpack) verbose_xnnpack=true ;; --build_only) build_only=true ;; --build_dir=*) build_dir="${arg#*=}" ;; @@ -72,8 +72,8 @@ fi if ${quantize}; then aot_extra_args+=(--quantize) fi -if ${verbose}; then - aot_extra_args+=(--verbose) +if ${debug_xnnpack}; then + aot_extra_args+=(--debug-xnnpack) fi python "${script_dir}/aot_riscv.py" --model "${model}" "${aot_extra_args[@]}" --output "${bpte_path}" From e17eca91aea1cbf1b6527b1fbfe73c1ee030c2b2 Mon Sep 17 00:00:00 2001 From: Ludovic Henry Date: Thu, 21 May 2026 10:39:37 +0200 Subject: [PATCH 6/6] Fix possible issues with 'echo | jq | while read' failure in pipes --- .github/workflows/_test_riscv.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/_test_riscv.yml b/.github/workflows/_test_riscv.yml index 163ede72ab2..e3b049bd614 100644 --- a/.github/workflows/_test_riscv.yml +++ b/.github/workflows/_test_riscv.yml @@ -52,6 +52,9 @@ jobs: source .ci/scripts/utils.sh install_executorch "--use-pt-pinned-commit" + # Allows failure in `echo | jq | while read` pipeline to bubble up and fail the workflow + set -o pipefail + echo '${{ inputs.qemu-cpu }}' | jq -r '.[]' | while IFS= read -r qemu_cpu; do export QEMU_CPU="${qemu_cpu}" export GCC_VERSION=14