From 2f1739d60dfd962f752531ecdabccc831b202272 Mon Sep 17 00:00:00 2001 From: Tyagi Date: Sun, 8 Mar 2026 00:53:30 +1100 Subject: [PATCH 1/2] perf(mpi): cap thread pools for MPI and document policy Apply MPI-safe default thread caps in the uw launcher and add a runtime oversubscription warning/policy in underworld3 import logic; document behavior and override controls in parallel-computing docs. Underworld development team with AI support from Claude Code --- docs/advanced/parallel-computing.md | 52 +++++++++++++++++++++++- src/underworld3/__init__.py | 61 +++++++++++++++++++++++++++++ uw | 25 ++++++++++++ 3 files changed, 137 insertions(+), 1 deletion(-) diff --git a/docs/advanced/parallel-computing.md b/docs/advanced/parallel-computing.md index 021ce347..81aa1e06 100644 --- a/docs/advanced/parallel-computing.md +++ b/docs/advanced/parallel-computing.md @@ -18,6 +18,56 @@ Underworld3 uses PETSc for parallel operations, which means **you rarely need to The main use of `uw.mpi.rank` is for conditional output/visualization. +## MPI + Thread Pools (Oversubscription) + +When running with MPI, each rank can also spawn BLAS/OpenMP worker threads. +If this is not controlled, total runnable threads can explode and performance +can degrade severely. + +Example: `mpirun -np 8` with OpenBLAS default `10` threads can create up to +`80` compute threads, often slower than expected. + +### Default Underworld3 Policy + +Underworld3 now applies MPI-safe defaults (thread pool size `1`) unless users +explicitly set their own values: + +- `OMP_NUM_THREADS` +- `OPENBLAS_NUM_THREADS` +- `MKL_NUM_THREADS` +- `VECLIB_MAXIMUM_THREADS` +- `NUMEXPR_NUM_THREADS` + +This happens in two places: + +1. `./uw` launcher: sets defaults before Python starts. +2. `underworld3` import path: applies the same defaults for MPI runs if unset. + +### Runtime Warning + +If running with MPI and any of the thread variables above are explicitly set +to values greater than `1`, Underworld3 prints a rank-0 warning about possible +oversubscription. + +### User Controls + +- Disable automatic thread caps: + +```bash +export UW_DISABLE_THREAD_CAPS=1 +``` + +- Suppress warning (keep your explicit thread settings): + +```bash +export UW_SUPPRESS_THREAD_WARNING=1 +``` + +### Recommended Practice + +For most MPI benchmark and production jobs, keep `1` thread per rank unless +you are intentionally tuning hybrid MPI+threads. + ## Parallel-Safe Output ### The Problem with Rank Conditionals @@ -468,4 +518,4 @@ These operations require **ALL ranks** to participate: 4. **Collective operations must run on ALL ranks** - never inside rank conditionals 5. **Test with `mpirun -np N`** to catch issues early -The parallel safety system makes parallel programming in Underworld3 safer and more intuitive - collective operations are evaluated on all ranks automatically, preventing common deadlock scenarios! \ No newline at end of file +The parallel safety system makes parallel programming in Underworld3 safer and more intuitive - collective operations are evaluated on all ranks automatically, preventing common deadlock scenarios! diff --git a/src/underworld3/__init__.py b/src/underworld3/__init__.py index 463c63ba..25b76b20 100644 --- a/src/underworld3/__init__.py +++ b/src/underworld3/__init__.py @@ -71,6 +71,8 @@ from mpi4py import MPI # for initialising MPI import petsc4py as _petsc4py import sys +import os as _os +import warnings as _warnings _petsc4py.init(sys.argv) @@ -88,6 +90,65 @@ # Handle Sphinx autodoc mocking - PETSc mock objects don't support these operations pass + +def _parse_thread_env(name: str): + """Return integer thread count from environment variable, or None.""" + raw = _os.environ.get(name) + if raw is None or str(raw).strip() == "": + return None + try: + return int(float(raw)) + except (TypeError, ValueError): + return None + + +def _apply_mpi_thread_policy(): + """ + Default thread policy for MPI runs. + + Caps common thread pools to 1 unless explicitly set by the user. This + prevents MPI+BLAS oversubscription (large performance regressions). + """ + if MPI.COMM_WORLD.Get_size() <= 1: + return + + if _os.environ.get("UW_DISABLE_THREAD_CAPS", "0").lower() in ("1", "true", "yes", "on"): + return + + thread_vars = ( + "OMP_NUM_THREADS", + "OPENBLAS_NUM_THREADS", + "MKL_NUM_THREADS", + "VECLIB_MAXIMUM_THREADS", + "NUMEXPR_NUM_THREADS", + ) + + for var in thread_vars: + if _os.environ.get(var, "").strip() == "": + _os.environ[var] = "1" + + if MPI.COMM_WORLD.Get_rank() == 0 and _os.environ.get("UW_SUPPRESS_THREAD_WARNING", "0").lower() not in ( + "1", + "true", + "yes", + "on", + ): + oversub = {var: _parse_thread_env(var) for var in thread_vars} + oversub = {k: v for k, v in oversub.items() if v is not None and v > 1} + if oversub: + items = ", ".join(f"{k}={v}" for k, v in oversub.items()) + _warnings.warn( + "MPI run with thread pools > 1 detected " + f"({items}). This may cause severe oversubscription. " + "Set thread counts to 1 per rank, or set UW_SUPPRESS_THREAD_WARNING=1 " + "if this is intentional.", + RuntimeWarning, + stacklevel=2, + ) + + +_apply_mpi_thread_policy() + # Version is derived from git tags via setuptools_scm # Priority: 1) _version.py (generated by setuptools_scm at build time) # 2) importlib.metadata (for installed packages) diff --git a/uw b/uw index a303d39f..21877f09 100755 --- a/uw +++ b/uw @@ -18,6 +18,31 @@ else GREEN='' YELLOW='' BOLD='' NC='' fi +# MPI-safe thread defaults: +# Cap BLAS/OpenMP-style thread pools to 1 unless users explicitly override. +# This avoids severe oversubscription when running multiple MPI ranks. +apply_thread_caps() { + if [ "${UW_DISABLE_THREAD_CAPS:-0}" = "1" ]; then + return + fi + + local thread_vars=( + OMP_NUM_THREADS + OPENBLAS_NUM_THREADS + MKL_NUM_THREADS + VECLIB_MAXIMUM_THREADS + NUMEXPR_NUM_THREADS + ) + + for var in "${thread_vars[@]}"; do + if [ -z "${!var+x}" ] || [ -z "${!var}" ]; then + export "${var}=1" + fi + done +} + +apply_thread_caps + # Find pixi - check PATH first, then ~/.pixi/bin/ (where installer puts it) find_pixi() { if command -v pixi &> /dev/null; then From 9b7010816c979e982cb9402c709a6f4a79f7689d Mon Sep 17 00:00:00 2001 From: Tyagi Date: Tue, 10 Mar 2026 14:14:45 +1100 Subject: [PATCH 2/2] revert: restore uw wrapper from development --- uw | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/uw b/uw index 21877f09..a303d39f 100755 --- a/uw +++ b/uw @@ -18,31 +18,6 @@ else GREEN='' YELLOW='' BOLD='' NC='' fi -# MPI-safe thread defaults: -# Cap BLAS/OpenMP-style thread pools to 1 unless users explicitly override. -# This avoids severe oversubscription when running multiple MPI ranks. -apply_thread_caps() { - if [ "${UW_DISABLE_THREAD_CAPS:-0}" = "1" ]; then - return - fi - - local thread_vars=( - OMP_NUM_THREADS - OPENBLAS_NUM_THREADS - MKL_NUM_THREADS - VECLIB_MAXIMUM_THREADS - NUMEXPR_NUM_THREADS - ) - - for var in "${thread_vars[@]}"; do - if [ -z "${!var+x}" ] || [ -z "${!var}" ]; then - export "${var}=1" - fi - done -} - -apply_thread_caps - # Find pixi - check PATH first, then ~/.pixi/bin/ (where installer puts it) find_pixi() { if command -v pixi &> /dev/null; then