diff --git a/.github/scripts/run_parallel_benchmarks.sh b/.github/scripts/run_parallel_benchmarks.sh
index 06b6eaa8d3..be9b5c5a94 100755
--- a/.github/scripts/run_parallel_benchmarks.sh
+++ b/.github/scripts/run_parallel_benchmarks.sh
@@ -52,16 +52,16 @@ else
     echo "Master job completed successfully"
 fi
 
-# Check if either job failed
+# Warn if either job failed (partial results may still be usable)
 if [ "${pr_exit}" -ne 0 ] || [ "${master_exit}" -ne 0 ]; then
-    echo "ERROR: One or both benchmark jobs failed: pr_exit=${pr_exit}, master_exit=${master_exit}"
-    exit 1
+    echo "WARNING: Benchmark jobs had failures: pr=${pr_exit}, master=${master_exit}"
+    echo "Checking for partial results..."
+else
+    echo "=========================================="
+    echo "Both benchmark jobs completed successfully!"
+    echo "=========================================="
 fi
 
-echo "=========================================="
-echo "Both benchmark jobs completed successfully!"
-echo "=========================================="
-
 # Final verification that output files exist before proceeding
 pr_yaml="pr/bench-${device}-${interface}.yaml"
 master_yaml="master/bench-${device}-${interface}.yaml"
diff --git a/.github/scripts/setup-build-cache.sh b/.github/scripts/setup-build-cache.sh
new file mode 100755
index 0000000000..9a1c7266e4
--- /dev/null
+++ b/.github/scripts/setup-build-cache.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+# Sets up a persistent build cache for self-hosted CI runners.
+# Creates a symlink: ./build -> <scratch>/.mfc-ci-cache/<cache-key>/build
+#
+# Each runner gets its own cache keyed by (cluster, device, interface, runner).
+# This avoids cross-runner path issues entirely — CMake's absolute paths are
+# always correct because the same runner always uses the same workspace path.
+#
+# Usage: source .github/scripts/setup-build-cache.sh <cluster> <device> [interface]
+
+_cache_cluster="${1:?Usage: setup-build-cache.sh <cluster> <device> [interface]}"
+_cache_device="${2:?}"
+_cache_interface="${3:-none}"
+_cache_runner="${RUNNER_NAME:?RUNNER_NAME not set}"
+
+_cache_key="${_cache_cluster}-${_cache_device}-${_cache_interface}-${_cache_runner}"
+_cache_base="$HOME/scratch/.mfc-ci-cache/${_cache_key}/build"
+
+mkdir -p "$_cache_base"
+_cache_dir="$(cd "$_cache_base" && pwd -P)"
+
+echo "=== Build Cache Setup ==="
+echo "  Cache key: $_cache_key"
+echo "  Cache dir: $_cache_dir"
+
+# Replace any existing build/ (real dir or stale symlink) with a symlink
+# to our runner-specific cache directory.
+# Use unlink for symlinks to avoid rm -rf following the link and deleting
+# the shared cache contents (which another runner may be using).
+if [ -L "build" ]; then
+    unlink "build"
+elif [ -e "build" ]; then
+    rm -rf "build"
+fi
+
+ln -s "$_cache_dir" "build"
+
+echo "  Symlink: build -> $_cache_dir"
+echo "========================="
diff --git a/.github/scripts/submit_and_monitor_bench.sh b/.github/scripts/submit_and_monitor_bench.sh
index 8e4b124107..80790752d7 100755
--- a/.github/scripts/submit_and_monitor_bench.sh
+++ b/.github/scripts/submit_and_monitor_bench.sh
@@ -37,9 +37,13 @@ fi
 echo "[$dir] Job ID: $job_id, monitoring output file: $output_file"
 
 # Use the monitoring script from PR (where this script lives)
-bash "${SCRIPT_DIR}/monitor_slurm_job.sh" "$job_id" "$output_file"
-
-echo "[$dir] Monitoring complete for job $job_id"
+monitor_exit=0
+bash "${SCRIPT_DIR}/monitor_slurm_job.sh" "$job_id" "$output_file" || monitor_exit=$?
+if [ "$monitor_exit" -ne 0 ]; then
+    echo "[$dir] WARNING: SLURM job exited with code $monitor_exit"
+else
+    echo "[$dir] Monitoring complete for job $job_id"
+fi
 
 # Verify the YAML output file was created
 yaml_file="${job_slug}.yaml"
diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
index 6279f5f578..4f777a165e 100644
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -46,6 +46,13 @@ jobs:
         else
           # Get PR number from workflow_run
           PR_NUMBER="${{ github.event.workflow_run.pull_requests[0].number }}"
+          if [ -z "$PR_NUMBER" ]; then
+            # Cross-repo PRs don't populate pull_requests[]. Search by head SHA.
+            HEAD_SHA="${{ github.event.workflow_run.head_sha }}"
+            PR_NUMBER=$(gh api "repos/${{ github.repository }}/pulls?state=open&sort=updated&direction=desc&per_page=30" \
+              --jq ".[] | select(.head.sha == \"$HEAD_SHA\") | .number" | head -1)
+          fi
+
          if [ -n "$PR_NUMBER" ]; then
            echo "pr_number=$PR_NUMBER" >> $GITHUB_OUTPUT
@@ -53,14 +60,20 @@ jobs:
            PR_AUTHOR=$(gh api repos/${{ github.repository }}/pulls/$PR_NUMBER --jq '.user.login')
            echo "author=$PR_AUTHOR" >> $GITHUB_OUTPUT
 
-            # Check if PR is approved
-            APPROVED=$(gh api repos/${{ github.repository }}/pulls/$PR_NUMBER/reviews \
-              --jq '[.[] | select(.state == "APPROVED")] | length')
-            if [ "$APPROVED" -gt 0 ]; then
-              echo "approved=true" >> $GITHUB_OUTPUT
-            else
-              echo "approved=false" >> $GITHUB_OUTPUT
-            fi
+            # Check if PR is approved by a maintainer/admin (ignore AI bot approvals)
+            APPROVERS=$(gh api "repos/${{ github.repository }}/pulls/$PR_NUMBER/reviews" \
+              --jq '[.[] | select(.state == "APPROVED") | .user.login] | unique | .[]')
+            APPROVED="false"
+            for approver in $APPROVERS; do
+              PERM=$(gh api "repos/${{ github.repository }}/collaborators/$approver/permission" \
+                --jq '.permission' 2>/dev/null || echo "none")
+              if [ "$PERM" = "admin" ] || [ "$PERM" = "maintain" ] || [ "$PERM" = "write" ]; then
+                echo "  Approved by $approver (permission: $PERM)"
+                APPROVED="true"
+                break
+              fi
+            done
+            echo "approved=$APPROVED" >> $GITHUB_OUTPUT
          else
            echo "pr_number=" >> $GITHUB_OUTPUT
            echo "approved=false" >> $GITHUB_OUTPUT
@@ -76,8 +89,7 @@ jobs:
       ( github.event_name == 'workflow_dispatch' ||
         needs.file-changes.outputs.pr_approved == 'true' ||
-        needs.file-changes.outputs.pr_author == 'sbryngelson' ||
-        needs.file-changes.outputs.pr_author == 'wilfonba'
+        needs.file-changes.outputs.pr_author == 'sbryngelson'
       )
     needs: [file-changes]
     strategy:
@@ -164,6 +176,7 @@ jobs:
        run: bash pr/.github/scripts/run_parallel_benchmarks.sh ${{ matrix.device }} ${{ matrix.interface }} ${{ matrix.cluster }}
 
      - name: Generate & Post Comment
+        if: always()
        run: |
          (cd pr && . ./mfc.sh load -c ${{ matrix.flag }} -m g)
          (cd pr && ./mfc.sh bench_diff ../master/bench-${{ matrix.device }}-${{ matrix.interface }}.yaml ../pr/bench-${{ matrix.device }}-${{ matrix.interface }}.yaml)
diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
index 6cd6db11b8..9589c79e49 100644
--- a/.github/workflows/coverage.yml
+++ b/.github/workflows/coverage.yml
@@ -35,6 +35,12 @@ jobs:
      - name: Checkouts
        uses: actions/checkout@v4
 
+      - name: Restore Build Cache
+        uses: actions/cache@v4
+        with:
+          path: build
+          key: mfc-coverage-${{ hashFiles('CMakeLists.txt', 'toolchain/dependencies/**', 'toolchain/cmake/**', 'src/**/*.fpp', 'src/**/*.f90') }}
+
      - name: Setup Ubuntu
        run: |
          sudo apt update -y
diff --git a/.github/workflows/frontier/build.sh b/.github/workflows/frontier/build.sh
index 18cddc96ca..ca09c2a116 100644
--- a/.github/workflows/frontier/build.sh
+++ b/.github/workflows/frontier/build.sh
@@ -18,6 +18,11 @@ fi
 
 . ./mfc.sh load -c f -m g
 
+# Only set up build cache for test suite, not benchmarks
+if [ "$run_bench" != "bench" ]; then
+    source .github/scripts/setup-build-cache.sh frontier "$job_device" "$job_interface"
+fi
+
 max_attempts=3
 attempt=1
 while [ $attempt -le $max_attempts ]; do
@@ -45,8 +50,8 @@ while [ $attempt -le $max_attempts ]; do
     fi
 
     if [ $attempt -lt $max_attempts ]; then
-        echo "Build failed on attempt $attempt. Cleaning and retrying in 30s..."
-        ./mfc.sh clean
+        echo "Build failed on attempt $attempt. Clearing cache and retrying in 30s..."
+        rm -rf build/staging build/install build/lock.yaml
         sleep 30
     fi
     attempt=$((attempt + 1))
diff --git a/.github/workflows/frontier_amd/build.sh b/.github/workflows/frontier_amd/build.sh
index 56c47d8ff4..6036f73fc2 100644
--- a/.github/workflows/frontier_amd/build.sh
+++ b/.github/workflows/frontier_amd/build.sh
@@ -18,6 +18,11 @@ fi
 
 . ./mfc.sh load -c famd -m g
 
+# Only set up build cache for test suite, not benchmarks
+if [ "$run_bench" != "bench" ]; then
+    source .github/scripts/setup-build-cache.sh frontier_amd "$job_device" "$job_interface"
+fi
+
 max_attempts=3
 attempt=1
 while [ $attempt -le $max_attempts ]; do
@@ -45,8 +50,8 @@ while [ $attempt -le $max_attempts ]; do
     fi
 
     if [ $attempt -lt $max_attempts ]; then
-        echo "Build failed on attempt $attempt. Cleaning and retrying in 30s..."
-        ./mfc.sh clean
+        echo "Build failed on attempt $attempt. Clearing cache and retrying in 30s..."
+        rm -rf build/staging build/install build/lock.yaml
         sleep 30
     fi
     attempt=$((attempt + 1))
diff --git a/.github/workflows/phoenix/test.sh b/.github/workflows/phoenix/test.sh
index 3920d96ed7..74c31c9fba 100644
--- a/.github/workflows/phoenix/test.sh
+++ b/.github/workflows/phoenix/test.sh
@@ -10,18 +10,39 @@ if [ "$job_device" = "gpu" ]; then
     fi
 fi
 
+# Set up persistent build cache
+source .github/scripts/setup-build-cache.sh phoenix "$job_device" "$job_interface"
+
 max_attempts=3
 attempt=1
 while [ $attempt -le $max_attempts ]; do
     echo "Build attempt $attempt of $max_attempts..."
     if ./mfc.sh test -v --dry-run -j 8 $build_opts; then
         echo "Build succeeded on attempt $attempt."
+
+        # Smoke-test the cached binaries to catch architecture mismatches
+        # (SIGILL from binaries compiled on a different compute node).
+        syscheck_bin=$(find build/install -name syscheck -type f 2>/dev/null | head -1)
+        if [ -n "$syscheck_bin" ] && ! "$syscheck_bin" > /dev/null 2>&1; then
+            echo "WARNING: syscheck binary crashed — cached install is stale."
+            if [ $attempt -lt $max_attempts ]; then
+                echo "Clearing cache and rebuilding..."
+                rm -rf build/staging build/install build/lock.yaml
+                sleep 5
+                attempt=$((attempt + 1))
+                continue
+            else
+                echo "ERROR: syscheck still failing after $max_attempts attempts."
+                exit 1
+            fi
+        fi
+
         break
     fi
 
     if [ $attempt -lt $max_attempts ]; then
-        echo "Build failed on attempt $attempt. Cleaning and retrying in 30s..."
-        ./mfc.sh clean
+        echo "Build failed on attempt $attempt. Clearing cache and retrying in 30s..."
+        rm -rf build/staging build/install build/lock.yaml
         sleep 30
     else
         echo "Build failed after $max_attempts attempts."
@@ -40,4 +61,3 @@ if [ "$job_device" = "gpu" ]; then
 fi
 
 ./mfc.sh test -v --max-attempts 3 -a -j $n_test_threads $device_opts -- -c phoenix
-
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 0be51076ec..9deb1920a2 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -94,6 +94,12 @@ jobs:
      - name: Clone
        uses: actions/checkout@v4
 
+      - name: Restore Build Cache
+        uses: actions/cache@v4
+        with:
+          path: build
+          key: mfc-build-${{ matrix.os }}-${{ matrix.mpi }}-${{ matrix.debug }}-${{ matrix.precision }}-${{ matrix.intel }}-${{ hashFiles('CMakeLists.txt', 'toolchain/dependencies/**', 'toolchain/cmake/**', 'src/**/*.fpp', 'src/**/*.f90') }}
+
      - name: Setup MacOS
        if: matrix.os == 'macos'
        run: |
@@ -202,6 +208,8 @@ jobs:
    steps:
      - name: Clone
        uses: actions/checkout@v4
+        with:
+          clean: false
 
      - name: Build
        if: matrix.cluster != 'phoenix'
diff --git a/toolchain/mfc/bench.py b/toolchain/mfc/bench.py
index 78b9b0fc65..80da40a7d4 100644
--- a/toolchain/mfc/bench.py
+++ b/toolchain/mfc/bench.py
@@ -1,4 +1,4 @@
-import os, sys, uuid, subprocess, dataclasses, typing, math, traceback
+import os, sys, uuid, subprocess, dataclasses, typing, math, traceback, time
 
 import rich.table
 
@@ -16,7 +16,7 @@ class BenchCase:
     path: str
     args: typing.List[str]
 
-# pylint: disable=too-many-locals, too-many-branches, too-many-statements
+# pylint: disable=too-many-locals, too-many-branches, too-many-statements, too-many-nested-blocks
 def bench(targets = None):
     if targets is None: targets = ARG("targets")
 
@@ -53,6 +53,8 @@ def bench(targets = None):
 
     failed_cases = []
 
+    max_attempts = 2
+
     for i, case in enumerate(CASES):
         summary_filepath = os.path.join(bench_dirpath, f"{case.slug}.yaml")
         log_filepath = os.path.join(bench_dirpath, f"{case.slug}.out")
@@ -64,71 +66,94 @@ def bench(targets = None):
         cons.print(f"> Summary: [bold]{os.path.relpath(summary_filepath)}[/bold]")
 
         try:
-            with open(log_filepath, "w") as log_file:
-                result = system(
-                    ["./mfc.sh", "run", case.path, "--case-optimization"] +
-                    ["--targets"] + [t.name for t in targets] +
-                    ["--output-summary", summary_filepath] +
-                    case.args +
-                    ["--", "--gbpp", str(ARG('mem'))],
-                    stdout=log_file,
-                    stderr=subprocess.STDOUT)
-
-            # Check return code (handle CompletedProcess or int defensively)
-            rc = result.returncode if hasattr(result, "returncode") else result
-            if rc != 0:
-                cons.print(f"[bold red]ERROR[/bold red]: Case {case.slug} failed with exit code {rc}")
-                cons.print(f"[bold red]  Check log at: {log_filepath}[/bold red]")
-                failed_cases.append(case.slug)
-                continue
-
-            # Validate summary file exists
-            if not os.path.exists(summary_filepath):
-                cons.print(f"[bold red]ERROR[/bold red]: Summary file not created for {case.slug}")
-                cons.print(f"[bold red]  Expected: {summary_filepath}[/bold red]")
-                failed_cases.append(case.slug)
-                continue
-
-            # Load summary
-            summary = file_load_yaml(summary_filepath)
-
-            # Validate all targets have required data
-            validation_failed = False
-            for target in targets:
-                if target.name not in summary:
-                    cons.print(f"[bold red]ERROR[/bold red]: Target {target.name} missing from summary for {case.slug}")
-                    validation_failed = True
-                    break
-
-                if "exec" not in summary[target.name]:
-                    cons.print(f"[bold red]ERROR[/bold red]: 'exec' time missing for {target.name} in {case.slug}")
-                    validation_failed = True
+            for attempt in range(1, max_attempts + 1):
+                try:
+                    with open(log_filepath, "w") as log_file:
+                        result = system(
+                            ["./mfc.sh", "run", case.path, "--case-optimization"] +
+                            ["--targets"] + [t.name for t in targets] +
+                            ["--output-summary", summary_filepath] +
+                            case.args +
+                            ["--", "--gbpp", str(ARG('mem'))],
+                            stdout=log_file,
+                            stderr=subprocess.STDOUT)
+
+                    # Check return code (handle CompletedProcess or int defensively)
+                    rc = result.returncode if hasattr(result, "returncode") else result
+                    if rc != 0:
+                        if attempt < max_attempts:
+                            cons.print(f"[bold yellow]WARNING[/bold yellow]: Case {case.slug} failed with exit code {rc} (attempt {attempt}/{max_attempts})")
+                            cons.print(f"Retrying in 5s...")
+                            time.sleep(5)
+                            continue
+                        cons.print(f"[bold red]ERROR[/bold red]: Case {case.slug} failed with exit code {rc}")
+                        cons.print(f"[bold red]  Check log at: {log_filepath}[/bold red]")
+                        failed_cases.append(case.slug)
+                        break
+
+                    # Validate summary file exists
+                    if not os.path.exists(summary_filepath):
+                        if attempt < max_attempts:
+                            cons.print(f"[bold yellow]WARNING[/bold yellow]: Summary file not created for {case.slug} (attempt {attempt}/{max_attempts})")
+                            cons.print(f"Retrying in 5s...")
+                            time.sleep(5)
+                            continue
+                        cons.print(f"[bold red]ERROR[/bold red]: Summary file not created for {case.slug}")
+                        cons.print(f"[bold red]  Expected: {summary_filepath}[/bold red]")
+                        failed_cases.append(case.slug)
+                        break
+
+                    # Load summary
+                    summary = file_load_yaml(summary_filepath)
+
+                    # Validate all targets have required data
+                    validation_failed = False
+                    for target in targets:
+                        if target.name not in summary:
+                            cons.print(f"[bold red]ERROR[/bold red]: Target {target.name} missing from summary for {case.slug}")
+                            validation_failed = True
+                            break
+
+                        if "exec" not in summary[target.name]:
+                            cons.print(f"[bold red]ERROR[/bold red]: 'exec' time missing for {target.name} in {case.slug}")
+                            validation_failed = True
+                            break
+
+                        if target.name == "simulation" and "grind" not in summary[target.name]:
+                            cons.print(f"[bold red]ERROR[/bold red]: 'grind' time missing for simulation in {case.slug}")
+                            validation_failed = True
+                            break
+
+                    if validation_failed:
+                        failed_cases.append(case.slug)
+                        break
+
+                    # Add to results
+                    results["cases"][case.slug] = {
+                        "description": dataclasses.asdict(case),
+                        "output_summary": summary,
+                    }
+                    cons.print(f"[bold green]✓[/bold green] Case {case.slug} completed successfully")
                     break
 
-                if target.name == "simulation" and "grind" not in summary[target.name]:
-                    cons.print(f"[bold red]ERROR[/bold red]: 'grind' time missing for simulation in {case.slug}")
-                    validation_failed = True
-                    break
+                except Exception as e:
+                    if attempt < max_attempts:
+                        cons.print(f"[bold yellow]WARNING[/bold yellow]: Unexpected error running {case.slug} (attempt {attempt}/{max_attempts}): {e}")
+                        cons.print(f"Retrying in 5s...")
+                        time.sleep(5)
+                        continue
+                    cons.print(f"[bold red]ERROR[/bold red]: Unexpected error running {case.slug}: {e}")
+                    cons.print(f"[dim]{traceback.format_exc()}[/dim]")
+                    failed_cases.append(case.slug)
 
-            if validation_failed:
-                failed_cases.append(case.slug)
-                continue
-
-            # Add to results
-            results["cases"][case.slug] = {
-                "description": dataclasses.asdict(case),
-                "output_summary": summary,
-            }
-            cons.print(f"[bold green]✓[/bold green] Case {case.slug} completed successfully")
-
-        except Exception as e:
-            cons.print(f"[bold red]ERROR[/bold red]: Unexpected error running {case.slug}: {e}")
-            cons.print(f"[dim]{traceback.format_exc()}[/dim]")
-            failed_cases.append(case.slug)
         finally:
             cons.unindent()
 
-    # Report results
+    # Always write results (even partial) so diff() can compare the intersection
+    file_dump_yaml(ARG("output"), results)
+    cons.print(f"Wrote results to [bold magenta]{os.path.relpath(ARG('output'))}[/bold magenta].")
+
+    # Report failures after writing results
     if failed_cases:
         cons.print()
         cons.print(f"[bold red]Failed cases ({len(failed_cases)}):[/bold red]")
@@ -137,11 +162,6 @@ def bench(targets = None):
 
         cons.print()
         raise MFCException(f"Benchmarking failed: {len(failed_cases)}/{len(CASES)} cases failed")
 
-    # Write output
-    file_dump_yaml(ARG("output"), results)
-
-    cons.print(f"Wrote results to [bold magenta]{os.path.relpath(ARG('output'))}[/bold magenta].")
-
     finally:
         cons.unindent()
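
Note (not part of the patch): a minimal sketch of how the new cache helper is meant to be consumed by a cluster script, mirroring the calls this patch adds to .github/workflows/phoenix/test.sh and the frontier build scripts. The exported RUNNER_NAME fallback and the literal "gpu"/"none" arguments are hypothetical placeholders for illustration only; on a real self-hosted runner, RUNNER_NAME is already set by GitHub Actions and the scripts pass "$job_device" and "$job_interface" instead.

    #!/bin/bash
    # Hypothetical standalone excerpt; assumes it runs from the MFC repo root.
    export RUNNER_NAME="${RUNNER_NAME:-runner-1}"   # normally provided by the Actions runner

    # Link ./build to this runner's persistent cache, e.g.
    #   $HOME/scratch/.mfc-ci-cache/phoenix-gpu-none-runner-1/build
    source .github/scripts/setup-build-cache.sh phoenix gpu none

    # On a failed or stale build, clear only the derived artifacts inside the
    # cache instead of running ./mfc.sh clean (as the retry paths in this
    # patch do), then rebuild:
    rm -rf build/staging build/install build/lock.yaml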