Skip to content

Commit f3caca5

Browse files
authored
Merge branch 'master' into MTHINC
2 parents ff02cc6 + 2d15ba9 commit f3caca5

3 files changed

Lines changed: 127 additions & 49 deletions

File tree

.github/workflows/common/build.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,4 +40,4 @@ if [ "$job_cluster" = "phoenix" ]; then
4040
fi
4141

4242
RETRY_VALIDATE_CMD="$validate_cmd" \
43-
retry_build ./mfc.sh test -v --dry-run -j 8 $build_opts || exit 1
43+
retry_build ./mfc.sh test -v --dry-run -a -j 8 $build_opts || exit 1

.github/workflows/test.yml

Lines changed: 100 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -213,25 +213,25 @@ jobs:
213213
fail-fast: false
214214
continue-on-error: true
215215
runs-on: ${{ matrix.nvhpc && 'ubuntu-22.04' || format('{0}-latest', matrix.os) }}
216-
container:
217-
image: ${{ matrix.nvhpc && format('nvcr.io/nvidia/nvhpc:{0}-devel-cuda_multi-ubuntu22.04', matrix.nvhpc) || '' }}
218-
options: ${{ matrix.nvhpc && '--security-opt seccomp=unconfined' || '' }}
219216
env:
220-
CC: ${{ matrix.nvhpc && 'nvc' || '' }}
221-
CXX: ${{ matrix.nvhpc && 'nvc++' || '' }}
222-
FC: ${{ matrix.nvhpc && 'nvfortran' || '' }}
223-
OMPI_ALLOW_RUN_AS_ROOT: ${{ matrix.nvhpc && '1' || '' }}
224-
OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: ${{ matrix.nvhpc && '1' || '' }}
225-
PMIX_MCA_gds: ${{ matrix.nvhpc && 'hash' || '' }}
226-
OMPI_MCA_hwloc_base_binding_policy: ${{ matrix.nvhpc && 'none' || '' }}
227-
FFLAGS: ${{ matrix.nvhpc && '-tp=px -Kieee -noswitcherror' || '' }}
228-
CFLAGS: ${{ matrix.nvhpc && '-tp=px' || '' }}
229-
CXXFLAGS: ${{ matrix.nvhpc && '-tp=px' || '' }}
217+
# Image tag for NVHPC jobs; empty for non-NVHPC jobs.
218+
NVHPC_IMAGE: ${{ matrix.nvhpc && format('nvcr.io/nvidia/nvhpc:{0}-devel-cuda_multi-ubuntu22.04', matrix.nvhpc) || '' }}
230219

231220
steps:
232-
- name: Git safe directory
221+
# ── NVHPC: free disk before pulling the ~25-30 GB cuda_multi image ──
222+
- name: Free disk space
233223
if: matrix.nvhpc
234-
run: git config --global --add safe.directory /__w/MFC/MFC
224+
run: |
225+
echo "=== Disk before cleanup ==="
226+
df -h /
227+
sudo rm -rf /usr/share/dotnet /usr/local/lib/android \
228+
/opt/ghc /usr/local/share/boost /opt/hostedtoolcache \
229+
/usr/local/graalvm /usr/local/.ghcup \
230+
/usr/local/share/chromium /usr/local/lib/node_modules
231+
sudo docker image prune -af
232+
sudo apt-get clean
233+
echo "=== Disk after cleanup ==="
234+
df -h /
235235
236236
- name: Clone
237237
uses: actions/checkout@v4
@@ -274,6 +274,67 @@ jobs:
274274
echo "Coverage cache: none available — full test suite will run"
275275
fi
276276
277+
# ── NVHPC: pull image and start a long-lived container ──────────────
278+
# Replaces the container: directive so we can free disk space first.
279+
# Uses "docker run -d ... sleep infinity" + "docker exec" to preserve
280+
# installed packages and env vars across steps.
281+
- name: Pull NVHPC container
282+
if: matrix.nvhpc
283+
run: docker pull "$NVHPC_IMAGE"
284+
285+
- name: Start NVHPC container
286+
if: matrix.nvhpc
287+
run: |
288+
docker run -d --name nvhpc \
289+
--security-opt seccomp=unconfined \
290+
-v "${{ github.workspace }}:/workspace" \
291+
-w /workspace \
292+
-e CC=nvc \
293+
-e CXX=nvc++ \
294+
-e FC=nvfortran \
295+
-e OMPI_ALLOW_RUN_AS_ROOT=1 \
296+
-e OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 \
297+
-e PMIX_MCA_gds=hash \
298+
-e OMPI_MCA_hwloc_base_binding_policy=none \
299+
-e "FFLAGS=-tp=px -Kieee -noswitcherror" \
300+
-e CFLAGS=-tp=px \
301+
-e CXXFLAGS=-tp=px \
302+
"$NVHPC_IMAGE" sleep infinity
303+
304+
- name: Setup NVHPC
305+
if: matrix.nvhpc
306+
run: |
307+
docker exec nvhpc bash -c '
308+
set -e
309+
apt-get update -y
310+
apt-get install -y cmake python3 python3-venv python3-pip \
311+
libfftw3-dev libhdf5-dev hdf5-tools git
312+
313+
# The repo is bind-mounted from the host so git sees a different
314+
# owner. Mark it safe to suppress "dubious ownership" errors that
315+
# otherwise spam 80 000+ lines into the CI log.
316+
git config --global --add safe.directory /workspace
317+
318+
# Set up NVHPC HPC-X MPI runtime paths
319+
HPCX_DIR=$(dirname "$(find /opt/nvidia/hpc_sdk -path "*/hpcx/hpcx-*/ompi/bin/mpirun" | head -1)")/../..
320+
MPI_LIB=$(mpifort --showme:link | grep -oP "(?<=-L)\S+" | head -1)
321+
322+
# Persist env vars for subsequent docker exec calls
323+
cat > /etc/nvhpc-env.sh <<EOF
324+
export LD_LIBRARY_PATH=${MPI_LIB}:${HPCX_DIR}/ucx/lib:${HPCX_DIR}/ucc/lib:\$LD_LIBRARY_PATH
325+
export OMPI_MCA_rmaps_base_oversubscribe=1
326+
EOF
327+
328+
# Debug: confirm compiler flags are set
329+
echo "=== NVHPC Environment ==="
330+
echo "FFLAGS=$FFLAGS"
331+
echo "CFLAGS=$CFLAGS"
332+
echo "CXXFLAGS=$CXXFLAGS"
333+
nvfortran --version
334+
cat /proc/cpuinfo | grep "model name" | head -1
335+
'
336+
337+
# ── Standard (non-NVHPC) setup ─────────────────────────────────────
277338
- name: Setup MacOS
278339
if: matrix.os == 'macos' && !matrix.nvhpc
279340
run: |
@@ -313,30 +374,7 @@ jobs:
313374
echo "MPICC=mpiicx" >> $GITHUB_ENV
314375
echo "MPICXX=mpiicpx" >> $GITHUB_ENV
315376
316-
# --- NVHPC container setup ---
317-
- name: Setup NVHPC
318-
if: matrix.nvhpc
319-
run: |
320-
apt-get update -y
321-
apt-get install -y cmake python3 python3-venv python3-pip \
322-
libfftw3-dev libhdf5-dev hdf5-tools git
323-
# Set up NVHPC HPC-X MPI runtime paths
324-
HPCX_DIR=$(dirname "$(find /opt/nvidia/hpc_sdk -path "*/hpcx/hpcx-*/ompi/bin/mpirun" | head -1)")/../..
325-
MPI_LIB=$(mpifort --showme:link | grep -oP '(?<=-L)\S+' | head -1)
326-
echo "LD_LIBRARY_PATH=${MPI_LIB}:${HPCX_DIR}/ucx/lib:${HPCX_DIR}/ucc/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV
327-
# Container MPI fixes: PMIx shared-memory, hwloc binding
328-
echo "PMIX_MCA_gds=hash" >> $GITHUB_ENV
329-
echo "OMPI_MCA_hwloc_base_binding_policy=none" >> $GITHUB_ENV
330-
echo "OMPI_MCA_rmaps_base_oversubscribe=1" >> $GITHUB_ENV
331-
# Debug: confirm compiler flags are set
332-
echo "=== NVHPC Environment ==="
333-
echo "FFLAGS=$FFLAGS"
334-
echo "CFLAGS=$CFLAGS"
335-
echo "CXXFLAGS=$CXXFLAGS"
336-
nvfortran --version
337-
cat /proc/cpuinfo | grep "model name" | head -1
338-
339-
# --- Standard build + test ---
377+
# ── Standard build + test ───────────────────────────────────────────
340378
- name: Build
341379
if: '!matrix.nvhpc'
342380
run: |
@@ -354,22 +392,37 @@ jobs:
354392
TEST_PCT: ${{ matrix.debug == 'reldebug' && '-% 20' || '' }}
355393
ONLY_CHANGES: ${{ github.event_name == 'pull_request' && '--only-changes' || '' }}
356394

357-
# --- NVHPC build + test ---
395+
# ── NVHPC build + test (via docker exec into long-lived container) ──
358396
- name: Build (NVHPC)
359397
if: matrix.nvhpc && matrix.target == 'cpu'
360-
run: /bin/bash mfc.sh test -v --dry-run -j $(nproc) --test-all
398+
run: |
399+
docker exec nvhpc bash -c '
400+
source /etc/nvhpc-env.sh
401+
/bin/bash mfc.sh test -v --dry-run -j $(nproc) --test-all
402+
'
361403
362404
- name: Build (NVHPC GPU)
363405
if: matrix.nvhpc && matrix.target == 'gpu'
364-
run: |
365-
/bin/bash mfc.sh test -v --dry-run -j 2 --test-all --gpu acc
366-
/bin/bash mfc.sh test -v --dry-run -j 2 --test-all --gpu mp
406+
run: |
407+
docker exec nvhpc bash -c '
407+
set -e; source /etc/nvhpc-env.sh  # fail fast: bash -c has no implicit -e, so without this an acc build failure is masked when the mp build succeeds
408+
/bin/bash mfc.sh test -v --dry-run -j 2 --test-all --gpu acc
409+
/bin/bash mfc.sh test -v --dry-run -j 2 --test-all --gpu mp
410+
'
367412
368413
- name: Test (NVHPC)
369414
if: matrix.nvhpc && matrix.target == 'cpu'
370-
run: |
371-
ulimit -s unlimited || ulimit -s 65536 || true
372-
/bin/bash mfc.sh test -v --max-attempts 3 -j $(nproc) --test-all
415+
run: |
416+
docker exec nvhpc bash -c '
417+
source /etc/nvhpc-env.sh
418+
ulimit -s unlimited || ulimit -s 65536 || true
419+
/bin/bash mfc.sh test -v --max-attempts 3 -j $(nproc) --test-all
420+
'
421+
422+
# ── Cleanup ─────────────────────────────────────────────────────────
423+
- name: Stop NVHPC container
424+
if: always() && matrix.nvhpc
425+
run: docker rm -f nvhpc || true
373426

374427
self:
375428
name: "${{ matrix.cluster_name }} (${{ matrix.device }}${{ matrix.interface != 'none' && format('-{0}', matrix.interface) || '' }}${{ matrix.shard != '' && format(' [{0}]', matrix.shard) || '' }})"

CMakeLists.txt

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
cmake_minimum_required(VERSION 3.20)
1111

1212

13-
# We include C as a language because - for some reason -
13+
# We include C as a language because, for some reason,
1414
# FIND_LIBRARY_USE_LIB64_PATHS is otherwise ignored.
1515

1616
project(MFC LANGUAGES C CXX Fortran)
@@ -491,7 +491,32 @@ function(MFC_SETUP_TARGET)
491491
# Here we need to split into "library" and "executable" to perform IPO on the NVIDIA compiler.
492492
# A little hacky, but it *is* an edge-case for *one* compiler.
493493
if (NVHPC_USE_TWO_PASS_IPO AND NOT(MFC_OpenMP AND ARGS_OpenMP))
494+
# nvfortran -Mextract does not produce .o files, only inline library
495+
# data. An OBJECT library with -Mextract causes CMake to rebuild
496+
# everything on every build because the expected .o outputs never
497+
# exist. We use a wrapper script as RULE_LAUNCH_COMPILE that runs
498+
# the compiler and then touches the expected .o output file.
499+
set(_ipo_wrapper "${CMAKE_BINARY_DIR}/${ARGS_TARGET}_extract_wrapper.sh")
500+
file(WRITE "${_ipo_wrapper}" [=[#!/bin/sh
501+
# Find the -o argument (the object file CMake expects)
502+
out=
503+
prev=
504+
for arg do
505+
if [ "$prev" = "-o" ]; then out="$arg"; break; fi
506+
prev="$arg"
507+
done
508+
# Run the compiler; propagate its exit status on failure
509+
"$@"
510+
status=$?
511+
[ "$status" -eq 0 ] || exit "$status"
512+
# Touch the .o so CMake's dependency tracking sees it
513+
[ -n "$out" ] && touch "$out"
514+
exit 0
515+
]=])
516+
file(CHMOD "${_ipo_wrapper}" PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE)
494517
add_library(${ARGS_TARGET}_lib OBJECT ${ARGS_SOURCES})
518+
set_target_properties(${ARGS_TARGET}_lib PROPERTIES
519+
RULE_LAUNCH_COMPILE "${_ipo_wrapper}")
495520
target_compile_options(${ARGS_TARGET}_lib PRIVATE
496521
$<$<COMPILE_LANGUAGE:Fortran>:-Mextract=lib:${ARGS_TARGET}_lib>
497522
$<$<COMPILE_LANGUAGE:Fortran>:-Minline>

0 commit comments

Comments
 (0)