From 1bfad41c75c03628061bc287c1b372b090094b25 Mon Sep 17 00:00:00 2001 From: Dan Bonachea Date: Wed, 11 Mar 2026 19:27:16 -0700 Subject: [PATCH 1/6] caffeine.c: Fix some harmless (but correct) warnings from GCC 13 Add explicit casts to avoid some harmless integer sign warnings. --- src/caffeine/caffeine.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/caffeine/caffeine.c b/src/caffeine/caffeine.c index 5e81fcee..483f5eaf 100644 --- a/src/caffeine/caffeine.c +++ b/src/caffeine/caffeine.c @@ -106,14 +106,14 @@ int caf_num_images(gex_TM_t tm) { // Given team and corresponding image_num, return image number in the initial team int caf_image_to_initial(gex_TM_t tm, int image_num) { assert(image_num >= 1); - assert(image_num <= gex_TM_QuerySize(tm)); + assert(image_num <= (int)gex_TM_QuerySize(tm)); gex_Rank_t proc = gex_TM_TranslateRankToJobrank(tm, image_num-1); return proc + 1; } // Given image number in the initial team, return image number corresponding to given team int caf_image_from_initial(gex_TM_t tm, int image_num) { assert(image_num >= 1); - assert(image_num <= numprocs); + assert(image_num <= (int)numprocs); gex_Rank_t proc = gex_TM_TranslateJobrankToRank(tm, image_num-1); // GEX_RANK_INVALID indicates the provided image_num in initial team is not part of tm assert(proc != GEX_RANK_INVALID); @@ -485,7 +485,7 @@ static void atomic_init(void) { void caf_atomic_int(int opcode, int image, void* addr, int64_t *result, int64_t op1, int64_t op2) { assert(atomic_AD != GEX_AD_INVALID); assert(addr); - assert(opcode >= 0 && opcode < sizeof(op_map)/sizeof(op_map[0])); + assert(opcode >= 0 && opcode < (int)(sizeof(op_map)/sizeof(op_map[0]))); gex_OP_t op = op_map[opcode]; gex_Event_Wait( @@ -630,7 +630,7 @@ static int64_t *widen_from_array(CFI_cdesc_t* a_desc, size_t num_elements) { } else if (a_desc->elem_len == 2) { int16_t *src = a_desc->base_addr; for (size_t i=0; i < num_elements; i++) res[i] = src[i]; - } else gasnett_fatalerror("Logic error in widen_from_array: %i", a_desc->elem_len); + } else gasnett_fatalerror("Logic error in widen_from_array: %i", (int)a_desc->elem_len); return res; } @@ -644,7 +644,7 @@ static void narrow_to_array(CFI_cdesc_t* a_desc, int64_t *src, size_t num_elemen } else if (a_desc->elem_len == 2) { int16_t *dst = a_desc->base_addr; for (size_t i=0; i < num_elements; i++) dst[i] = src[i]; - } else gasnett_fatalerror("Logic error in narrow_to_array: %i", a_desc->elem_len); + } else gasnett_fatalerror("Logic error in narrow_to_array: %i", (int)a_desc->elem_len); free(src); } From a38a5f63fcd3ea29094e16f189e1c2311288d777 Mon Sep 17 00:00:00 2001 From: Dan Bonachea Date: Wed, 11 Mar 2026 19:31:57 -0700 Subject: [PATCH 2/6] prif_co_reduce: Factor the Gfortran bug workaround The bug workaround for GFortran's non-compliant c_funloc is moved into a separate helper, where it can be called from Fortran. This helps to improve the precision of the workaround, because it enables placing its conditional invocation in the same translation unit where the problematic call to c_funloc appears. --- src/caffeine/caffeine.c | 14 ++++++++------ src/caffeine/co_reduce_s.F90 | 8 +++++++- src/caffeine/prif_private_s.F90 | 9 +++++++++ 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/caffeine/caffeine.c b/src/caffeine/caffeine.c index 483f5eaf..fdad3adb 100644 --- a/src/caffeine/caffeine.c +++ b/src/caffeine/caffeine.c @@ -508,6 +508,13 @@ void caf_atomic_logical(int opcode, int image, void* addr, int64_t *result, int6 } //------------------------------------------------------------------- +// gfortran 13.2 .. 15 : c_funloc is non-compliant +// it erroneously generates a non-callable pointer to a pointer to the subroutine +// This helper is used to undo that incorrect extra level of indirection +typedef void (*funloc_t)(void); +funloc_t caf_c_funloc_deref(funloc_t funloc) { + return *(funloc_t *)funloc; +} void caf_co_reduce( CFI_cdesc_t* a_desc, int result_image, size_t num_elements, gex_Coll_ReduceFn_t user_op, void* client_data, gex_TM_t team @@ -516,12 +523,7 @@ void caf_co_reduce( assert(result_image >= 0); assert(num_elements > 0); assert(user_op); -#if PLATFORM_COMPILER_GNU - // gfortran 13.2 & 14 - c_funloc is non-compliant - // it erroneously generates a non-callable pointer to a pointer to the subroutine - // Here we undo that incorrect extra level of indirection - user_op = *(gex_Coll_ReduceFn_t *)user_op; -#endif + char* a_address = (char*) a_desc->base_addr; size_t c_sizeof_a = a_desc->elem_len; gex_Event_t ev; diff --git a/src/caffeine/co_reduce_s.F90 b/src/caffeine/co_reduce_s.F90 index 00d7a032..47166dc5 100644 --- a/src/caffeine/co_reduce_s.F90 +++ b/src/caffeine/co_reduce_s.F90 @@ -36,7 +36,13 @@ subroutine contiguous_co_reduce(a, operation_wrapper, cdata, result_image, stat, if (present(stat)) stat=0 - funptr = c_funloc(operation_wrapper) +# if __GFORTRAN__ + ! Gfortran 13..15 bug workaround + funptr = caf_c_funloc_deref(c_funloc(operation_wrapper)) +# else + funptr = c_funloc(operation_wrapper) +# endif + call_assert(c_associated(funptr)) call caf_co_reduce( & diff --git a/src/caffeine/prif_private_s.F90 b/src/caffeine/prif_private_s.F90 index 8bae9b8c..3908a7ab 100644 --- a/src/caffeine/prif_private_s.F90 +++ b/src/caffeine/prif_private_s.F90 @@ -352,6 +352,15 @@ subroutine caf_form_team(current_team, new_team, team_number, new_index) bind(C) integer(c_int), intent(in), value :: new_index end subroutine + ! ______________ Misc helpers __________________ + function caf_c_funloc_deref(funloc) result(res) bind(C) + !! funloc_t caf_c_funloc_deref(funloc_t funloc) + import c_funptr + implicit none + type(c_funptr), value :: funloc + type(c_funptr) :: res + end function + end interface interface num_to_str From 3eeec8bbec8d8b91fcbbe476ac590e79e4bebc43 Mon Sep 17 00:00:00 2001 From: Dan Bonachea Date: Wed, 11 Mar 2026 19:37:23 -0700 Subject: [PATCH 3/6] coarray_queries_s: Fix a harmless unused variable warning --- src/caffeine/coarray_queries_s.F90 | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/caffeine/coarray_queries_s.F90 b/src/caffeine/coarray_queries_s.F90 index d5135415..305a980b 100644 --- a/src/caffeine/coarray_queries_s.F90 +++ b/src/caffeine/coarray_queries_s.F90 @@ -58,8 +58,6 @@ end procedure module procedure prif_coshape - integer(c_int64_t) :: trailing_ucobound - call_assert(coarray_handle_check(coarray_handle)) call_assert(size(sizes) == coarray_handle%info%corank) From 6d1cb62ea352f03a7df69dda4d410efc8c702614 Mon Sep 17 00:00:00 2001 From: Dan Bonachea Date: Mon, 16 Mar 2026 11:34:03 -0700 Subject: [PATCH 4/6] Add more correctness assertions to validate teams and hierarchy * Add `team_check` to validate invariants of team descriptors * Add `team_check` assertions in various relevant places * Change `parent_team` field of `initial_team` to null --- src/caffeine/prif_private_s.F90 | 83 ++++++++++++++++++++++++++++++ src/caffeine/program_startup_s.F90 | 2 +- src/caffeine/teams_s.F90 | 25 ++++++++- 3 files changed, 107 insertions(+), 3 deletions(-) diff --git a/src/caffeine/prif_private_s.F90 b/src/caffeine/prif_private_s.F90 index 3908a7ab..498b292f 100644 --- a/src/caffeine/prif_private_s.F90 +++ b/src/caffeine/prif_private_s.F90 @@ -523,6 +523,89 @@ elemental impure function coarray_handle_check(coarray_handle) result(result_) result_ = .true. end function + ! verify state invariants for a team + ! Note this function validates invariants with deliberately UNconditional assertions + ! Suggested caller usage for conditional validation is: + ! call_assert(team_check(current_team)) + recursive function team_check(team, known_active, cycle_check) result(result_) + implicit none + type(prif_team_type), intent(in) :: team + logical, optional, intent(in) :: known_active ! is this known to be the current team or an ancestor team? + type(prif_team_type), optional, intent(in) :: cycle_check(:) + type(prif_team_type), allocatable :: cycle_check_(:) + logical :: result_, known_active_ + integer :: i + + call assert_always(associated(team%info), "unassociated info pointer in prif_team_type") + + ! check for invalid cycles in the team hierarchy + if (.not. present(cycle_check)) then ! initial call + cycle_check_ = [ team ] + else ! recursive call should never encounter a matching team as an ancestor + call assert_always(.not. any( [(associated(team%info, cycle_check(i)%info), i = 1, size(cycle_check))] ), & + "Invalid cycle detected in team ancestor hierarchy") + cycle_check_ = [ cycle_check, team ] + end if + + associate(info => team%info, ch_info => team%info%child_heap_info) + call assert_always(c_associated(info%gex_team), "invalid gex_team in team descriptor") + + if (associated(team%info, initial_team)) then ! initial team + call assert_always(info%team_number == -1, "invalid team_number in initial team descriptor") + call assert_always(.not. associated(info%parent_team), "invalid parent_team in initial team descriptor") + else ! non-initial team, have parent team + call assert_always(info%team_number > 0, "invalid team_number in initial team descriptor") + call assert_always(associated(info%parent_team), "invalid parent_team in team descriptor") + end if + + call assert_always(info%this_image == caf_this_image(info%gex_team), "invalid this_image in team descriptor") + call assert_always(info%num_images == caf_num_images(info%gex_team), "invalid num_images in team descriptor") + + ! determine activity of this team (is it the current team or an ancestor of current) + if (present(known_active)) then + known_active_ = known_active + else + known_active_ = .false. + end if + if (.not. known_active_) then + if (associated(team%info, initial_team)) then + known_active_ = .true. + else if (associated(current_team%info)) then + if (associated(team%info, current_team%info) .or. & + associated(team%info, current_team%info%parent_team)) then + known_active_ = .true. + end if + end if + end if + + if (known_active_) then + call assert_always(info%heap_start /= 0, "invalid heap_start in an active team descriptor") + call assert_always(info%heap_size > 0, "invalid heap_size in an active team descriptor") + if (info%this_image == 1) then + call assert_always(c_associated(info%heap_mspace), "invalid heap_mspace in an active team descriptor") + end if + end if + + if (associated(info%child_heap_info)) then ! have child teams + if (info%this_image == 1) then + call assert_always(c_associated(ch_info%allocated_memory), & + "invalid child_heap_info%allocated_memory in team descriptor") + call assert_always(ch_info%offset == as_int(ch_info%allocated_memory) - info%heap_start, & + "invalid child_heap_info%offset in team descriptor") + end if + call assert_always(ch_info%size > 0, "invalid child_heap_info%size in team descriptor") + call assert_always(ch_info%offset + ch_info%size <= info%heap_size, & + "invalid child_heap_info bounds in team descriptor") + end if + + if (associated(info%parent_team)) then ! recurse up the team tree + result_ = team_check(prif_team_type(info%parent_team), known_active_, cycle_check_) + end if + end associate + + result_ = .true. + end function + subroutine caf_establish_child_heap if (current_team%info%this_image == 1) then call caf_allocate_remaining( & diff --git a/src/caffeine/program_startup_s.F90 b/src/caffeine/program_startup_s.F90 index d571fcd6..8356f758 100644 --- a/src/caffeine/program_startup_s.F90 +++ b/src/caffeine/program_startup_s.F90 @@ -21,7 +21,7 @@ initial_team%gex_team) call assert_init() current_team%info => initial_team - initial_team%parent_team => initial_team + nullify(initial_team%parent_team) initial_team%team_number = -1 initial_team%this_image = caf_this_image(initial_team%gex_team) initial_team%num_images = caf_num_images(initial_team%gex_team) diff --git a/src/caffeine/teams_s.F90 b/src/caffeine/teams_s.F90 index ac00d29e..4e16f171 100644 --- a/src/caffeine/teams_s.F90 +++ b/src/caffeine/teams_s.F90 @@ -2,6 +2,7 @@ ! Terms of use are as specified in LICENSE.txt #include "version.h" +#include "assert_macros.h" submodule(prif:prif_private_s) teams_s ! DO NOT ADD USE STATEMENTS HERE @@ -10,9 +11,14 @@ contains module procedure prif_change_team + call_assert(team_check(current_team)) + call_assert(team_check(team)) + call_assert_describe(associated(team%info%parent_team) .and. associated(team%info%parent_team, current_team%info), "Invalid CHANGE TEAM. New team was not created by FORM TEAM within current team.") + call_assert(associated(current_team%info%child_heap_info)) + team%info%heap_start = current_team%info%child_heap_info%offset + current_team%info%heap_start team%info%heap_size = current_team%info%child_heap_info%size - if (caf_this_image(team%info%gex_team) == 1) then ! need to setup the heap for the team + if (team%info%this_image == 1) then ! need to setup the heap for the team call caf_establish_mspace( & team%info%heap_mspace, & as_c_ptr(team%info%heap_start), & @@ -22,6 +28,7 @@ if (caf_have_child_teams()) then ! need to establish heap for child teams call caf_establish_child_heap end if + call_assert(team_check(current_team)) call prif_sync_all ! child team sync required by F23 11.1.5.2 if (present(stat)) stat = 0 @@ -32,6 +39,9 @@ integer :: num_coarrays_in_team, i type(prif_coarray_descriptor), pointer :: tmp_data + call_assert(team_check(current_team)) + call_assert_describe(associated(current_team%info%parent_team), "Invalid END TEAM from the initial team.") + ! deallocate the teams coarrays ! Currently we work to batch together all the deallocations into a single call ! to prif_deallocate_coarray(), in the hope it can amortize some costs @@ -63,11 +73,14 @@ ! set the current team back to the parent team current_team%info => current_team%info%parent_team + call_assert(team_check(current_team)) if (present(stat)) stat = 0 end procedure module procedure prif_form_team + call_assert(team_check(current_team)) + call prif_sync_memory ! indicates this is the first time we're creating a child team @@ -97,11 +110,15 @@ team%info%num_images = caf_num_images(team%info%gex_team) end block + call_assert(team_check(team)) + if (present(stat)) stat = 0 end procedure module procedure prif_get_team - if (.not. present(level)) then + call_assert(team_check(current_team)) + + if (.not. present(level) .or. associated(current_team%info,initial_team)) then team = current_team else if (level == PRIF_CURRENT_TEAM) then team = current_team @@ -112,12 +129,16 @@ else call prif_error_stop(.false._c_bool, stop_code_char="prif_get_team: invalid level") endif + + call_assert(team_check(team)) end procedure module procedure prif_team_number if (present(team)) then + call_assert(team_check(team)) team_number = team%info%team_number else + call_assert(team_check(current_team)) team_number = current_team%info%team_number endif end procedure From e3bad77d3629ca4479b28def10c0967ff5ea7321 Mon Sep 17 00:00:00 2001 From: Dan Bonachea Date: Mon, 16 Mar 2026 18:32:52 -0700 Subject: [PATCH 5/6] Add team_check in other PRIF procedures where a team is relevant --- src/caffeine/coarray_queries_s.F90 | 19 ++++++++++++------- src/caffeine/image_queries_s.F90 | 25 +++++++++++++++++++++++++ src/caffeine/program_startup_s.F90 | 5 +++++ src/caffeine/sync_stmt_s.F90 | 3 +++ 4 files changed, 45 insertions(+), 7 deletions(-) diff --git a/src/caffeine/coarray_queries_s.F90 b/src/caffeine/coarray_queries_s.F90 index 305a980b..b58de71c 100644 --- a/src/caffeine/coarray_queries_s.F90 +++ b/src/caffeine/coarray_queries_s.F90 @@ -26,6 +26,7 @@ module procedure prif_ucobound_with_dim call_assert(coarray_handle_check(coarray_handle)) + call_assert(team_check(current_team)) associate (info => coarray_handle%info, corank => coarray_handle%info%corank) call_assert(dim >= 1 .and. dim <= corank) @@ -60,6 +61,7 @@ module procedure prif_coshape call_assert(coarray_handle_check(coarray_handle)) call_assert(size(sizes) == coarray_handle%info%corank) + call_assert(team_check(current_team)) associate(info => coarray_handle%info, corank => coarray_handle%info%corank) if (corank == 1) then ! common-case optimization @@ -77,16 +79,17 @@ end associate end procedure - subroutine image_index_helper(coarray_handle, sub, num_images, image_index) + subroutine image_index_helper(coarray_handle, sub, team, image_index) implicit none type(prif_coarray_handle), intent(in) :: coarray_handle integer(c_int64_t), intent(in) :: sub(:) - integer(c_int), intent(in) :: num_images + type(prif_team_type), intent(in) :: team integer(c_int), intent(out) :: image_index integer :: dim call_assert(coarray_handle_check(coarray_handle)) + call_assert(team_check(team)) associate (info => coarray_handle%info, corank => coarray_handle%info%corank) call_assert(size(sub) == corank) @@ -106,24 +109,25 @@ subroutine image_index_helper(coarray_handle, sub, num_images, image_index) end do end associate - if (image_index .gt. num_images) then + if (image_index .gt. team%info%num_images) then image_index = 0 end if end subroutine module procedure prif_image_index - call image_index_helper(coarray_handle, sub, current_team%info%num_images, image_index) + call image_index_helper(coarray_handle, sub, current_team, image_index) end procedure module procedure prif_image_index_with_team - call image_index_helper(coarray_handle, sub, team%info%num_images, image_index) + call image_index_helper(coarray_handle, sub, team, image_index) end procedure module procedure prif_image_index_with_team_number + call_assert(team_check(current_team)) if (team_number == -1) then - call image_index_helper(coarray_handle, sub, initial_team%num_images, image_index) + call image_index_helper(coarray_handle, sub, prif_team_type(initial_team), image_index) else if (team_number == current_team%info%team_number) then - call image_index_helper(coarray_handle, sub, current_team%info%num_images, image_index) + call image_index_helper(coarray_handle, sub, current_team, image_index) else call unimplemented("prif_image_index_with_team_number: no support for sibling teams") end if @@ -141,6 +145,7 @@ subroutine initial_index_helper(coarray_handle, sub, team, initial_team_index) integer :: dim integer(c_int) :: image_index + call_assert(team_check(team)) call_assert(coarray_handle_check(coarray_handle)) associate (info => coarray_handle%info, corank => coarray_handle%info%corank) diff --git a/src/caffeine/image_queries_s.F90 b/src/caffeine/image_queries_s.F90 index 46fe2612..de63b832 100644 --- a/src/caffeine/image_queries_s.F90 +++ b/src/caffeine/image_queries_s.F90 @@ -11,14 +11,17 @@ contains module procedure prif_num_images + call_assert(team_check(current_team)) num_images = current_team%info%num_images end procedure module procedure prif_num_images_with_team + call_assert(team_check(team)) num_images = team%info%num_images end procedure module procedure prif_num_images_with_team_number + call_assert(team_check(current_team)) if (team_number == -1) then num_images = initial_team%num_images else if (team_number == current_team%info%team_number) then @@ -30,8 +33,10 @@ module procedure prif_this_image_no_coarray if (present(team)) then + call_assert(team_check(team)) this_image = team%info%this_image else + call_assert(team_check(current_team)) this_image = current_team%info%this_image endif end procedure @@ -44,8 +49,10 @@ call_assert(size(cosubscripts) == coarray_handle%info%corank) if (present(team)) then + call_assert(team_check(team)) offset = team%info%this_image - 1 else + call_assert(team_check(current_team)) offset = current_team%info%this_image - 1 endif @@ -90,16 +97,34 @@ end procedure module procedure prif_failed_images + if (present(team)) then + call_assert(team_check(team)) + else + call_assert(team_check(current_team)) + endif + ! no current support for detecting image failure allocate(failed_images(0)) end procedure module procedure prif_stopped_images + if (present(team)) then + call_assert(team_check(team)) + else + call_assert(team_check(current_team)) + endif + ! no current support for detecting image stops allocate(stopped_images(0)) end procedure module procedure prif_image_status + if (present(team)) then + call_assert(team_check(team)) + else + call_assert(team_check(current_team)) + endif + ! no current support for detecting image failure/stops image_status = 0 end procedure diff --git a/src/caffeine/program_startup_s.F90 b/src/caffeine/program_startup_s.F90 index 8356f758..5f04e8ae 100644 --- a/src/caffeine/program_startup_s.F90 +++ b/src/caffeine/program_startup_s.F90 @@ -1,5 +1,8 @@ ! Copyright (c), The Regents of the University of California ! Terms of use are as specified in LICENSE.txt + +#include "assert_macros.h" + submodule(prif:prif_private_s) program_startup_s ! DO NOT ADD USE STATEMENTS HERE ! All use statements belong in prif_private_s.F90 @@ -27,6 +30,8 @@ initial_team%num_images = caf_num_images(initial_team%gex_team) non_symmetric_heap_size = total_heap_size - initial_team%heap_size + call_assert(team_check(current_team)) + call sync_init() ! issue #259: Ensure we clear any IEEE FP exceptions potentially diff --git a/src/caffeine/sync_stmt_s.F90 b/src/caffeine/sync_stmt_s.F90 index 73294351..7724bfbb 100644 --- a/src/caffeine/sync_stmt_s.F90 +++ b/src/caffeine/sync_stmt_s.F90 @@ -16,11 +16,13 @@ contains module procedure prif_sync_all + call_assert(team_check(current_team)) call caf_sync_team(current_team%info%gex_team) if (present(stat)) stat = 0 end procedure module procedure prif_sync_team + call_assert(team_check(team)) call caf_sync_team(team%info%gex_team) if (present(stat)) stat = 0 end procedure @@ -61,6 +63,7 @@ integer(c_intptr_t) :: evt_ptr call_assert(coarray_handle_check(si_coarray_handle)) + call_assert(team_check(current_team)) call caf_sync_memory ! end segment and amortize release fence From 99fa78e970b9eaba9c12c2e9948ccfe09c8b053c Mon Sep 17 00:00:00 2001 From: Dan Bonachea Date: Mon, 16 Mar 2026 19:54:54 -0700 Subject: [PATCH 6/6] install: Add auto-retry of GASNet download Experience shows intermittent download failures from bitbucket (especially from within GitHub actions, for some reason), so enable automated retry. --- install.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/install.sh b/install.sh index 862324c9..e830e0dc 100755 --- a/install.sh +++ b/install.sh @@ -395,7 +395,7 @@ if ! $PKG_CONFIG $pkg ; then ask_package_permission "GASNet-EX" "PKG_CONFIG_PATH" exit_if_user_declines "GASNet-EX" - GASNET_TAR_FILE="GASNet-$GASNET_VERSION.tar.gz" + GASNET_TAR_FILE="$DEPENDENCIES_DIR/GASNet-$GASNET_VERSION.tar.gz" GASNET_SOURCE_URL="https://bitbucket.org/berkeleylab/gasnet/downloads/GASNet-$GASNET_VERSION.tar.gz" if [ ! -d $DEPENDENCIES_DIR ]; then mkdir -pv $DEPENDENCIES_DIR @@ -406,7 +406,8 @@ if ! $PKG_CONFIG $pkg ; then rm -Rf $GASNET_DIR fi - curl -L $GASNET_SOURCE_URL | tar xvzf - -C $DEPENDENCIES_DIR + curl -L $VERBOSE --retry 10 --retry-all-errors --fail $GASNET_SOURCE_URL -o $GASNET_TAR_FILE + tar xvzf $GASNET_TAR_FILE -C $DEPENDENCIES_DIR ( cd $GASNET_DIR