diff --git a/oshmem/mca/spml/base/base.h b/oshmem/mca/spml/base/base.h index 47b66bc15a5..7a34893c677 100644 --- a/oshmem/mca/spml/base/base.h +++ b/oshmem/mca/spml/base/base.h @@ -29,6 +29,15 @@ BEGIN_C_DECLS +/** + * Base team structure - common fields for all SPML team implementations + */ +struct mca_spml_base_team_t { + long *pSync; /* Synchronization work array */ + long *pWrk; /* Reduction work array */ +}; +typedef struct mca_spml_base_team_t mca_spml_base_team_t; + /* * This is the base priority for a SPML wrapper component * If there exists more than one then it is undefined @@ -97,6 +106,20 @@ OSHMEM_DECLSPEC void mca_spml_base_memuse_hook(void *addr, size_t length); OSHMEM_DECLSPEC int mca_spml_base_put_all_nb(void *target, const void *source, size_t size, long *counter); +/** + * Helper function to allocate and initialize a sync array using private_alloc + * @param count Number of long elements to allocate + * @param array Pointer to store the allocated array address + * @return OSHMEM_SUCCESS or OSHMEM_ERROR + */ +OSHMEM_DECLSPEC int mca_spml_base_alloc_sync_array(size_t count, long **array); + +/** + * Helper function to free a sync array using private_free + * @param array Pointer to the array pointer (will be set to NULL) + */ +OSHMEM_DECLSPEC void mca_spml_base_free_sync_array(long **array); + /* * MCA framework */ diff --git a/oshmem/mca/spml/base/spml_base.c b/oshmem/mca/spml/base/spml_base.c index 1362e07ecc5..e0c12522b11 100644 --- a/oshmem/mca/spml/base/spml_base.c +++ b/oshmem/mca/spml/base/spml_base.c @@ -16,6 +16,9 @@ #include "opal/datatype/opal_convertor.h" #include "oshmem/proc/proc.h" #include "oshmem/mca/spml/base/base.h" +#include "oshmem/mca/memheap/memheap.h" +#include "oshmem/mca/memheap/base/base.h" +#include "oshmem/include/shmem.h" #include "opal/mca/btl/btl.h" #define SPML_BASE_DO_CMP(_res, _addr, _op, _val) \ @@ -176,3 +179,24 @@ int mca_spml_base_put_all_nb(void *target, const void *source, { return OSHMEM_ERR_NOT_IMPLEMENTED; } + +/* Helper function to allocate and initialize a single sync array */ +int mca_spml_base_alloc_sync_array(size_t count, long **array) +{ + MCA_MEMHEAP_CALL(private_alloc(count * sizeof(long), (void **)array)); + if (*array == NULL) { + SPML_ERROR("Failed to allocate sync array"); + return OSHMEM_ERROR; + } + memset(*array, 0, count * sizeof(long)); + return OSHMEM_SUCCESS; +} + +/* Helper function to free a single sync array */ +void mca_spml_base_free_sync_array(long **array) +{ + if (*array != NULL) { + MCA_MEMHEAP_CALL(private_free(*array)); + *array = NULL; + } +} diff --git a/oshmem/mca/spml/ucx/spml_ucx.c b/oshmem/mca/spml/ucx/spml_ucx.c index 9fc1d965524..877c8c4cfa9 100644 --- a/oshmem/mca/spml/ucx/spml_ucx.c +++ b/oshmem/mca/spml/ucx/spml_ucx.c @@ -1870,9 +1870,25 @@ int mca_spml_ucx_team_split_strided(shmem_team_t parent_team, int start, int ucx_new_team->parent_team = (mca_spml_ucx_team_t*)parent_team; - *new_team = (shmem_team_t)ucx_new_team; + /* Allocate pSync array */ + if (mca_spml_base_alloc_sync_array(SHMEM_SYNC_SIZE, &ucx_new_team->super.pSync) != OSHMEM_SUCCESS) { + goto cleanup_config; + } + + /* Allocate pWrk array */ + if (mca_spml_base_alloc_sync_array(SHMEM_REDUCE_MIN_WRKDATA_SIZE, &ucx_new_team->super.pWrk) != OSHMEM_SUCCESS) { + goto cleanup_psync; + } + *new_team = (shmem_team_t)ucx_new_team; return OSHMEM_SUCCESS; + +cleanup_psync: + mca_spml_base_free_sync_array(&ucx_new_team->super.pSync); +cleanup_config: + free(ucx_new_team->config); + free(ucx_new_team); + return OSHMEM_ERROR; } int mca_spml_ucx_team_split_2d(shmem_team_t parent_team, int xrange, const @@ -1923,6 +1939,10 @@ int mca_spml_ucx_team_destroy(shmem_team_t team) SPML_UCX_VALIDATE_TEAM(team); + /* Free pSync and pWrk using private_free */ + mca_spml_base_free_sync_array(&ucx_team->super.pSync); + mca_spml_base_free_sync_array(&ucx_team->super.pWrk); + free(ucx_team->config); free(team); diff --git a/oshmem/mca/spml/ucx/spml_ucx.h b/oshmem/mca/spml/ucx/spml_ucx.h index 0d1f345e925..9a75bf73f4b 100644 --- a/oshmem/mca/spml/ucx/spml_ucx.h +++ b/oshmem/mca/spml/ucx/spml_ucx.h @@ -130,6 +130,7 @@ typedef struct mca_spml_ucx_team_config { } mca_spml_ucx_team_config_t; typedef struct mca_spml_ucx_team { + mca_spml_base_team_t super; int n_pes; int my_pe; int stride; diff --git a/oshmem/runtime/oshmem_shmem_finalize.c b/oshmem/runtime/oshmem_shmem_finalize.c index 1950e20fdd1..442dbdc0a03 100644 --- a/oshmem/runtime/oshmem_shmem_finalize.c +++ b/oshmem/runtime/oshmem_shmem_finalize.c @@ -90,9 +90,19 @@ int oshmem_shmem_finalize(void) static int _shmem_finalize(void) { int ret = OSHMEM_SUCCESS; + mca_spml_base_team_t *world_team, *shared_team; shmem_barrier_all(); + /* Free pSync and pWrk for predefined teams */ + world_team = (mca_spml_base_team_t *)oshmem_team_world; + shared_team = (mca_spml_base_team_t *)oshmem_team_shared; + + mca_spml_base_free_sync_array(&world_team->pSync); + mca_spml_base_free_sync_array(&world_team->pWrk); + mca_spml_base_free_sync_array(&shared_team->pSync); + mca_spml_base_free_sync_array(&shared_team->pWrk); + shmem_lock_finalize(); /* Finalize preconnect framework */ diff --git a/oshmem/runtime/oshmem_shmem_init.c b/oshmem/runtime/oshmem_shmem_init.c index 64af4e18f3e..a3ac8f69fd4 100644 --- a/oshmem/runtime/oshmem_shmem_init.c +++ b/oshmem/runtime/oshmem_shmem_init.c @@ -95,11 +95,54 @@ shmem_internal_mutex_t shmem_internal_mutex_alloc = {{0}}; shmem_ctx_t oshmem_ctx_default = NULL; -shmem_team_t oshmem_team_shared = NULL; -shmem_team_t oshmem_team_world = NULL; +/* Predefined teams - statically allocated base team structures */ +mca_spml_base_team_t oshmem_team_world_instance = {.pSync = NULL, .pWrk = NULL}; +mca_spml_base_team_t oshmem_team_shared_instance = {.pSync = NULL, .pWrk = NULL}; + +/* Pointers to predefined teams */ +shmem_team_t oshmem_team_world = (shmem_team_t)&oshmem_team_world_instance; +shmem_team_t oshmem_team_shared = (shmem_team_t)&oshmem_team_shared_instance; static int _shmem_init(int argc, char **argv, int requested, int *provided); +/* Helper function to allocate pSync and pWrk for predefined teams */ +static int world_and_shared_teams_alloc(void) +{ + int ret; + + /* Allocate pSync for SHMEM_TEAM_WORLD */ + ret = mca_spml_base_alloc_sync_array(SHMEM_SYNC_SIZE, &oshmem_team_world_instance.pSync); + if (OSHMEM_SUCCESS != ret) { + return ret; + } + + /* Allocate pWrk for SHMEM_TEAM_WORLD */ + ret = mca_spml_base_alloc_sync_array(SHMEM_REDUCE_MIN_WRKDATA_SIZE, &oshmem_team_world_instance.pWrk); + if (OSHMEM_SUCCESS != ret) { + mca_spml_base_free_sync_array(&oshmem_team_world_instance.pSync); + return ret; + } + + /* Allocate pSync for SHMEM_TEAM_SHARED */ + ret = mca_spml_base_alloc_sync_array(SHMEM_SYNC_SIZE, &oshmem_team_shared_instance.pSync); + if (OSHMEM_SUCCESS != ret) { + mca_spml_base_free_sync_array(&oshmem_team_world_instance.pWrk); + mca_spml_base_free_sync_array(&oshmem_team_world_instance.pSync); + return ret; + } + + /* Allocate pWrk for SHMEM_TEAM_SHARED */ + ret = mca_spml_base_alloc_sync_array(SHMEM_REDUCE_MIN_WRKDATA_SIZE, &oshmem_team_shared_instance.pWrk); + if (OSHMEM_SUCCESS != ret) { + mca_spml_base_free_sync_array(&oshmem_team_shared_instance.pSync); + mca_spml_base_free_sync_array(&oshmem_team_world_instance.pWrk); + mca_spml_base_free_sync_array(&oshmem_team_world_instance.pSync); + return ret; + } + + return OSHMEM_SUCCESS; +} + #if OSHMEM_OPAL_THREAD_ENABLE static void* shmem_opal_thread(void* argc) { @@ -403,6 +446,13 @@ static int _shmem_init(int argc, char **argv, int requested, int *provided) OPAL_TIMING_ENV_NEXT(timing, "mca_scoll_enable()"); + /* Initialize pSync and pWrk for SHMEM_TEAM_WORLD and SHMEM_TEAM_SHARED teams */ + if (OSHMEM_SUCCESS != (ret = world_and_shared_teams_alloc())) { + error = "Failed to allocate sync arrays for predefined teams"; + goto error; + } + OPAL_TIMING_ENV_NEXT(timing, "world_and_shared_teams_alloc()"); + (*provided) = oshmem_mpi_thread_provided; oshmem_mpi_thread_multiple = (oshmem_mpi_thread_provided == SHMEM_THREAD_MULTIPLE) ? true : false;