Skip to content

Commit 43da4f9

Browse files
committed
update
1 parent 201ef45 commit 43da4f9

1 file changed

Lines changed: 21 additions & 29 deletions

File tree

workloads/gromacs/mpi_cxl_shim.c

Lines changed: 21 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -760,21 +760,10 @@ static void init_cxl_memory(void) {
760760
header_size = (header_size + CXL_ALIGNMENT - 1) & ~(CXL_ALIGNMENT - 1);
761761
atomic_store(&g_cxl.header->alloc_offset, header_size);
762762

763-
// Initialize only the mailboxes that fit within the mapped region
764-
// Use cxl_safe_memset instead of memset to avoid SIMD SIGILL on CXL memory
765-
for (int i = 0; i < max_usable_ranks; i++) {
766-
atomic_store(&g_cxl.mailboxes[i].head, 0);
767-
atomic_store(&g_cxl.mailboxes[i].tail, 0);
768-
atomic_store(&g_cxl.mailboxes[i].active, 0);
769-
g_cxl.mailboxes[i].rank = i;
770-
g_cxl.mailboxes[i].pid = 0;
771-
cxl_safe_memset(g_cxl.mailboxes[i].hostname, 0, sizeof(g_cxl.mailboxes[i].hostname));
772-
773-
// Initialize message slots
774-
for (int j = 0; j < CXL_MSG_QUEUE_SIZE; j++) {
775-
atomic_store(&g_cxl.mailboxes[i].messages[j].state, CXL_MSG_EMPTY);
776-
}
777-
}
763+
// Initialize all mailboxes using cxl_safe_memset to avoid SIMD SIGILL.
764+
// Zero-fills all fields: head=0, tail=0, active=0, messages[].state=0(EMPTY).
765+
cxl_safe_memset(g_cxl.mailboxes, 0,
766+
max_usable_ranks * sizeof(cxl_rank_mailbox_t));
778767

779768
// Initialize collective operation fields
780769
atomic_store(&g_cxl.header->coll_barrier_count, 0);
@@ -850,14 +839,11 @@ static void cxl_register_rank(int rank, int world_size) {
850839
atomic_store(&g_cxl.header->alloc_offset, (uint64_t)CXL_HEADER_SIZE);
851840

852841
// Reset ALL mailbox slots to clear stale messages from previous runs.
853-
for (int i = 0; i < g_cxl.max_ranks; i++) {
854-
atomic_store(&g_cxl.mailboxes[i].active, 0);
855-
atomic_store(&g_cxl.mailboxes[i].head, 0);
856-
atomic_store(&g_cxl.mailboxes[i].tail, 0);
857-
for (int j = 0; j < CXL_MSG_QUEUE_SIZE; j++) {
858-
atomic_store(&g_cxl.mailboxes[i].messages[j].state, CXL_MSG_EMPTY);
859-
}
860-
}
842+
// Use cxl_safe_memset (volatile 8-byte writes) instead of atomic_store
843+
// loops — the compiler at -O3 can optimize atomic_store loops into SIMD
844+
// memset which causes SIGILL on CXL device memory.
845+
cxl_safe_memset(g_cxl.mailboxes, 0,
846+
g_cxl.max_ranks * sizeof(cxl_rank_mailbox_t));
861847

862848
__atomic_thread_fence(__ATOMIC_SEQ_CST);
863849

@@ -872,10 +858,16 @@ static void cxl_register_rank(int rank, int world_size) {
872858

873859
cxl_rank_mailbox_t *my_mailbox = &g_cxl.mailboxes[rank];
874860

875-
// Set up mailbox for this rank
876-
my_mailbox->rank = rank;
877-
my_mailbox->pid = getpid();
878-
gethostname(my_mailbox->hostname, sizeof(my_mailbox->hostname) - 1);
861+
// Set up mailbox for this rank (use volatile writes for scalar fields,
862+
// and cxl_safe_memcpy for strings — libc functions like gethostname/strncpy
863+
// use SIMD internally and SIGILL on CXL device memory)
864+
*(volatile uint32_t *)&my_mailbox->rank = rank;
865+
*(volatile uint32_t *)&my_mailbox->pid = getpid();
866+
{
867+
char tmp_hostname[64] = {0};
868+
gethostname(tmp_hostname, sizeof(tmp_hostname) - 1);
869+
cxl_safe_memcpy(my_mailbox->hostname, tmp_hostname, sizeof(my_mailbox->hostname));
870+
}
879871
atomic_store(&my_mailbox->head, 0);
880872
atomic_store(&my_mailbox->tail, 0);
881873
atomic_store(&my_mailbox->active, 1);
@@ -2432,8 +2424,8 @@ int MPI_Allreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype da
24322424
// Use MPI_Barrier for reliable synchronization across nodes
24332425
orig_MPI_Barrier(comm);
24342426

2435-
// Initialize result with my own data
2436-
memcpy(recvbuf, sendbuf, total_size);
2427+
// Initialize result with my own data (sendbuf may be in CXL memory)
2428+
cxl_safe_memcpy(recvbuf, sendbuf, total_size);
24372429

24382430
// Read and reduce data from all other ranks
24392431
for (int r = 0; r < g_cxl.world_size; r++) {

0 commit comments

Comments
 (0)