From 0bb129ecc28cd8ed5c180af6d44343d3f776fd87 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Tue, 3 Feb 2026 15:31:12 -0800 Subject: [PATCH] Fix peer access test synchronization issue Add missing device synchronization calls to ensure resident device operations are complete before peer device accesses memory. The test was failing because when dev0 accesses peer memory from dev1, PatternGen only syncs dev0 (the accessing device) but not dev1 (the resident device). This can cause synchronization issues where dev0 reads peer memory before dev1 has completed all operations. Changes: - Sync dev1 after IPC import (Test 1) to ensure import operations complete - Sync dev1 after granting peer access (Test 3) before dev0 accesses peer memory This follows CUDA best practices: when accessing peer memory, sync the resident device to ensure its operations are complete before the peer device reads the memory. Fixes test failures on ARM64 with CUDA 13.2 RC025. Co-authored-by: Cursor --- cuda_core/tests/memory_ipc/test_peer_access.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cuda_core/tests/memory_ipc/test_peer_access.py b/cuda_core/tests/memory_ipc/test_peer_access.py index 5a06133c9b..3f3ca021f7 100644 --- a/cuda_core/tests/memory_ipc/test_peer_access.py +++ b/cuda_core/tests/memory_ipc/test_peer_access.py @@ -94,6 +94,8 @@ def child_main(self, mr, buffer): # Test 1: Buffer accessible from resident device (dev1) - should always work dev1 = Device(1) dev1.set_current() + # Sync dev1 to ensure IPC import operations are complete + dev1.sync() PatternGen(dev1, NBYTES).verify_buffer(buffer, seed=False) # Test 2: Buffer NOT accessible from dev0 initially (peer access not preserved) @@ -106,6 +108,9 @@ def child_main(self, mr, buffer): dev1.set_current() mr.peer_accessible_by = [0] assert mr.peer_accessible_by == (0,) + # Sync dev1 to ensure peer access setup and any pending operations are complete + # before dev0 accesses the peer memory + dev1.sync() dev0.set_current() PatternGen(dev0, NBYTES).verify_buffer(buffer, seed=False)