Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions cuda_core/cuda/core/_launch_config.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@ from cuda.bindings cimport cydriver
cdef class LaunchConfig:
"""Customizable launch options."""
cdef:
public tuple grid
public tuple cluster
public tuple block
public int shmem_size
public bint is_cooperative
readonly tuple grid
readonly tuple cluster
readonly tuple block
readonly int shmem_size
readonly bint is_cooperative

vector[cydriver.CUlaunchAttribute] _attrs
cydriver.CUlaunchConfig _cached_drv_cfg
readonly bint _cache_valid
object __weakref__

cdef cydriver.CUlaunchConfig _to_native_launch_config(self)
Expand Down
17 changes: 13 additions & 4 deletions cuda_core/cuda/core/_launch_config.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ cdef class LaunchConfig:
self.shmem_size = shmem_size

self.is_cooperative = is_cooperative
self._cache_valid = False

if self.is_cooperative and not Device().properties.cooperative_launch:
raise CUDAError("cooperative kernels are not supported on this device")
Expand All @@ -112,19 +113,19 @@ cdef class LaunchConfig:
return hash(self._identity())

cdef cydriver.CUlaunchConfig _to_native_launch_config(self):
if self._cache_valid:
return self._cached_drv_cfg

cdef cydriver.CUlaunchConfig drv_cfg
cdef cydriver.CUlaunchAttribute attr
memset(&drv_cfg, 0, sizeof(drv_cfg))
self._attrs.resize(0)

# Handle grid dimensions and cluster configuration
if self.cluster is not None:
# Convert grid from cluster units to block units
drv_cfg.gridDimX = self.grid[0] * self.cluster[0]
drv_cfg.gridDimY = self.grid[1] * self.cluster[1]
drv_cfg.gridDimZ = self.grid[2] * self.cluster[2]

# Set up cluster attribute
attr.id = cydriver.CUlaunchAttributeID.CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION
attr.value.clusterDim.x, attr.value.clusterDim.y, attr.value.clusterDim.z = self.cluster
self._attrs.push_back(attr)
Expand All @@ -142,10 +143,18 @@ cdef class LaunchConfig:
drv_cfg.numAttrs = self._attrs.size()
drv_cfg.attrs = self._attrs.data()

# Cache the result. attrs points into self._attrs which is stable
# as long as _attrs is never resized after this point (guaranteed
# because we skip resize(0) on the fast path above).
self._cached_drv_cfg = drv_cfg
self._cache_valid = True
return drv_cfg


# TODO: once all modules are cythonized, this function can be dropped in favor of the cdef method above
# TODO: once all modules are cythonized, this function can be dropped in favor of the cdef method above.
# NOTE: unlike the cdef method above, this cpdef wrapper creates Python driver objects on every call
# and does NOT use the _cache_valid / _cached_drv_cfg cache. The cache is only in the cdef method,
# which is called from _launcher.pyx and _module.pyx.
cpdef object _to_native_launch_config(LaunchConfig config):
"""Convert LaunchConfig to native driver CUlaunchConfig.

Expand Down
11 changes: 11 additions & 0 deletions cuda_core/docs/source/release/1.0.1-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,17 @@
=================================


Breaking changes
----------------

- :class:`LaunchConfig` fields (``grid``, ``block``, ``cluster``,
``shmem_size``, ``is_cooperative``) are now read-only after construction.
Assigning to them from Python raises ``AttributeError``. Mutation was
previously possible but was never intended given that :class:`LaunchConfig`
is a hashable value type. Code that mutates a config after creation should
construct a new :class:`LaunchConfig` instead.


Fixes and enhancements
----------------------

Expand Down
70 changes: 70 additions & 0 deletions cuda_core/tests/test_launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,76 @@ def test_launch_config_shmem_size():
assert config.shmem_size == 0


def test_launch_config_fields_are_readonly():
    """Every public LaunchConfig field rejects assignment after construction."""
    config = LaunchConfig(grid=(2, 2, 2), block=(4, 4, 4), shmem_size=256, is_cooperative=False)
    # Each attempted value matches the field's declared type so that only
    # the read-only protection (not a type error) can raise.
    attempts = [
        ("grid", (1, 1, 1)),
        ("block", (1, 1, 1)),
        ("cluster", (1, 1, 1)),
        ("shmem_size", 0),
        ("is_cooperative", False),
    ]
    for name, new_value in attempts:
        with pytest.raises(AttributeError):
            setattr(config, name, new_value)


def test_launch_config_native_conversion_stable(init_cuda):
    """The cpdef _to_native_launch_config wrapper returns consistent values across calls."""
    from cuda.core._launch_config import _to_native_launch_config

    cfg = LaunchConfig(grid=(4, 1, 1), block=(32, 1, 1))
    # Convert twice and require both results to carry the same expected values.
    for native in (_to_native_launch_config(cfg), _to_native_launch_config(cfg)):
        assert native.gridDimX == 4
        assert native.blockDimX == 32
        assert native.sharedMemBytes == 0
        assert native.numAttrs == 0


def test_launch_config_native_conversion_stable_cooperative(init_cuda):
    """The cpdef _to_native_launch_config wrapper returns consistent attrs for cooperative configs."""
    from cuda.core._launch_config import _to_native_launch_config

    try:
        cfg = LaunchConfig(grid=1, block=1, is_cooperative=True)
    except CUDAError:
        pytest.skip("Device does not support cooperative launches")
    # Both conversions must report exactly one launch attribute (the
    # cooperative-launch flag), i.e. the set of observed counts is {1}.
    observed = {_to_native_launch_config(cfg).numAttrs for _ in range(2)}
    assert observed == {1}


def test_launch_config_native_conversion_stable_cluster(init_cuda):
    """The cpdef _to_native_launch_config wrapper returns consistent values for cluster configs."""
    from cuda.core._launch_config import _to_native_launch_config

    try:
        cfg = LaunchConfig(grid=2, cluster=2, block=32)
    except CUDAError:
        pytest.skip("Device does not support thread block clusters")
    first = _to_native_launch_config(cfg)
    second = _to_native_launch_config(cfg)
    # grid is given in cluster units: 2 clusters * 2 blocks/cluster = 4 blocks.
    assert (first.gridDimX, second.gridDimX) == (4, 4)
    # Exactly one attribute: the cluster-dimension attribute.
    assert (first.numAttrs, second.numAttrs) == (1, 1)


def test_launch_config_cdef_cache_populated_by_launch(init_cuda):
    """The cdef _to_native_launch_config cache (_cache_valid) is set after launch() and persists."""
    code = 'extern "C" __global__ void noop() {}'
    program = Program(code, SourceCodeType.CXX)
    ker = program.compile(ObjectCodeFormatType.CUBIN).get_kernel("noop")
    stream = Device().create_stream()

    config = LaunchConfig(grid=1, block=1)
    # Cache starts cold: nothing has converted this config yet.
    assert not config._cache_valid
    launch(stream, config, ker)
    assert config._cache_valid
    # Second launch reuses the cache (fast path) — _cache_valid stays True
    launch(stream, config, ker)
    assert config._cache_valid
    # Drain the stream before returning so the test never tears down a
    # stream with in-flight kernels (keeps teardown deterministic).
    stream.sync()


def test_launch_config_cluster_grid_conversion(init_cuda):
"""Test that LaunchConfig preserves original grid values and conversion happens in native config."""
try:
Expand Down