From d2d370ff697be5244d478bba97ef0d0489fc9643 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Tue, 16 Dec 2025 13:47:51 -0400 Subject: [PATCH 1/3] KernelIntrinsics Tweaks --- src/KernelAbstractions.jl | 2 +- src/intrinsics.jl | 4 ++-- test/intrinsics.jl | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index cdb4dd96..3881da55 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -833,7 +833,7 @@ include("macros.jl") ### function Scratchpad end -SharedMemory(t::Type{T}, dims::Val{Dims}, id::Val{Id}) where {T, Dims, Id} = KI.localmemory(t, dims) +SharedMemory(::Type{T}, dims::Val{Dims}, id::Val{Id}) where {T, Dims, Id} = KI.localmemory(T, dims) __synchronize() = KI.barrier() diff --git a/src/intrinsics.jl b/src/intrinsics.jl index 1811ad2f..79efa4cb 100644 --- a/src/intrinsics.jl +++ b/src/intrinsics.jl @@ -103,14 +103,14 @@ Returns the unique group ID. function get_group_id end """ - localmemory(T, dims) + localmemory(::Type{T}, dims) Declare memory that is local to a workgroup. !!! note Backend implementations **must** implement: ``` - @device_override localmemory(T::DataType, ::Val{Dims}) where {T, Dims} + @device_override localmemory(::Type{T}, ::Val{Dims}) where {T, Dims} ``` As well as the on-device functionality. """ diff --git a/test/intrinsics.jl b/test/intrinsics.jl index 97548c47..d4952a2d 100644 --- a/test/intrinsics.jl +++ b/test/intrinsics.jl @@ -89,7 +89,7 @@ function intrinsics_testsuite(backend, AT) @test KI.kernel_max_work_group_size(kernel) isa Int @test KI.kernel_max_work_group_size(kernel; max_work_items = 1) == 1 - kernel(results, workgroupsize = 4, numworkgroups = 4) + kernel(results; workgroupsize = 4, numworkgroups = 4) KernelAbstractions.synchronize(backend()) host_results = Array(results) From 04492acceab69385934d1e74e54c4f24b8ee280f Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Wed, 17 Dec 2025 13:53:19 -0400 Subject: [PATCH 2/3] Fix temporary AK compat --- .buildkite/pipeline.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index f1221aac..d25fd5fb 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -109,8 +109,7 @@ steps: julia -e 'println("--- :julia: Developing oneAPI") using Pkg Pkg.add(url="https://github.com/christiangnrd/oneAPI.jl", rev="intrinsics") - Pkg.develop(; name="AcceleratedKernels")' - sed -i 's/^KernelAbstractions = "0\.9.*"/KernelAbstractions = "0.10"/' \${JULIA_DEPOT_PATH}/dev/AcceleratedKernels/Project.toml + Pkg.add(url="https://github.com/christiangnrd/AcceleratedKernels.jl", rev="ka0.10simple")' julia -e 'println("--- :julia: Instantiating project") using Pkg Pkg.develop(; path=pwd())' || exit 3 @@ -141,8 +140,7 @@ steps: command: | julia -e 'println("--- :julia: Developing AMDGPU") using Pkg - Pkg.develop(; name="AcceleratedKernels")' - sed -i 's/^KernelAbstractions = "0\.9.*"/KernelAbstractions = "0.9, 0.10"/' \${JULIA_DEPOT_PATH}/dev/AcceleratedKernels/Project.toml + Pkg.add(url="https://github.com/christiangnrd/AcceleratedKernels.jl", rev="ka0.10simple")' julia -e ' using Pkg Pkg.add(url="https://github.com/christiangnrd/AMDGPU.jl", rev="intrinsics") From d6dc85fdcb4440121af972084d214933307de3cb Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Mon, 22 Dec 2025 15:39:43 -0400 Subject: [PATCH 3/3] Improve KI tests --- test/intrinsics.jl | 63 ++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/test/intrinsics.jl b/test/intrinsics.jl index d4952a2d..63216d32 100644 --- a/test/intrinsics.jl +++ b/test/intrinsics.jl @@ -1,21 +1,25 @@ import KernelAbstractions.KernelIntrinsics as KI +struct KernelData + global_size::Int + global_id::Int + local_size::Int + local_id::Int + num_groups::Int + group_id::Int +end function test_intrinsics_kernel(results) - # Test all intrinsics return NamedTuples with x, y, z fields - global_size = KI.get_global_size() - global_id = KI.get_global_id() - local_size = KI.get_local_size() - local_id = KI.get_local_id() - num_groups = KI.get_num_groups() - group_id = KI.get_group_id() - - if UInt32(global_id.x) <= UInt32(global_size.x) - results[1, global_id.x] = global_id.x - results[2, global_id.x] = local_id.x - results[3, global_id.x] = group_id.x - results[4, global_id.x] = global_size.x - results[5, global_id.x] = local_size.x - results[6, global_id.x] = num_groups.x + i = KI.get_global_id().x + + if i <= length(results) + @inbounds results[i] = KernelData( + KI.get_global_size().x, + KI.get_global_id().x, + KI.get_local_size().x, + KI.get_local_id().x, + KI.get_num_groups().x, + KI.get_group_id().x + ) end return end @@ -82,41 +86,40 @@ function intrinsics_testsuite(backend, AT) @test KI.multiprocessor_count(backend()) isa Int # Test with small kernel - N = 16 - results = AT(zeros(Int, 6, N)) + workgroupsize = 4 + numworkgroups = 4 + N = workgroupsize * numworkgroups + results = AT(Vector{KernelData}(undef, N)) kernel = KI.@kernel backend() launch = false test_intrinsics_kernel(results) @test KI.kernel_max_work_group_size(kernel) isa Int @test KI.kernel_max_work_group_size(kernel; max_work_items = 1) == 1 - kernel(results; workgroupsize = 4, numworkgroups = 4) + kernel(results; workgroupsize, numworkgroups) KernelAbstractions.synchronize(backend()) host_results = Array(results) # Verify results make sense - for i in 1:N - global_id_x, local_id_x, group_id_x, global_size_x, local_size_x, num_groups_x = host_results[:, i] + for (i, k_data) in enumerate(host_results) # Global IDs should be 1-based and sequential - @test global_id_x == i + @test k_data.global_id == i # Global size should match our ndrange - @test global_size_x == N + @test k_data.global_size == N - # Local size should be 4 (our workgroupsize) - @test local_size_x == 4 + @test k_data.local_size == workgroupsize - # Number of groups should be ceil(N/4) = 4 - @test num_groups_x == 4 + @test k_data.num_groups == numworkgroups # Group ID should be 1-based - expected_group = div(i - 1, 4) + 1 - @test group_id_x == expected_group + expected_group = div(i - 1, numworkgroups) + 1 + @test k_data.group_id == expected_group # Local ID should be 1-based within group - expected_local = ((i - 1) % 4) + 1 - @test local_id_x == expected_local + expected_local = ((i - 1) % workgroupsize) + 1 + @test k_data.local_id == expected_local end end end