From 79448604ee444aef12ce0fd6bc409645a91e1321 Mon Sep 17 00:00:00 2001 From: Pablo Marquez Tello Date: Wed, 4 Feb 2026 16:36:39 +0000 Subject: [PATCH] feat: Add SVE kernels for TopKV Change-Id: I7a0c7bd1154b9cb7f35c7fd1c3b8ad54698f8799 Signed-off-by: Pablo Marquez Tello --- filelist.json | 37 ++--- src/BUILD.bazel | 7 +- src/CMakeLists.txt | 5 + src/cpu/kernels/CpuTopKVKernel.cpp | 27 ++++ src/cpu/kernels/topkv/generic/sve/fp16.cpp | 66 +++++++++ src/cpu/kernels/topkv/generic/sve/fp32.cpp | 66 +++++++++ src/cpu/kernels/topkv/generic/sve/impl.h | 131 ++++++++++++++++++ src/cpu/kernels/topkv/generic/sve/integer.cpp | 66 +++++++++ src/cpu/kernels/topkv/generic/sve/qasymm8.cpp | 66 +++++++++ .../topkv/generic/sve/qasymm8_signed.cpp | 67 +++++++++ src/cpu/kernels/topkv/list.h | 5 + 11 files changed, 525 insertions(+), 18 deletions(-) create mode 100644 src/cpu/kernels/topkv/generic/sve/fp16.cpp create mode 100644 src/cpu/kernels/topkv/generic/sve/fp32.cpp create mode 100644 src/cpu/kernels/topkv/generic/sve/impl.h create mode 100644 src/cpu/kernels/topkv/generic/sve/integer.cpp create mode 100644 src/cpu/kernels/topkv/generic/sve/qasymm8.cpp create mode 100644 src/cpu/kernels/topkv/generic/sve/qasymm8_signed.cpp diff --git a/filelist.json b/filelist.json index 1fea5a80e7..03b3cf1b78 100644 --- a/filelist.json +++ b/filelist.json @@ -2453,26 +2453,29 @@ ] } }, + "TopKV": { - "files": { - "common": [ - "src/cpu/kernels/CpuTopKVKernel.cpp", - "src/cpu/operators/CpuTopKV.cpp", - "src/runtime/NEON/functions/NETopKV.cpp" - ], - "neon": { - "fp16": [ "src/cpu/kernels/topkv/generic/neon/fp16.cpp" ], - "fp32": [ "src/cpu/kernels/topkv/generic/neon/fp32.cpp" ], - "integer":["src/cpu/kernels/topkv/generic/neon/integer.cpp"], - "qasymm8": [ - "src/cpu/kernels/topkv/generic/neon/qasymm8.cpp" + "files": { + "common": [ + "src/cpu/kernels/CpuTopKVKernel.cpp", + "src/cpu/operators/CpuTopKV.cpp", + "src/runtime/NEON/functions/NETopKV.cpp" ], - "qasymm8_signed": [ - "src/cpu/kernels/topkv/generic/neon/qasymm8_signed.cpp" - ] + "neon": { + "fp16": [ "src/cpu/kernels/topkv/generic/neon/fp16.cpp" ], + "fp32": [ "src/cpu/kernels/topkv/generic/neon/fp32.cpp" ], + "integer": [ "src/cpu/kernels/topkv/generic/neon/integer.cpp" ], + "qasymm8": [ "src/cpu/kernels/topkv/generic/neon/qasymm8.cpp" ], + "qasymm8_signed": [ "src/cpu/kernels/topkv/generic/neon/qasymm8_signed.cpp" ] + }, + "sve": { + "fp32": [ "src/cpu/kernels/topkv/generic/sve/fp32.cpp" ], + "fp16": [ "src/cpu/kernels/topkv/generic/sve/fp16.cpp" ], + "integer": [ "src/cpu/kernels/topkv/generic/sve/integer.cpp" ], + "qasymm8": [ "src/cpu/kernels/topkv/generic/sve/qasymm8.cpp" ], + "qasymm8_signed": [ "src/cpu/kernels/topkv/generic/sve/qasymm8_signed.cpp" ] + } } - - } }, "Transpose": { "files": { diff --git a/src/BUILD.bazel b/src/BUILD.bazel index 0c2a0cda99..9019ba2762 100644 --- a/src/BUILD.bazel +++ b/src/BUILD.bazel @@ -395,7 +395,12 @@ filegroup( "cpu/kernels/scale/sve/qasymm8.cpp", "cpu/kernels/scale/sve/qasymm8_signed.cpp", "cpu/kernels/softmax/generic/sve/impl.cpp", - "cpu/kernels/softmax/generic/sve/impl_bf16.cpp"] + + "cpu/kernels/softmax/generic/sve/impl_bf16.cpp", + "cpu/kernels/topkv/generic/sve/fp16.cpp", + "cpu/kernels/topkv/generic/sve/fp32.cpp", + "cpu/kernels/topkv/generic/sve/integer.cpp", + "cpu/kernels/topkv/generic/sve/qasymm8.cpp", + "cpu/kernels/topkv/generic/sve/qasymm8_signed.cpp"] + glob(["**/*.h", "**/*.hpp", "**/*.inl"]), diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bba1506afe..64fcc030f3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -365,6 +365,11 @@ target_sources( cpu/kernels/scale/sve/qasymm8_signed.cpp cpu/kernels/softmax/generic/sve/impl.cpp cpu/kernels/softmax/generic/sve/impl_bf16.cpp + cpu/kernels/topkv/generic/sve/fp16.cpp + cpu/kernels/topkv/generic/sve/fp32.cpp + cpu/kernels/topkv/generic/sve/integer.cpp + cpu/kernels/topkv/generic/sve/qasymm8.cpp + cpu/kernels/topkv/generic/sve/qasymm8_signed.cpp ) target_sources( diff --git a/src/cpu/kernels/CpuTopKVKernel.cpp b/src/cpu/kernels/CpuTopKVKernel.cpp index 063338177e..4c55555865 100644 --- a/src/cpu/kernels/CpuTopKVKernel.cpp +++ b/src/cpu/kernels/CpuTopKVKernel.cpp @@ -43,15 +43,42 @@ namespace { static const std::vector available_kernels = { + + {"sve_fp16_topkv", + [](const CpuTopKVKernelDataTypeISASelectorData &data) + { return (data.dt == DataType::F16) && data.isa.fp16 && data.isa.sve; }, + REGISTER_FP16_SVE(arm_compute::cpu::topkv_fp16_sve)}, + + {"sve_fp32_topkv", + [](const CpuTopKVKernelDataTypeISASelectorData &data) { return (data.dt == DataType::F32) && data.isa.sve; }, + REGISTER_FP32_SVE(arm_compute::cpu::topkv_fp32_sve)}, + + {"sve_qasymm8_topkv", + [](const CpuTopKVKernelDataTypeISASelectorData &data) { return (data.dt == DataType::QASYMM8) && data.isa.sve; }, + REGISTER_QASYMM8_SVE(arm_compute::cpu::topkv_qasymm8_sve)}, + + {"sve_qasymm8_signed_topkv", + [](const CpuTopKVKernelDataTypeISASelectorData &data) + { return (data.dt == DataType::QASYMM8_SIGNED) && data.isa.sve; }, + REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::topkv_qasymm8_signed_sve)}, + + {"sve_s32_topkv", + [](const CpuTopKVKernelDataTypeISASelectorData &data) { return (data.dt == DataType::S32) && data.isa.sve; }, + REGISTER_INTEGER_SVE(arm_compute::cpu::topkv_s32_sve)}, + {"neon_s32_topkv", [](const CpuTopKVKernelDataTypeISASelectorData &data) { return (data.dt == DataType::S32); }, REGISTER_INTEGER_NEON(arm_compute::cpu::topkv_s32_neon)}, + {"neon_fp32_topkv", [](const CpuTopKVKernelDataTypeISASelectorData &data) { return (data.dt == DataType::F32); }, REGISTER_FP32_NEON(arm_compute::cpu::topkv_fp32_neon)}, + {"neon_fp16_topkv", [](const CpuTopKVKernelDataTypeISASelectorData &data) { return (data.dt == DataType::F16) && data.isa.fp16; }, REGISTER_FP16_NEON(arm_compute::cpu::topkv_fp16_neon)}, + {"neon_qu8_topkv", [](const CpuTopKVKernelDataTypeISASelectorData &data) { return (data.dt == DataType::QASYMM8); }, REGISTER_QASYMM8_NEON(arm_compute::cpu::topkv_qasymm8_neon)}, + {"neon_qs8_topkv", [](const CpuTopKVKernelDataTypeISASelectorData &data) { return (data.dt == DataType::QASYMM8_SIGNED); }, REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::topkv_qasymm8_signed_neon)}}; diff --git a/src/cpu/kernels/topkv/generic/sve/fp16.cpp b/src/cpu/kernels/topkv/generic/sve/fp16.cpp new file mode 100644 index 0000000000..bb579861ce --- /dev/null +++ b/src/cpu/kernels/topkv/generic/sve/fp16.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#if defined(__ARM_FEATURE_SVE) + +#include "src/cpu/kernels/topkv/generic/sve/impl.h" + +#include + +namespace arm_compute +{ +namespace cpu +{ +namespace detail +{ + +template <> +inline uint32_t vector_length() +{ + return static_cast(svcnth()); +} + +template <> +inline uint32_t count_gt_block(const float16_t *ptr, float16_t thr, uint32_t block_elems) +{ + const svbool_t pg = svwhilelt_b16(static_cast(0), static_cast(block_elems)); + const svfloat16_t v = svld1_f16(pg, ptr); + const svbool_t gt = svcmpgt_n_f16(pg, v, thr); + return static_cast(svcntp_b16(svptrue_b16(), gt)); +} + +} // namespace detail + +void topkv_fp16_sve(const ITensor *predictions, const ITensor *targets, ITensor *out, uint32_t k, const Window &win) +{ + detail::topkv_sve_wrapper(predictions, targets, out, k, win); +} + +// Force instantiation into this TU +template void +detail::topkv_sve_wrapper(const ITensor *, const ITensor *, ITensor *, uint32_t, const Window &); + +} // namespace cpu +} // namespace arm_compute + +#endif // __ARM_FEATURE_SVE diff --git a/src/cpu/kernels/topkv/generic/sve/fp32.cpp b/src/cpu/kernels/topkv/generic/sve/fp32.cpp new file mode 100644 index 0000000000..e39e6c5a21 --- /dev/null +++ b/src/cpu/kernels/topkv/generic/sve/fp32.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#if defined(__ARM_FEATURE_SVE) + +#include "src/cpu/kernels/topkv/generic/sve/impl.h" + +#include +#include + +namespace arm_compute +{ +namespace cpu +{ +namespace detail +{ + +template <> +inline uint32_t vector_length() +{ + return static_cast(svcntw()); +} + +template <> +inline uint32_t count_gt_block(const float *ptr, float thr, uint32_t block_elems) +{ + const svbool_t pg = svwhilelt_b32(static_cast(0), static_cast(block_elems)); + const svfloat32_t v = svld1_f32(pg, ptr); + const svbool_t gt = svcmpgt_n_f32(pg, v, thr); + return static_cast(svcntp_b32(svptrue_b32(), gt)); +} + +} // namespace detail + +void topkv_fp32_sve(const ITensor *predictions, const ITensor *targets, ITensor *out, uint32_t k, const Window &win) +{ + detail::topkv_sve_wrapper(predictions, targets, out, k, win); +} + +// Force instantiation into this TU +template void detail::topkv_sve_wrapper(const ITensor *, const ITensor *, ITensor *, uint32_t, const Window &); + +} // namespace cpu +} // namespace arm_compute + +#endif // __ARM_FEATURE_SVE diff --git a/src/cpu/kernels/topkv/generic/sve/impl.h b/src/cpu/kernels/topkv/generic/sve/impl.h new file mode 100644 index 0000000000..e5a1f0da40 --- /dev/null +++ b/src/cpu/kernels/topkv/generic/sve/impl.h @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_SRC_CPU_KERNELS_TOPKV_GENERIC_SVE_IMPL_H +#define ACL_SRC_CPU_KERNELS_TOPKV_GENERIC_SVE_IMPL_H + +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Window.h" + +#include +#include + +namespace arm_compute +{ +namespace cpu +{ +namespace detail +{ + +/* + * Type-specific hooks (declared here, defined in each cpp). + * + * - vector_length() + * Return the SVE vector length in elements for Scalar (no clamping). + * + * - count_gt_block(ptr, thr, block_elems) + * Count how many elements in [ptr, ptr + block_elems) are > thr. + * Tail-safe via predicate. block_elems is always <= vector_length(). + * + t contains the SVE intrinsics + * (e.g., qasymm8.cpp, qasymm8_signed.cpp, fp16.cpp, fp32.cpp, integer.cpp). + */ + +template +uint32_t vector_length(); + +template +uint32_t count_gt_block(const Scalar *ptr, Scalar thr, uint32_t block_elems); + +// ---------------------------------------------------------------------------- +// Generic wrapper (type-agnostic) - uses the above hooks. +// Semantics (matching TopKV tests you showed): +// - predictions is N x C +// - window iterates across output elements (classes) => id.x() == class index c +// - for each class c, targets[c] gives the sample index t +// - scan across N samples and compute rank (#samples with value > predictions[t]) +// - output is U8 boolean: (rank < k) +// ---------------------------------------------------------------------------- +template +inline void +topkv_sve_wrapper(const ITensor *predictions, const ITensor *targets, ITensor *out, uint32_t k, const Window &window) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(predictions, targets, out); + ARM_COMPUTE_ERROR_ON(k == 0); + + const ITensorInfo *pred_info = predictions->info(); + const uint32_t N = pred_info->dimension(0); // samples + const uint32_t C = pred_info->dimension(1); // classes + + const uint32_t vl = vector_length(); // cache once per kernel invocation + + Iterator tgt_it(targets, window); + Iterator out_it(out, window); + + execute_window_loop( + window, + [&](const Coordinates &id) + { + const uint32_t c = static_cast(id.x()); // class index + ARM_COMPUTE_ERROR_ON(c >= C); + + uint32_t t = {*reinterpret_cast(tgt_it.ptr())}; + ARM_COMPUTE_ERROR_ON(t >= N); + + const Scalar *col_ptr = reinterpret_cast(predictions->ptr_to_element(Coordinates(0, c))); + ARM_COMPUTE_ERROR_ON(col_ptr == nullptr); + + const Scalar thr = col_ptr[t]; + + uint32_t rank = 0; + uint32_t idx = 0; + + while (idx < N) + { + const uint32_t remaining = N - idx; + const uint32_t bw = (remaining < vl) ? remaining : vl; + + rank += count_gt_block(col_ptr + idx, thr, bw); + + if (rank >= k) + { + break; + } + + idx += bw; + } + + *reinterpret_cast(out_it.ptr()) = static_cast(rank < k); + }, + tgt_it, out_it); +} + +} // namespace detail +} // namespace cpu +} // namespace arm_compute + +#endif // ACL_SRC_CPU_KERNELS_TOPKV_GENERIC_SVE_IMPL_H diff --git a/src/cpu/kernels/topkv/generic/sve/integer.cpp b/src/cpu/kernels/topkv/generic/sve/integer.cpp new file mode 100644 index 0000000000..c3adc5bf15 --- /dev/null +++ b/src/cpu/kernels/topkv/generic/sve/integer.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#if defined(__ARM_FEATURE_SVE) + +#include "src/cpu/kernels/topkv/generic/sve/impl.h" + +#include +#include + +namespace arm_compute +{ +namespace cpu +{ +namespace detail +{ + +template <> +inline uint32_t vector_length() +{ + return static_cast(svcntw()); +} + +template <> +inline uint32_t count_gt_block(const int32_t *ptr, int32_t thr, uint32_t block_elems) +{ + const svbool_t pg = svwhilelt_b32(static_cast(0), static_cast(block_elems)); + const svint32_t v = svld1_s32(pg, ptr); + const svbool_t gt = svcmpgt_n_s32(pg, v, thr); + return static_cast(svcntp_b32(svptrue_b32(), gt)); +} + +} // namespace detail + +void topkv_s32_sve(const ITensor *predictions, const ITensor *targets, ITensor *out, uint32_t k, const Window &win) +{ + detail::topkv_sve_wrapper(predictions, targets, out, k, win); +} + +// Force instantiation into this TU +template void detail::topkv_sve_wrapper(const ITensor *, const ITensor *, ITensor *, uint32_t, const Window &); + +} // namespace cpu +} // namespace arm_compute + +#endif // __ARM_FEATURE_SVE diff --git a/src/cpu/kernels/topkv/generic/sve/qasymm8.cpp b/src/cpu/kernels/topkv/generic/sve/qasymm8.cpp new file mode 100644 index 0000000000..13ee8c93bf --- /dev/null +++ b/src/cpu/kernels/topkv/generic/sve/qasymm8.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#if defined(__ARM_FEATURE_SVE) + +#include "src/cpu/kernels/topkv/generic/sve/impl.h" + +#include +#include + +namespace arm_compute +{ +namespace cpu +{ +namespace detail +{ + +template <> +inline uint32_t vector_length() +{ + return static_cast(svcntb()); +} + +template <> +inline uint32_t count_gt_block(const uint8_t *ptr, uint8_t thr, uint32_t block_elems) +{ + const svbool_t pg = svwhilelt_b8(static_cast(0), static_cast(block_elems)); + const svuint8_t v = svld1_u8(pg, ptr); + const svbool_t gt = svcmpgt_n_u8(pg, v, thr); + return static_cast(svcntp_b8(svptrue_b8(), gt)); +} + +} // namespace detail + +void topkv_qasymm8_sve(const ITensor *predictions, const ITensor *targets, ITensor *out, uint32_t k, const Window &win) +{ + detail::topkv_sve_wrapper(predictions, targets, out, k, win); +} + +// Force instantiation into this TU +template void detail::topkv_sve_wrapper(const ITensor *, const ITensor *, ITensor *, uint32_t, const Window &); + +} // namespace cpu +} // namespace arm_compute + +#endif // __ARM_FEATURE_SVE diff --git a/src/cpu/kernels/topkv/generic/sve/qasymm8_signed.cpp b/src/cpu/kernels/topkv/generic/sve/qasymm8_signed.cpp new file mode 100644 index 0000000000..982d8044f4 --- /dev/null +++ b/src/cpu/kernels/topkv/generic/sve/qasymm8_signed.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2026 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#if defined(__ARM_FEATURE_SVE) + +#include "src/cpu/kernels/topkv/generic/sve/impl.h" + +#include +#include + +namespace arm_compute +{ +namespace cpu +{ +namespace detail +{ + +template <> +inline uint32_t vector_length() +{ + return static_cast(svcntb()); +} + +template <> +inline uint32_t count_gt_block(const int8_t *ptr, int8_t thr, uint32_t block_elems) +{ + const svbool_t pg = svwhilelt_b8(static_cast(0), static_cast(block_elems)); + const svint8_t v = svld1_s8(pg, ptr); + const svbool_t gt = svcmpgt_n_s8(pg, v, thr); + return static_cast(svcntp_b8(svptrue_b8(), gt)); +} + +} // namespace detail + +void topkv_qasymm8_signed_sve( + const ITensor *predictions, const ITensor *targets, ITensor *out, uint32_t k, const Window &win) +{ + detail::topkv_sve_wrapper(predictions, targets, out, k, win); +} + +// Force instantiation into this TU +template void detail::topkv_sve_wrapper(const ITensor *, const ITensor *, ITensor *, uint32_t, const Window &); + +} // namespace cpu +} // namespace arm_compute + +#endif // __ARM_FEATURE_SVE diff --git a/src/cpu/kernels/topkv/list.h b/src/cpu/kernels/topkv/list.h index 3719c5eb42..ac3058854c 100644 --- a/src/cpu/kernels/topkv/list.h +++ b/src/cpu/kernels/topkv/list.h @@ -36,6 +36,11 @@ DECLARE_TOPKV_KERNEL(topkv_qasymm8_signed_neon); DECLARE_TOPKV_KERNEL(topkv_fp16_neon); DECLARE_TOPKV_KERNEL(topkv_fp32_neon); DECLARE_TOPKV_KERNEL(topkv_s32_neon); +DECLARE_TOPKV_KERNEL(topkv_fp32_sve); +DECLARE_TOPKV_KERNEL(topkv_fp16_sve); +DECLARE_TOPKV_KERNEL(topkv_qasymm8_sve); +DECLARE_TOPKV_KERNEL(topkv_qasymm8_signed_sve); +DECLARE_TOPKV_KERNEL(topkv_s32_sve); #undef DECLARE_TOPKV_KERNEL } // namespace cpu