From 6c5824a411caf739cbf39c5642cacec02038c9c9 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 30 Jan 2026 12:17:18 +0100 Subject: [PATCH 1/9] Refactor x86 CPU features --- include/xsimd/config/xsimd_config.hpp | 11 + include/xsimd/config/xsimd_cpuid.hpp | 162 +++------- include/xsimd/types/xsimd_all_registers.hpp | 2 + include/xsimd/xsimd_cpu_features_x86.hpp | 321 ++++++++++++++++++++ 4 files changed, 371 insertions(+), 125 deletions(-) create mode 100644 include/xsimd/xsimd_cpu_features_x86.hpp diff --git a/include/xsimd/config/xsimd_config.hpp b/include/xsimd/config/xsimd_config.hpp index 58ce53462..aead21c45 100644 --- a/include/xsimd/config/xsimd_config.hpp +++ b/include/xsimd/config/xsimd_config.hpp @@ -33,6 +33,17 @@ * @defgroup xsimd_config_macro Instruction Set Detection */ +/** + * @ingroup xsimd_config_macro + * + * Set to 1 if the target is the x86 architecture family. + */ +#if defined(__x86_64__) || defined(__i386__) || defined(_M_AMD64) || defined(_M_IX86) +#define XSIMD_TARGET_X86 1 +#else +#define XSIMD_TARGET_X86 0 +#endif + /** * @ingroup xsimd_config_macro * diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp index 9805bf087..7493aee74 100644 --- a/include/xsimd/config/xsimd_cpuid.hpp +++ b/include/xsimd/config/xsimd_cpuid.hpp @@ -12,9 +12,12 @@ #ifndef XSIMD_CPUID_HPP #define XSIMD_CPUID_HPP -#include #include +#include "../types/xsimd_all_registers.hpp" +#include "../xsimd_cpu_features_x86.hpp" +#include "xsimd_inline.hpp" + #if defined(__linux__) && (defined(__ARM_NEON) || defined(_M_ARM) || defined(__riscv_vector)) #include #include @@ -25,13 +28,6 @@ #endif -#if defined(_MSC_VER) -// Contains the definition of __cpuidex -#include -#endif - -#include "../types/xsimd_all_registers.hpp" - namespace xsimd { namespace detail @@ -126,138 +122,54 @@ namespace xsimd #endif rvv = bool(getauxval(AT_HWCAP) & HWCAP_V); #endif - -#elif defined(__x86_64__) || defined(__i386__) || defined(_M_AMD64) || defined(_M_IX86) - - auto get_xcr0_low = []() noexcept - { - uint32_t xcr0; - -#if defined(_MSC_VER) && _MSC_VER >= 1400 - - xcr0 = (uint32_t)_xgetbv(0); - -#elif defined(__GNUC__) - - __asm__( - "xorl %%ecx, %%ecx\n" - "xgetbv\n" - : "=a"(xcr0) - : -#if defined(__i386__) - : "ecx", "edx" -#else - : "rcx", "rdx" -#endif - ); - -#else /* _MSC_VER < 1400 */ -#error "_MSC_VER < 1400 is not supported" -#endif /* _MSC_VER && _MSC_VER >= 1400 */ - return xcr0; - }; - - auto get_cpuid = [](int reg[4], int level, int count = 0) noexcept - { - -#if defined(_MSC_VER) - __cpuidex(reg, level, count); - -#elif defined(__INTEL_COMPILER) - __cpuid(reg, level); - -#elif defined(__GNUC__) || defined(__clang__) - -#if defined(__i386__) && defined(__PIC__) - // %ebx may be the PIC register - __asm__("xchg{l}\t{%%}ebx, %1\n\t" - "cpuid\n\t" - "xchg{l}\t{%%}ebx, %1\n\t" - : "=a"(reg[0]), "=r"(reg[1]), "=c"(reg[2]), "=d"(reg[3]) - : "0"(level), "2"(count)); - -#else - __asm__("cpuid\n\t" - : "=a"(reg[0]), "=b"(reg[1]), "=c"(reg[2]), "=d"(reg[3]) - : "0"(level), "2"(count)); -#endif - -#else -#error "Unsupported configuration" #endif - }; - - int regs1[4]; - - get_cpuid(regs1, 0x1); - - // OS can explicitly disable the usage of SSE/AVX extensions - // by setting an appropriate flag in CR0 register - // - // https://docs.kernel.org/admin-guide/hw-vuln/gather_data_sampling.html + const auto cpuid = xsimd::x86_cpu_id::read(); + auto xcr0 = xsimd::x86_xcr0::make_false(); - unsigned sse_state_os_enabled = 1; + bool sse_state_os_enabled = true; // AVX and AVX512 strictly require OSXSAVE to be enabled by the OS. // If OSXSAVE is disabled (e.g., via bcdedit /set xsavedisable 1), // AVX state won't be preserved across context switches, so AVX cannot be used. - unsigned avx_state_os_enabled = 0; - unsigned avx512_state_os_enabled = 0; + bool avx_state_os_enabled = false; + bool avx512_state_os_enabled = false; - // OSXSAVE: A value of 1 indicates that the OS has set CR4.OSXSAVE[bit - // 18] to enable XSETBV/XGETBV instructions to access XCR0 and - // to support processor extended state management using - // XSAVE/XRSTOR. - bool osxsave = regs1[2] >> 27 & 1; - if (osxsave) + if (cpuid.osxsave()) { + xcr0 = xsimd::x86_xcr0::read(); - uint32_t xcr0 = get_xcr0_low(); - - sse_state_os_enabled = xcr0 >> 1 & 1; - avx_state_os_enabled = xcr0 >> 2 & sse_state_os_enabled; - avx512_state_os_enabled = xcr0 >> 6 & avx_state_os_enabled; + sse_state_os_enabled = xcr0.sse_state_os_enabled(); + avx_state_os_enabled = xcr0.avx_state_os_enabled(); + avx512_state_os_enabled = xcr0.avx512_state_os_enabled(); } - sse2 = regs1[3] >> 26 & sse_state_os_enabled; - sse3 = regs1[2] >> 0 & sse_state_os_enabled; - ssse3 = regs1[2] >> 9 & sse_state_os_enabled; - sse4_1 = regs1[2] >> 19 & sse_state_os_enabled; - sse4_2 = regs1[2] >> 20 & sse_state_os_enabled; - fma3_sse42 = regs1[2] >> 12 & sse_state_os_enabled; - - avx = regs1[2] >> 28 & avx_state_os_enabled; - fma3_avx = avx && fma3_sse42; - - int regs8[4]; - get_cpuid(regs8, 0x80000001); - fma4 = regs8[2] >> 16 & avx_state_os_enabled; - - // sse4a = regs[2] >> 6 & 1; + sse2 = cpuid.sse2() && sse_state_os_enabled; + sse3 = cpuid.sse3() && sse_state_os_enabled; + ssse3 = cpuid.ssse3() && sse_state_os_enabled; + sse4_1 = cpuid.sse4_1() && sse_state_os_enabled; + sse4_2 = cpuid.sse4_2() && sse_state_os_enabled; + fma3_sse42 = cpuid.fma3() && sse_state_os_enabled; - // xop = regs[2] >> 11 & 1; - - int regs7[4]; - get_cpuid(regs7, 0x7); - avx2 = regs7[1] >> 5 & avx_state_os_enabled; - - int regs7a[4]; - get_cpuid(regs7a, 0x7, 0x1); - avxvnni = regs7a[0] >> 4 & avx_state_os_enabled; + // sse4a not implemented in cpu_id yet + // xop not implemented in cpu_id yet + avx = cpuid.avx() && avx_state_os_enabled; + fma3_avx = avx && fma3_sse42; + fma4 = cpuid.fma4() && avx_state_os_enabled; + avx2 = cpuid.avx2() && avx_state_os_enabled; + avxvnni = cpuid.avxvnni() && avx_state_os_enabled; fma3_avx2 = avx2 && fma3_sse42; - avx512f = regs7[1] >> 16 & avx512_state_os_enabled; - avx512cd = regs7[1] >> 28 & avx512_state_os_enabled; - avx512dq = regs7[1] >> 17 & avx512_state_os_enabled; - avx512bw = regs7[1] >> 30 & avx512_state_os_enabled; - avx512er = regs7[1] >> 27 & avx512_state_os_enabled; - avx512pf = regs7[1] >> 26 & avx512_state_os_enabled; - avx512ifma = regs7[1] >> 21 & avx512_state_os_enabled; - avx512vbmi = regs7[2] >> 1 & avx512_state_os_enabled; - avx512vbmi2 = regs7[2] >> 6 & avx512_state_os_enabled; - avx512vnni_bw = regs7[2] >> 11 & avx512_state_os_enabled; + avx512f = cpuid.avx512f() && avx512_state_os_enabled; + avx512cd = cpuid.avx512cd() && avx512_state_os_enabled; + avx512dq = cpuid.avx512dq() && avx512_state_os_enabled; + avx512bw = cpuid.avx512bw() && avx512_state_os_enabled; + avx512er = cpuid.avx512er() && avx512_state_os_enabled; + avx512pf = cpuid.avx512pf() && avx512_state_os_enabled; + avx512ifma = cpuid.avx512ifma() && avx512_state_os_enabled; + avx512vbmi = cpuid.avx512vbmi() && avx512_state_os_enabled; + avx512vbmi2 = cpuid.avx512vbmi2() && avx512_state_os_enabled; + avx512vnni_bw = cpuid.avx512vnni_bw() && avx512_state_os_enabled; avx512vnni_vbmi2 = avx512vbmi2 && avx512vnni_bw; -#endif } }; } // namespace detail diff --git a/include/xsimd/types/xsimd_all_registers.hpp b/include/xsimd/types/xsimd_all_registers.hpp index 33f9b465d..da87df2e4 100644 --- a/include/xsimd/types/xsimd_all_registers.hpp +++ b/include/xsimd/types/xsimd_all_registers.hpp @@ -9,6 +9,8 @@ * The full license is in the file LICENSE, distributed with this software. * ****************************************************************************/ +#include "../config/xsimd_inline.hpp" + #include "xsimd_fma3_sse_register.hpp" #include "xsimd_fma4_register.hpp" #include "xsimd_sse2_register.hpp" diff --git a/include/xsimd/xsimd_cpu_features_x86.hpp b/include/xsimd/xsimd_cpu_features_x86.hpp new file mode 100644 index 000000000..185c08c30 --- /dev/null +++ b/include/xsimd/xsimd_cpu_features_x86.hpp @@ -0,0 +1,321 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ****************************************************************************/ + +#ifndef XSIMD_CPU_FEATURES_X86_HPP +#define XSIMD_CPU_FEATURES_X86_HPP + +#include + +#include "./config/xsimd_config.hpp" + +#if XSIMD_TARGET_X86 && defined(_MSC_VER) +// Contains the definition of __cpuidex +#include +#endif + +namespace xsimd +{ + namespace detail + { + template + constexpr I make_bit_mask(I bit) + { + return static_cast(I { 1 } << bit); + } + + template + constexpr I make_bit_mask(I bit, Args... bits) + { + return make_bit_mask(bit) | make_bit_mask(static_cast(bits)...); + } + + template + constexpr bool bit_is_set(I value) + { + constexpr I mask = make_bit_mask(static_cast(Bits)...); + return (value & mask) == mask; + } + + inline void get_cpuid(int reg[4], int level, int count = 0) noexcept; + + inline std::uint32_t get_xcr0_low() noexcept; + } + + /* + * Extended Control Register 0 (XCR0). + * + * Operating systems can explicitly disable the usage of instruction set (such + * as SSE or AVX extensions) by setting an appropriate flag in XCR0 register. + * This utility parses such bit values. + * + * @see https://docs.kernel.org/admin-guide/hw-vuln/gather_data_sampling.html + */ + class x86_xcr0 + { + public: + using reg_t = std::uint32_t; + + /** Parse a XCR0 value into individual components. */ + constexpr explicit x86_xcr0(reg_t low) noexcept + : m_low(low) + { + } + + /** Create an object that has all features set to false. */ + static constexpr x86_xcr0 make_false() + { + return x86_xcr0(0); + } + + /** Read the XCR0 register from the CPU if on the correct architecture. */ + static x86_xcr0 read() + { + return x86_xcr0(detail::get_xcr0_low()); + } + + constexpr bool sse_state_os_enabled() const noexcept + { + return detail::bit_is_set<1>(m_low); + } + + constexpr bool avx_state_os_enabled() const noexcept + { + // Check both SSE and AVX bits even though AVX must imply SSE + return detail::bit_is_set<1, 2>(m_low); + } + + constexpr bool avx512_state_os_enabled() const noexcept + { + // Check all SSE, AVX, and AVX52 bits even though AVX512 must + // imply AVX and SSE + return detail::bit_is_set<1, 2, 6>(m_low); + } + + private: + std::uint32_t m_low = {}; + }; + + class x86_cpu_id + { + public: + struct cpu_id_regs + { + using reg_t = int[4]; + + reg_t reg1 = {}; + reg_t reg7 = {}; + reg_t reg7a = {}; + reg_t reg8 = {}; + }; + + /** Parse CpuInfo register values into individual components. */ + constexpr explicit x86_cpu_id(const cpu_id_regs& regs) noexcept + : m_regs(regs) + { + } + + /** + * Read the CpuId registers from the CPU if on the correct architecture. + * + * This is only safe to call if bit 18 of CR4.OSXSAVE has been set. + * + * @see cpu_id::osxsave + */ + static x86_cpu_id read() + { + cpu_id_regs regs = {}; + detail::get_cpuid(regs.reg1, 0x1); + detail::get_cpuid(regs.reg7, 0x7); + detail::get_cpuid(regs.reg7a, 0x7, 0x1); + detail::get_cpuid(regs.reg8, 0x80000001); + return x86_cpu_id(regs); + } + + constexpr bool sse2() const noexcept + { + return detail::bit_is_set<26>(m_regs.reg1[3]); + } + + constexpr bool sse3() const noexcept + { + return detail::bit_is_set<0>(m_regs.reg1[2]); + } + + constexpr bool ssse3() const noexcept + { + return detail::bit_is_set<9>(m_regs.reg1[2]); + } + + constexpr bool sse4_1() const noexcept + { + return detail::bit_is_set<19>(m_regs.reg1[2]); + } + + constexpr bool sse4_2() const noexcept + { + return detail::bit_is_set<20>(m_regs.reg1[2]); + } + + constexpr bool fma3() const noexcept + { + return detail::bit_is_set<12>(m_regs.reg1[2]); + } + + /** + * Indicates whether the OS has enabled extended state management. + * + * When true, the OS has set bit 18 (OSXSAVE) in the CR4 control register, + * enabling the XGETBV/XSETBV instructions to access XCR0 and support + * processor extended state management using XSAVE/XRSTOR. + * + * This value is read from CPUID leaf 0x1, ECX bit 27, which reflects + * the state of CR4.OSXSAVE. + */ + constexpr bool osxsave() const noexcept + { + return detail::bit_is_set<27>(m_regs.reg1[2]); + } + + constexpr bool avx() const noexcept + { + return detail::bit_is_set<28>(m_regs.reg1[2]); + } + + constexpr bool avx2() const noexcept + { + return detail::bit_is_set<5>(m_regs.reg7[1]); + } + + constexpr bool avx512f() const noexcept + { + return detail::bit_is_set<16>(m_regs.reg7[1]); + } + + constexpr bool avx512dq() const noexcept + { + return detail::bit_is_set<17>(m_regs.reg7[1]); + } + + constexpr bool avx512ifma() const noexcept + { + return detail::bit_is_set<21>(m_regs.reg7[1]); + } + + constexpr bool avx512pf() const noexcept + { + return detail::bit_is_set<26>(m_regs.reg7[1]); + } + + constexpr bool avx512er() const noexcept + { + return detail::bit_is_set<27>(m_regs.reg7[1]); + } + + constexpr bool avx512cd() const noexcept + { + return detail::bit_is_set<28>(m_regs.reg7[1]); + } + + constexpr bool avx512bw() const noexcept + { + return detail::bit_is_set<30>(m_regs.reg7[1]); + } + + constexpr bool avx512vbmi() const noexcept + { + return detail::bit_is_set<1>(m_regs.reg7[2]); + } + + constexpr bool avx512vbmi2() const noexcept + { + return detail::bit_is_set<6>(m_regs.reg7[2]); + } + + constexpr bool avx512vnni_bw() const noexcept + { + return detail::bit_is_set<11>(m_regs.reg7[2]); + } + + constexpr bool avxvnni() const noexcept + { + return detail::bit_is_set<4>(m_regs.reg7a[0]); + } + + constexpr bool fma4() const noexcept + { + return detail::bit_is_set<16>(m_regs.reg8[2]); + } + + private: + cpu_id_regs m_regs = {}; + }; + + namespace detail + { + inline void get_cpuid(int reg[4], int level, int count) noexcept + { +#if !XSIMD_TARGET_X86 + reg = {}; // Fill zeros + +#elif defined(_MSC_VER) + __cpuidex(reg, level, count); + +#elif defined(__INTEL_COMPILER) + __cpuid(reg, level); + +#elif defined(__GNUC__) || defined(__clang__) + +#if defined(__i386__) && defined(__PIC__) + // %ebx may be the PIC register + __asm__("xchg{l}\t{%%}ebx, %1\n\t" + "cpuid\n\t" + "xchg{l}\t{%%}ebx, %1\n\t" + : "=a"(reg[0]), "=r"(reg[1]), "=c"(reg[2]), "=d"(reg[3]) + : "0"(level), "2"(count)); + +#else + __asm__("cpuid\n\t" + : "=a"(reg[0]), "=b"(reg[1]), "=c"(reg[2]), "=d"(reg[3]) + : "0"(level), "2"(count)); +#endif +#endif + } + + inline std::uint32_t get_xcr0_low() noexcept + { +#if !XSIMD_TARGET_X86 + return {}; // return 0; + +#elif defined(_MSC_VER) && _MSC_VER >= 1400 + return static_cast(_xgetbv(0)); + +#elif defined(__GNUC__) + std::uint32_t xcr0 = {}; + __asm__( + "xorl %%ecx, %%ecx\n" + "xgetbv\n" + : "=a"(xcr0) + : +#if defined(__i386__) + : "ecx", "edx" +#else + : "rcx", "rdx" +#endif + ); + return xcr0; + +#else /* _MSC_VER < 1400 */ +#error "_MSC_VER < 1400 is not supported" +#endif /* _MSC_VER && _MSC_VER >= 1400 */ + }; + } +} +#endif From 05f4144303934ee308ea39424dc2be5c8f71f2b5 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 30 Jan 2026 12:23:06 +0100 Subject: [PATCH 2/9] Remove unsafe memset --- include/xsimd/config/xsimd_cpuid.hpp | 6 +----- include/xsimd/types/xsimd_common_arch.hpp | 2 ++ 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/include/xsimd/config/xsimd_cpuid.hpp b/include/xsimd/config/xsimd_cpuid.hpp index 7493aee74..461eaded5 100644 --- a/include/xsimd/config/xsimd_cpuid.hpp +++ b/include/xsimd/config/xsimd_cpuid.hpp @@ -12,8 +12,6 @@ #ifndef XSIMD_CPUID_HPP #define XSIMD_CPUID_HPP -#include - #include "../types/xsimd_all_registers.hpp" #include "../xsimd_cpu_features_x86.hpp" #include "xsimd_inline.hpp" @@ -36,7 +34,7 @@ namespace xsimd { #define ARCH_FIELD_EX(arch, field_name) \ - unsigned field_name; \ + unsigned field_name = 0; \ XSIMD_INLINE bool has(::xsimd::arch) const { return this->field_name; } #define ARCH_FIELD_EX_REUSE(arch, field_name) \ @@ -86,8 +84,6 @@ namespace xsimd XSIMD_INLINE supported_arch() noexcept { - memset(this, 0, sizeof(supported_arch)); - #if XSIMD_WITH_WASM wasm = 1; #endif diff --git a/include/xsimd/types/xsimd_common_arch.hpp b/include/xsimd/types/xsimd_common_arch.hpp index 28491aeda..a33c868ea 100644 --- a/include/xsimd/types/xsimd_common_arch.hpp +++ b/include/xsimd/types/xsimd_common_arch.hpp @@ -12,6 +12,8 @@ #ifndef XSIMD_COMMON_ARCH_HPP #define XSIMD_COMMON_ARCH_HPP +#include + #include "../config/xsimd_config.hpp" /** From 4296519d111e453578cf78c622d4f59a6276db8a Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 30 Jan 2026 12:35:20 +0100 Subject: [PATCH 3/9] Fix warning --- include/xsimd/xsimd_cpu_features_x86.hpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/xsimd/xsimd_cpu_features_x86.hpp b/include/xsimd/xsimd_cpu_features_x86.hpp index 185c08c30..348c8b2c8 100644 --- a/include/xsimd/xsimd_cpu_features_x86.hpp +++ b/include/xsimd/xsimd_cpu_features_x86.hpp @@ -263,7 +263,12 @@ namespace xsimd inline void get_cpuid(int reg[4], int level, int count) noexcept { #if !XSIMD_TARGET_X86 - reg = {}; // Fill zeros + reg[0] = 0; + reg[1] = 0; + reg[2] = 0; + reg[3] = 0; + (void)level; + (void)count; #elif defined(_MSC_VER) __cpuidex(reg, level, count); From 6d366ddab345593e6c94faa065f8f618605c0d1d Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 30 Jan 2026 12:40:15 +0100 Subject: [PATCH 4/9] Fix inline --- include/xsimd/xsimd_cpu_features_x86.hpp | 4 ++-- test/check_inline_specifier.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/xsimd/xsimd_cpu_features_x86.hpp b/include/xsimd/xsimd_cpu_features_x86.hpp index 348c8b2c8..99e6b6eb3 100644 --- a/include/xsimd/xsimd_cpu_features_x86.hpp +++ b/include/xsimd/xsimd_cpu_features_x86.hpp @@ -76,7 +76,7 @@ namespace xsimd } /** Read the XCR0 register from the CPU if on the correct architecture. */ - static x86_xcr0 read() + inline static x86_xcr0 read() { return x86_xcr0(detail::get_xcr0_low()); } @@ -129,7 +129,7 @@ namespace xsimd * * @see cpu_id::osxsave */ - static x86_cpu_id read() + inline static x86_cpu_id read() { cpu_id_regs regs = {}; detail::get_cpuid(regs.reg1, 0x1); diff --git a/test/check_inline_specifier.sh b/test/check_inline_specifier.sh index 1ccdda130..2337b3707 100755 --- a/test/check_inline_specifier.sh +++ b/test/check_inline_specifier.sh @@ -3,7 +3,7 @@ # Usage: $0 top_srcdir # # This script walks all headers in $top_srcdir/include and makes sure that all -# functions declared tehre are marked as inline or constexpr (which implies +# functions declared there are marked as inline or constexpr (which implies # inline). This makes sure the xsimd headers does not define symbol with global # linkage, and somehow convey our itnent to have all functions in xsimd being # inlined by the compiler. From 09f23d78a6246e9850b14a8db36510198f18f6f4 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 30 Jan 2026 14:54:33 +0100 Subject: [PATCH 5/9] Refactor bit utils --- include/xsimd/utils/bits.hpp | 40 +++++++++++++ include/xsimd/xsimd_cpu_features_x86.hpp | 71 +++++++++--------------- 2 files changed, 66 insertions(+), 45 deletions(-) create mode 100644 include/xsimd/utils/bits.hpp diff --git a/include/xsimd/utils/bits.hpp b/include/xsimd/utils/bits.hpp new file mode 100644 index 000000000..e3ad83e30 --- /dev/null +++ b/include/xsimd/utils/bits.hpp @@ -0,0 +1,40 @@ +/*************************************************************************** + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * + * Martin Renou * + * Copyright (c) QuantStack * + * Copyright (c) Serge Guelton * + * * + * Distributed under the terms of the BSD 3-Clause License. * + * * + * The full license is in the file LICENSE, distributed with this software. * + ***************************************************************************/ + +#ifndef XSIMD_CPUID_UTILS_HPP +#define XSIMD_CPUID_UTILS_HPP + +namespace xsimd +{ + namespace utils + { + template + constexpr I make_bit_mask(I bit) + { + return static_cast(I { 1 } << bit); + } + + template + constexpr I make_bit_mask(I bit, Args... bits) + { + return make_bit_mask(bit) | make_bit_mask(static_cast(bits)...); + } + + template + constexpr bool bit_is_set(I value) + { + constexpr I mask = make_bit_mask(static_cast(Bits)...); + return (value & mask) == mask; + } + } +} + +#endif diff --git a/include/xsimd/xsimd_cpu_features_x86.hpp b/include/xsimd/xsimd_cpu_features_x86.hpp index 99e6b6eb3..ba816d2cd 100644 --- a/include/xsimd/xsimd_cpu_features_x86.hpp +++ b/include/xsimd/xsimd_cpu_features_x86.hpp @@ -15,35 +15,16 @@ #include #include "./config/xsimd_config.hpp" +#include "./utils/bits.hpp" #if XSIMD_TARGET_X86 && defined(_MSC_VER) -// Contains the definition of __cpuidex -#include +#include // Contains the definition of __cpuidex #endif namespace xsimd { namespace detail { - template - constexpr I make_bit_mask(I bit) - { - return static_cast(I { 1 } << bit); - } - - template - constexpr I make_bit_mask(I bit, Args... bits) - { - return make_bit_mask(bit) | make_bit_mask(static_cast(bits)...); - } - - template - constexpr bool bit_is_set(I value) - { - constexpr I mask = make_bit_mask(static_cast(Bits)...); - return (value & mask) == mask; - } - inline void get_cpuid(int reg[4], int level, int count = 0) noexcept; inline std::uint32_t get_xcr0_low() noexcept; @@ -83,20 +64,20 @@ namespace xsimd constexpr bool sse_state_os_enabled() const noexcept { - return detail::bit_is_set<1>(m_low); + return utils::bit_is_set<1>(m_low); } constexpr bool avx_state_os_enabled() const noexcept { // Check both SSE and AVX bits even though AVX must imply SSE - return detail::bit_is_set<1, 2>(m_low); + return utils::bit_is_set<1, 2>(m_low); } constexpr bool avx512_state_os_enabled() const noexcept { // Check all SSE, AVX, and AVX52 bits even though AVX512 must // imply AVX and SSE - return detail::bit_is_set<1, 2, 6>(m_low); + return utils::bit_is_set<1, 2, 6>(m_low); } private: @@ -141,32 +122,32 @@ namespace xsimd constexpr bool sse2() const noexcept { - return detail::bit_is_set<26>(m_regs.reg1[3]); + return utils::bit_is_set<26>(m_regs.reg1[3]); } constexpr bool sse3() const noexcept { - return detail::bit_is_set<0>(m_regs.reg1[2]); + return utils::bit_is_set<0>(m_regs.reg1[2]); } constexpr bool ssse3() const noexcept { - return detail::bit_is_set<9>(m_regs.reg1[2]); + return utils::bit_is_set<9>(m_regs.reg1[2]); } constexpr bool sse4_1() const noexcept { - return detail::bit_is_set<19>(m_regs.reg1[2]); + return utils::bit_is_set<19>(m_regs.reg1[2]); } constexpr bool sse4_2() const noexcept { - return detail::bit_is_set<20>(m_regs.reg1[2]); + return utils::bit_is_set<20>(m_regs.reg1[2]); } constexpr bool fma3() const noexcept { - return detail::bit_is_set<12>(m_regs.reg1[2]); + return utils::bit_is_set<12>(m_regs.reg1[2]); } /** @@ -181,77 +162,77 @@ namespace xsimd */ constexpr bool osxsave() const noexcept { - return detail::bit_is_set<27>(m_regs.reg1[2]); + return utils::bit_is_set<27>(m_regs.reg1[2]); } constexpr bool avx() const noexcept { - return detail::bit_is_set<28>(m_regs.reg1[2]); + return utils::bit_is_set<28>(m_regs.reg1[2]); } constexpr bool avx2() const noexcept { - return detail::bit_is_set<5>(m_regs.reg7[1]); + return utils::bit_is_set<5>(m_regs.reg7[1]); } constexpr bool avx512f() const noexcept { - return detail::bit_is_set<16>(m_regs.reg7[1]); + return utils::bit_is_set<16>(m_regs.reg7[1]); } constexpr bool avx512dq() const noexcept { - return detail::bit_is_set<17>(m_regs.reg7[1]); + return utils::bit_is_set<17>(m_regs.reg7[1]); } constexpr bool avx512ifma() const noexcept { - return detail::bit_is_set<21>(m_regs.reg7[1]); + return utils::bit_is_set<21>(m_regs.reg7[1]); } constexpr bool avx512pf() const noexcept { - return detail::bit_is_set<26>(m_regs.reg7[1]); + return utils::bit_is_set<26>(m_regs.reg7[1]); } constexpr bool avx512er() const noexcept { - return detail::bit_is_set<27>(m_regs.reg7[1]); + return utils::bit_is_set<27>(m_regs.reg7[1]); } constexpr bool avx512cd() const noexcept { - return detail::bit_is_set<28>(m_regs.reg7[1]); + return utils::bit_is_set<28>(m_regs.reg7[1]); } constexpr bool avx512bw() const noexcept { - return detail::bit_is_set<30>(m_regs.reg7[1]); + return utils::bit_is_set<30>(m_regs.reg7[1]); } constexpr bool avx512vbmi() const noexcept { - return detail::bit_is_set<1>(m_regs.reg7[2]); + return utils::bit_is_set<1>(m_regs.reg7[2]); } constexpr bool avx512vbmi2() const noexcept { - return detail::bit_is_set<6>(m_regs.reg7[2]); + return utils::bit_is_set<6>(m_regs.reg7[2]); } constexpr bool avx512vnni_bw() const noexcept { - return detail::bit_is_set<11>(m_regs.reg7[2]); + return utils::bit_is_set<11>(m_regs.reg7[2]); } constexpr bool avxvnni() const noexcept { - return detail::bit_is_set<4>(m_regs.reg7a[0]); + return utils::bit_is_set<4>(m_regs.reg7a[0]); } constexpr bool fma4() const noexcept { - return detail::bit_is_set<16>(m_regs.reg8[2]); + return utils::bit_is_set<16>(m_regs.reg8[2]); } private: From 0f85d0ee02f5f1275a132c2a6baa8dc9b445492c Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 30 Jan 2026 17:32:56 +0100 Subject: [PATCH 6/9] Fix inline header in wrong location --- include/xsimd/types/xsimd_all_registers.hpp | 2 -- include/xsimd/types/xsimd_register.hpp | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/xsimd/types/xsimd_all_registers.hpp b/include/xsimd/types/xsimd_all_registers.hpp index da87df2e4..33f9b465d 100644 --- a/include/xsimd/types/xsimd_all_registers.hpp +++ b/include/xsimd/types/xsimd_all_registers.hpp @@ -9,8 +9,6 @@ * The full license is in the file LICENSE, distributed with this software. * ****************************************************************************/ -#include "../config/xsimd_inline.hpp" - #include "xsimd_fma3_sse_register.hpp" #include "xsimd_fma4_register.hpp" #include "xsimd_sse2_register.hpp" diff --git a/include/xsimd/types/xsimd_register.hpp b/include/xsimd/types/xsimd_register.hpp index 018418af6..b14962e5b 100644 --- a/include/xsimd/types/xsimd_register.hpp +++ b/include/xsimd/types/xsimd_register.hpp @@ -14,6 +14,8 @@ #include +#include "../config/xsimd_inline.hpp" + namespace xsimd { namespace types From 92404f40d171503f35c604d002c7df024f936d43 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 30 Jan 2026 17:43:44 +0100 Subject: [PATCH 7/9] Fix cpuid aliases --- include/xsimd/xsimd_cpu_features_x86.hpp | 36 ++++++++++++------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/include/xsimd/xsimd_cpu_features_x86.hpp b/include/xsimd/xsimd_cpu_features_x86.hpp index ba816d2cd..ec579aaec 100644 --- a/include/xsimd/xsimd_cpu_features_x86.hpp +++ b/include/xsimd/xsimd_cpu_features_x86.hpp @@ -12,6 +12,7 @@ #ifndef XSIMD_CPU_FEATURES_X86_HPP #define XSIMD_CPU_FEATURES_X86_HPP +#include #include #include "./config/xsimd_config.hpp" @@ -25,9 +26,11 @@ namespace xsimd { namespace detail { - inline void get_cpuid(int reg[4], int level, int count = 0) noexcept; + using cpuid_reg_t = std::array; + inline cpuid_reg_t get_cpuid(int level, int count = 0) noexcept; - inline std::uint32_t get_xcr0_low() noexcept; + using xcr0_reg_t = std::uint32_t; + inline xcr0_reg_t get_xcr0_low() noexcept; } /* @@ -42,7 +45,7 @@ namespace xsimd class x86_xcr0 { public: - using reg_t = std::uint32_t; + using reg_t = detail::xcr0_reg_t; /** Parse a XCR0 value into individual components. */ constexpr explicit x86_xcr0(reg_t low) noexcept @@ -81,7 +84,7 @@ namespace xsimd } private: - std::uint32_t m_low = {}; + reg_t m_low = {}; }; class x86_cpu_id @@ -89,7 +92,7 @@ namespace xsimd public: struct cpu_id_regs { - using reg_t = int[4]; + using reg_t = detail::cpuid_reg_t; reg_t reg1 = {}; reg_t reg7 = {}; @@ -113,10 +116,10 @@ namespace xsimd inline static x86_cpu_id read() { cpu_id_regs regs = {}; - detail::get_cpuid(regs.reg1, 0x1); - detail::get_cpuid(regs.reg7, 0x7); - detail::get_cpuid(regs.reg7a, 0x7, 0x1); - detail::get_cpuid(regs.reg8, 0x80000001); + regs.reg1 = detail::get_cpuid(0x1); + regs.reg7 = detail::get_cpuid(0x7); + regs.reg7a = detail::get_cpuid(0x7, 0x1); + regs.reg8 = detail::get_cpuid(0x80000001); return x86_cpu_id(regs); } @@ -241,15 +244,12 @@ namespace xsimd namespace detail { - inline void get_cpuid(int reg[4], int level, int count) noexcept + inline cpuid_reg_t get_cpuid(int level, int count) noexcept { #if !XSIMD_TARGET_X86 - reg[0] = 0; - reg[1] = 0; - reg[2] = 0; - reg[3] = 0; (void)level; (void)count; + return {}; // All bits to zero #elif defined(_MSC_VER) __cpuidex(reg, level, count); @@ -275,16 +275,16 @@ namespace xsimd #endif } - inline std::uint32_t get_xcr0_low() noexcept + inline xcr0_reg_t get_xcr0_low() noexcept { #if !XSIMD_TARGET_X86 - return {}; // return 0; + return {}; // All bits to zero #elif defined(_MSC_VER) && _MSC_VER >= 1400 - return static_cast(_xgetbv(0)); + return static_cast(_xgetbv(0)); #elif defined(__GNUC__) - std::uint32_t xcr0 = {}; + xcr0_reg_t xcr0 = {}; __asm__( "xorl %%ecx, %%ecx\n" "xgetbv\n" From 01af4b3818cc2c692de059609bb48356b61d85e5 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 30 Jan 2026 17:53:01 +0100 Subject: [PATCH 8/9] Add future TODO --- include/xsimd/utils/bits.hpp | 1 + include/xsimd/xsimd_cpu_features_x86.hpp | 1 + 2 files changed, 2 insertions(+) diff --git a/include/xsimd/utils/bits.hpp b/include/xsimd/utils/bits.hpp index e3ad83e30..a8a862219 100644 --- a/include/xsimd/utils/bits.hpp +++ b/include/xsimd/utils/bits.hpp @@ -25,6 +25,7 @@ namespace xsimd template constexpr I make_bit_mask(I bit, Args... bits) { + // TODO(C++17): Use fold expression return make_bit_mask(bit) | make_bit_mask(static_cast(bits)...); } diff --git a/include/xsimd/xsimd_cpu_features_x86.hpp b/include/xsimd/xsimd_cpu_features_x86.hpp index ec579aaec..7af58fcd0 100644 --- a/include/xsimd/xsimd_cpu_features_x86.hpp +++ b/include/xsimd/xsimd_cpu_features_x86.hpp @@ -116,6 +116,7 @@ namespace xsimd inline static x86_cpu_id read() { cpu_id_regs regs = {}; + // TODO(C++20): Use designated initializer regs.reg1 = detail::get_cpuid(0x1); regs.reg7 = detail::get_cpuid(0x7); regs.reg7a = detail::get_cpuid(0x7, 0x1); From 769470ada0ea0aca2ab2e843503f99fe0f517ab2 Mon Sep 17 00:00:00 2001 From: AntoinePrv Date: Fri, 30 Jan 2026 17:55:51 +0100 Subject: [PATCH 9/9] Fix missing var --- include/xsimd/xsimd_cpu_features_x86.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/xsimd/xsimd_cpu_features_x86.hpp b/include/xsimd/xsimd_cpu_features_x86.hpp index 7af58fcd0..edc885951 100644 --- a/include/xsimd/xsimd_cpu_features_x86.hpp +++ b/include/xsimd/xsimd_cpu_features_x86.hpp @@ -247,10 +247,11 @@ namespace xsimd { inline cpuid_reg_t get_cpuid(int level, int count) noexcept { + cpuid_reg_t reg = {}; #if !XSIMD_TARGET_X86 (void)level; (void)count; - return {}; // All bits to zero + return reg; // All bits to zero #elif defined(_MSC_VER) __cpuidex(reg, level, count);