diff --git a/include/xsimd/config/xsimd_cpu_features_x86.hpp b/include/xsimd/config/xsimd_cpu_features_x86.hpp index 439cc6b5e..26b45ffb8 100644 --- a/include/xsimd/config/xsimd_cpu_features_x86.hpp +++ b/include/xsimd/config/xsimd_cpu_features_x86.hpp @@ -397,10 +397,16 @@ namespace xsimd sse4_2 = 20, /* Population count instruction (POPCNT). */ popcnt = 23, + /* Advanced Encryption Standard instruction set. */ + aes_ni = 25, /* OS has enabled XSAVE/XRSTOR for extended processor state management. */ osxsave = 27, /* Advanced Vector Extensions (256-bit SIMD). */ avx = 28, + /* Half to single floating point conversion. */ + f16c = 29, + /* On-chip random number generator. */ + rdrnd = 30, }; enum class edx { @@ -449,6 +455,10 @@ namespace xsimd avx512f = 16, /* AVX-512 Doubleword and Quadword instructions. */ avx512dq = 17, + /* Low-level access to the entropy-generating hardware. */ + rdseed = 18, + /* Intel arbitrary precision add carry. */ + adx = 19, /* AVX-512 Integer Fused Multiply-Add instructions. */ avx512ifma = 21, /* AVX-512 Prefetch instructions. */ @@ -457,8 +467,12 @@ namespace xsimd avx512er = 27, /* AVX-512 Conflict Detection instructions. */ avx512cd = 28, + /* Sha-1 and Sha-256 extension. */ + sha = 29, /* AVX-512 Byte and Word instructions. */ avx512bw = 30, + /* AVX-512 Vector Length Extensions for xmm and ymm registers. */ + avx512vl = 31, }; enum class ecx { @@ -466,14 +480,37 @@ namespace xsimd avx512vbmi = 1, /* AVX-512 Vector Bit Manipulation instructions 2. */ avx512vbmi2 = 6, + /* Galois Field instructions. */ + gfni = 8, + /* Vector Advanced Encryption Standard instructions. */ + vaes = 9, + /* Carry-less multiplication quadword instruction. */ + vpclmulqdq = 10, /* AVX-512 Vector Neural Network instructions. */ avx512vnni_bw = 11, + /* AVX-512 bit algorithm instructions (BITALG). */ + avx512_bitalg = 12, + /* AVX-512 vector population count for doubleword and quadword. */ + avx512_vpopcntdq = 14, + }; + enum class edx + { + /* AVX-512 4-register neural network instructions (word variable precision). */ + avx512_4vnniw = 2, + /* AVX-512 4-register multiply-accumulate single precision. */ + avx512_4fmaps = 3, + /* AVX-512 intersect pairs of packed doubleword/quadword integers. */ + avx512_vp2intersect = 8, + /* AVX-512 16-bit floating-point instructions. */ + avx512_fp16 = 23, + }; using regs_t = detail::x86_cpuid_regs, detail::x86_reg_id, - detail::x86_reg_id>; + detail::x86_reg_id, + detail::x86_reg_id>; }; /** @@ -497,6 +534,8 @@ namespace xsimd { /* AVX (VEX-encoded) Vector Neural Network instructions. */ avxvnni = 4, + /* AVX-512 BFloat16 instructions. */ + avx512_bf16 = 5, }; using regs_t = detail::x86_cpuid_regs(); } + inline bool avx_128() const noexcept { return sse_enabled() && leaf1().all_bits_set(); } + + inline bool aes_ni() const noexcept { return sse_enabled() && leaf1().all_bits_set(); } + + inline bool f16c() const noexcept { return avx_enabled() && leaf1().all_bits_set(); } + + inline bool rdrnd() const noexcept { return leaf1().all_bits_set(); } + inline bool bmi1() const noexcept { return leaf7().all_bits_set(); } inline bool avx2() const noexcept { return avx_enabled() && leaf7().all_bits_set(); } + inline bool avx2_128() const noexcept { return sse_enabled() && leaf7().all_bits_set(); } + inline bool bmi2() const noexcept { return leaf7().all_bits_set(); } inline bool avx512f() const noexcept { return avx512_enabled() && leaf7().all_bits_set(); } inline bool avx512dq() const noexcept { return avx512_enabled() && leaf7().all_bits_set(); } + inline bool rdseed() const noexcept { return leaf7().all_bits_set(); } + + inline bool adx() const noexcept { return leaf7().all_bits_set(); } + inline bool avx512ifma() const noexcept { return avx512_enabled() && leaf7().all_bits_set(); } inline bool avx512pf() const noexcept { return avx512_enabled() && leaf7().all_bits_set(); } @@ -836,16 +889,40 @@ namespace xsimd inline bool avx512cd() const noexcept { return avx512_enabled() && leaf7().all_bits_set(); } + inline bool sha() const noexcept { return leaf7().all_bits_set(); } + inline bool avx512bw() const noexcept { return avx512_enabled() && leaf7().all_bits_set(); } + inline bool avx512vl() const noexcept { return avx512_enabled() && leaf7().all_bits_set(); } + inline bool avx512vbmi() const noexcept { return avx512_enabled() && leaf7().all_bits_set(); } inline bool avx512vbmi2() const noexcept { return avx512_enabled() && leaf7().all_bits_set(); } + inline bool gfni() const noexcept { return avx512_enabled() && leaf7().all_bits_set(); } + + inline bool vaes() const noexcept { return avx512_enabled() && leaf7().all_bits_set(); } + + inline bool vpclmulqdq() const noexcept { return avx512_enabled() && leaf7().all_bits_set(); } + inline bool avx512vnni_bw() const noexcept { return avx512_enabled() && leaf7().all_bits_set(); } + inline bool avx512_bitalg() const noexcept { return avx512_enabled() && leaf7().all_bits_set(); } + + inline bool avx512_vpopcntdq() const noexcept { return avx512_enabled() && leaf7().all_bits_set(); } + + inline bool avx512_4vnniw() const noexcept { return avx512_enabled() && leaf7().all_bits_set(); } + + inline bool avx512_4fmaps() const noexcept { return avx512_enabled() && leaf7().all_bits_set(); } + + inline bool avx512_vp2intersect() const noexcept { return avx512_enabled() && leaf7().all_bits_set(); } + + inline bool avx512_fp16() const noexcept { return avx512_enabled() && leaf7().all_bits_set(); } + inline bool avxvnni() const noexcept { return avx_enabled() && leaf7sub1().all_bits_set(); } + inline bool avx512_bf16() const noexcept { return avx512_enabled() && leaf7sub1().all_bits_set(); } + inline bool fma4() const noexcept { return avx_enabled() && leaf80000001().all_bits_set(); } };