From 565565b9f74c0954ca902e97b83276e5d5c16dac Mon Sep 17 00:00:00 2001 From: mborland Date: Tue, 16 Dec 2025 16:15:39 -0500 Subject: [PATCH 1/4] Make noinline portable --- test/benchmark_libbid.c | 51 +++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/test/benchmark_libbid.c b/test/benchmark_libbid.c index 5a1b15273..7ada30d9b 100644 --- a/test/benchmark_libbid.c +++ b/test/benchmark_libbid.c @@ -21,6 +21,13 @@ typedef BID_UINT128 Decimal128; #define K 20000000 #define N 5 +#ifdef _MSC_VER +# define BOOST_DECIMAL_NOINLINE __declspec(noinline) +#else +# define BOOST_DECIMAL_NOINLINE __attribute__ ((noinline)) +#endif +#endif + uint32_t flag = 0; uint32_t random_uint32(void) @@ -45,7 +52,7 @@ uint64_t random_uint64(void) return r; } -__attribute__ ((noinline)) void generate_vector_32(Decimal32* buffer, size_t buffer_len) +BOOST_DECIMAL_NOINLINE void generate_vector_32(Decimal32* buffer, size_t buffer_len) { for (size_t i = 0; i < buffer_len; ++i) { @@ -53,7 +60,7 @@ __attribute__ ((noinline)) void generate_vector_32(Decimal32* buffer, size_t buf } } -__attribute__ ((noinline)) void test_comparisons_32(Decimal32* data, const char* label) +BOOST_DECIMAL_NOINLINE void test_comparisons_32(Decimal32* data, const char* label) { struct timespec t1, t2; clock_gettime(CLOCK_MONOTONIC, &t1); @@ -82,7 +89,7 @@ __attribute__ ((noinline)) void test_comparisons_32(Decimal32* data, const char* printf("Comparisons <%-10s >: %-10" PRIu64 " us (s=%zu)\n", label, elapsed_time_us, s); } -__attribute__ ((noinline)) void generate_vector_64(Decimal64* buffer, size_t buffer_len) +BOOST_DECIMAL_NOINLINE void generate_vector_64(Decimal64* buffer, size_t buffer_len) { for (size_t i = 0; i < buffer_len; ++i) { @@ -90,7 +97,7 @@ __attribute__ ((noinline)) void generate_vector_64(Decimal64* buffer, size_t buf } } -__attribute__ ((noinline)) void test_comparisons_64(Decimal64* data, const char* label) +BOOST_DECIMAL_NOINLINE void 
test_comparisons_64(Decimal64* data, const char* label) { struct timespec t1, t2; clock_gettime(CLOCK_MONOTONIC, &t1); @@ -153,12 +160,12 @@ Decimal128 random_decimal128(void) // 5. Parse to decimal128 _IDEC_flags flags = 0; - Decimal128 result = bid128_from_string(str, &flags); + Decimal128 result = bid128_from_string(str, BID_ROUNDING_TO_NEAREST, &flags); return result; } -__attribute__ ((__noinline__)) void generate_vector_128(Decimal128* buffer, size_t buffer_len) +BOOST_DECIMAL_NOINLINE void generate_vector_128(Decimal128* buffer, size_t buffer_len) { size_t i = 0; while (i < buffer_len) @@ -168,7 +175,7 @@ __attribute__ ((__noinline__)) void generate_vector_128(Decimal128* buffer, size } } -__attribute__ ((__noinline__)) void test_comparisons_128(Decimal128* data, const char* label) +BOOST_DECIMAL_NOINLINE void test_comparisons_128(Decimal128* data, const char* label) { struct timespec t1, t2; clock_gettime(CLOCK_MONOTONIC, &t1); @@ -200,26 +207,26 @@ __attribute__ ((__noinline__)) void test_comparisons_128(Decimal128* data, const typedef Decimal32 (*operation_32)(Decimal32, Decimal32); -__attribute__ ((noinline)) Decimal32 add_32(Decimal32 a, Decimal32 b) +BOOST_DECIMAL_NOINLINE Decimal32 add_32(Decimal32 a, Decimal32 b) { return bid32_add(a, b, BID_ROUNDING_TO_NEAREST, &flag); } -__attribute__ ((noinline)) Decimal32 sub_32(Decimal32 a, Decimal32 b) +BOOST_DECIMAL_NOINLINE Decimal32 sub_32(Decimal32 a, Decimal32 b) { return bid32_sub(a, b, BID_ROUNDING_TO_NEAREST, &flag); } -__attribute__ ((noinline)) Decimal32 mul_32(Decimal32 a, Decimal32 b) +BOOST_DECIMAL_NOINLINE Decimal32 mul_32(Decimal32 a, Decimal32 b) { return bid32_mul(a, b, BID_ROUNDING_TO_NEAREST, &flag); } -__attribute__ ((noinline)) Decimal32 div_32(Decimal32 a, Decimal32 b) +BOOST_DECIMAL_NOINLINE Decimal32 div_32(Decimal32 a, Decimal32 b) { return bid32_div(a, b, BID_ROUNDING_TO_NEAREST, &flag); } -__attribute__ ((noinline)) void test_two_element_operation_32(Decimal32* data, operation_32 op, 
const char* label, const char* op_label) +BOOST_DECIMAL_NOINLINE void test_two_element_operation_32(Decimal32* data, operation_32 op, const char* label, const char* op_label) { struct timespec t1, t2; clock_gettime(CLOCK_MONOTONIC, &t1); @@ -245,27 +252,27 @@ __attribute__ ((noinline)) void test_two_element_operation_32(Decimal32* data, o typedef Decimal64 (*operation_64)(Decimal64, Decimal64); -__attribute__ ((noinline)) Decimal64 add_64(Decimal64 a, Decimal64 b) +BOOST_DECIMAL_NOINLINE Decimal64 add_64(Decimal64 a, Decimal64 b) { return bid64_add(a, b, BID_ROUNDING_TO_NEAREST, &flag); } -__attribute__ ((noinline)) Decimal64 sub_64(Decimal64 a, Decimal64 b) +BOOST_DECIMAL_NOINLINE Decimal64 sub_64(Decimal64 a, Decimal64 b) { return bid64_sub(a, b, BID_ROUNDING_TO_NEAREST, &flag); } -__attribute__ ((noinline)) Decimal64 mul_64(Decimal64 a, Decimal64 b) +BOOST_DECIMAL_NOINLINE Decimal64 mul_64(Decimal64 a, Decimal64 b) { return bid64_mul(a, b, BID_ROUNDING_TO_NEAREST, &flag); } -__attribute__ ((noinline)) Decimal64 div_64(Decimal64 a, Decimal64 b) +BOOST_DECIMAL_NOINLINE Decimal64 div_64(Decimal64 a, Decimal64 b) { return bid64_div(a, b, BID_ROUNDING_TO_NEAREST, &flag); } -__attribute__ ((noinline)) void test_two_element_operation_64(Decimal64* data, operation_64 op, const char* label, const char* op_label) +BOOST_DECIMAL_NOINLINE void test_two_element_operation_64(Decimal64* data, operation_64 op, const char* label, const char* op_label) { struct timespec t1, t2; clock_gettime(CLOCK_MONOTONIC, &t1); @@ -292,27 +299,27 @@ __attribute__ ((noinline)) void test_two_element_operation_64(Decimal64* data, o typedef Decimal128 (*operation_128)(Decimal128, Decimal128); -__attribute__ ((__noinline__)) Decimal128 add_128(Decimal128 a, Decimal128 b) +BOOST_DECIMAL_NOINLINE Decimal128 add_128(Decimal128 a, Decimal128 b) { return bid128_add(a, b, BID_ROUNDING_TO_NEAREST, &flag); } -__attribute__ ((__noinline__)) Decimal128 sub_128(Decimal128 a, Decimal128 b) 
+BOOST_DECIMAL_NOINLINE Decimal128 sub_128(Decimal128 a, Decimal128 b) { return bid128_sub(a, b, BID_ROUNDING_TO_NEAREST, &flag); } -__attribute__ ((__noinline__)) Decimal128 mul_128(Decimal128 a, Decimal128 b) +BOOST_DECIMAL_NOINLINE Decimal128 mul_128(Decimal128 a, Decimal128 b) { return bid128_mul(a, b, BID_ROUNDING_TO_NEAREST, &flag); } -__attribute__ ((__noinline__)) Decimal128 div_128(Decimal128 a, Decimal128 b) +BOOST_DECIMAL_NOINLINE Decimal128 div_128(Decimal128 a, Decimal128 b) { return bid128_div(a, b, BID_ROUNDING_TO_NEAREST, &flag); } -__attribute__ ((__noinline__)) void test_two_element_operation_128(Decimal128* data, operation_128 op, const char* label, const char* op_label) +BOOST_DECIMAL_NOINLINE void test_two_element_operation_128(Decimal128* data, operation_128 op, const char* label, const char* op_label) { struct timespec t1, t2; clock_gettime(CLOCK_MONOTONIC, &t1); From 8ea4f1d10796449831ada87ee948d828f2826721 Mon Sep 17 00:00:00 2001 From: mborland Date: Tue, 16 Dec 2025 16:16:00 -0500 Subject: [PATCH 2/4] Don't access floating point environment on windows platform --- test/benchmark_libbid.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/benchmark_libbid.c b/test/benchmark_libbid.c index 7ada30d9b..dda1f8ce9 100644 --- a/test/benchmark_libbid.c +++ b/test/benchmark_libbid.c @@ -349,8 +349,10 @@ int main() // One time init of random number generator srand(time(NULL)); + #ifndef _WIN32 fedisableexcept(FE_ALL_EXCEPT); - + #endif + Decimal32* d32_array = malloc(K * sizeof(Decimal32)); Decimal64* d64_array = malloc(K * sizeof(Decimal64)); Decimal128* d128_array = malloc(K * sizeof(Decimal128)); From a930b39deef235e90b975189f385ffc9c49f7404 Mon Sep 17 00:00:00 2001 From: mborland Date: Tue, 16 Dec 2025 16:16:26 -0500 Subject: [PATCH 3/4] Add windows friendly CLOCK_MONOTONIC definition --- test/benchmark_libbid.c | 47 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git 
a/test/benchmark_libbid.c b/test/benchmark_libbid.c index dda1f8ce9..d3464284a 100644 --- a/test/benchmark_libbid.c +++ b/test/benchmark_libbid.c @@ -2,20 +2,25 @@ // Distributed under the Boost Software License, Version 1.0. // https://www.boost.org/LICENSE_1_0.txt -#define _POSIX_C_SOURCE 199309L +#ifdef _WIN32 +# define WIN32_LEAN_AND_MEAN +# include +#else +# define _POSIX_C_SOURCE 199309L +#endif #include #include #include -#include #include #include #include +#include "../LIBRARY/src/bid_conf.h" +#include "../LIBRARY/src/bid_functions.h" + typedef BID_UINT32 Decimal32; typedef BID_UINT64 Decimal64; -#include "../LIBRARY/src/bid_conf.h" -#include "../LIBRARY/src/bid_functions.h" typedef BID_UINT128 Decimal128; #define K 20000000 @@ -26,6 +31,40 @@ typedef BID_UINT128 Decimal128; #else # define BOOST_DECIMAL_NOINLINE __attribute__ ((noinline)) #endif + +#ifdef _WIN32 +#include + +#define CLOCK_MONOTONIC 1 + +struct timespec +{ + long tv_sec; + long tv_nsec; +}; + +int clock_gettime(int clock_id, struct timespec* tp) +{ + (void)clock_id; // Ignore clock_id, always use QPC + + static LARGE_INTEGER frequency = { 0 }; + LARGE_INTEGER counter; + + if (frequency.QuadPart == 0) + { + QueryPerformanceFrequency(&frequency); + } + + QueryPerformanceCounter(&counter); + + tp->tv_sec = (long)(counter.QuadPart / frequency.QuadPart); // whole seconds: ticks / ticks-per-second + tp->tv_nsec = (long)(((counter.QuadPart % frequency.QuadPart) * 1000000000LL) / frequency.QuadPart); // leftover ticks scaled to nanoseconds + + return 0; +} + +#else +#include #endif uint32_t flag = 0; From a3058ddb0b0db10a03787f18a85813d5aa933b0e Mon Sep 17 00:00:00 2001 From: mborland Date: Tue, 16 Dec 2025 14:59:50 -0500 Subject: [PATCH 4/4] Add benchmark data of Intel libbid run with MSVC --- doc/modules/ROOT/pages/benchmarks.adoc | 46 ++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/doc/modules/ROOT/pages/benchmarks.adoc b/doc/modules/ROOT/pages/benchmarks.adoc index bea3f4617..52936c83e 100644 --- a/doc/modules/ROOT/pages/benchmarks.adoc +++ 
b/doc/modules/ROOT/pages/benchmarks.adoc @@ -29,6 +29,7 @@ To run the GCC benchmarks you can use the following command: `gcc benchmark_libd To run the Intel benchmarks you will need both the https://www.intel.com/content/www/us/en/developer/tools/oneapi/overview.html[Intel Compiler], and the https://www.intel.com/content/www/us/en/developer/articles/tool/intel-decimal-floating-point-math-library.html[library]. You can the use the following command: `icx benchmark_libbid.c -O3 $PATH_TO_LIBBID/libbid.a -std=c17` followed by: `./a.out` You can also use `gcc` instead of `icx`. +On Windows the command is similar: `cl benchmark_libbid.c /O2 /std:c17 ..\PATH_TO_LIBBID\cl000libbid.lib`, followed by: `.\benchmark_libbid.exe`. NOTE: The Intel benchmarks can only be run on one of their supported architectures: IA-32, IA-64, and Intel x64 @@ -725,6 +726,15 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14. | `decimal_fast128_t` | 801,708 | 4.300 +| Intel `BID_UINT32` +| 4,372,973 +| 23.457 +| Intel `BID_UINT64` +| 9,345,300 +| 50.129 +| Intel `BID_UINT128` +| 11,504,914 +| 61.714 |=== === Addition @@ -755,6 +765,15 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14. | `decimal_fast128_t` | 3,109,101 | 38.914 +| Intel `BID_UINT32` +| 4,967,728 +| 62.177 +| Intel `BID_UINT64` +| 6,268,077 +| 78.452 +| Intel `BID_UINT128` +| 4,847,330 +| 60.670 |=== === Subtraction @@ -785,6 +804,15 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14. | `decimal_fast128_t` | 2,963,570 | 9.167 +| Intel `BID_UINT32` +| 4,603,462 +| 14.240 +| Intel `BID_UINT64` +| 5,627,305 +| 17.407 +| Intel `BID_UINT128` +| 5,824,263 +| 18.016 |=== === Multiplication @@ -815,6 +843,15 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14. 
| `decimal_fast128_t` | 9,236,110 | 117.434 +| Intel `BID_UINT32` +| 3,833,363 +| 48.740 +| Intel `BID_UINT64` +| 11,671,369 +| 148.398 +| Intel `BID_UINT128` +| 62,036,577 +| 788.778 |=== === Division @@ -845,6 +882,15 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14. | `decimal_fast128_t` | 11,587,763 | 129,737 +| Intel `BID_UINT32` +| 5,037,576 +| 46.401 +| Intel `BID_UINT64` +| 8,768,259 +| 98.170 +| Intel `BID_UINT128` +| 38,519,644 +| 431.269 |=== === `from_chars`