From 565565b9f74c0954ca902e97b83276e5d5c16dac Mon Sep 17 00:00:00 2001
From: mborland <matt@mattborland.com>
Date: Tue, 16 Dec 2025 16:15:39 -0500
Subject: [PATCH 1/4] Make noinline portable

---
 test/benchmark_libbid.c | 51 +++++++++++++++++++++++------------------
 1 file changed, 29 insertions(+), 22 deletions(-)

diff --git a/test/benchmark_libbid.c b/test/benchmark_libbid.c
index 5a1b15273..7ada30d9b 100644
--- a/test/benchmark_libbid.c
+++ b/test/benchmark_libbid.c
@@ -21,6 +21,13 @@ typedef BID_UINT128 Decimal128;
 #define K 20000000
 #define N 5
 
+#ifdef _MSC_VER
+#  define BOOST_DECIMAL_NOINLINE  __declspec(noinline)
+#else
+#  define BOOST_DECIMAL_NOINLINE __attribute__ ((noinline))
+#endif
+#endif
+
 uint32_t flag = 0;
 
 uint32_t random_uint32(void) 
@@ -45,7 +52,7 @@ uint64_t random_uint64(void)
     return r;
 }
 
-__attribute__ ((noinline)) void generate_vector_32(Decimal32* buffer, size_t buffer_len)
+BOOST_DECIMAL_NOINLINE void generate_vector_32(Decimal32* buffer, size_t buffer_len)
 {
     for (size_t i = 0; i < buffer_len; ++i)
     {
@@ -53,7 +60,7 @@ __attribute__ ((noinline)) void generate_vector_32(Decimal32* buffer, size_t buf
     }
 }
 
-__attribute__ ((noinline)) void test_comparisons_32(Decimal32* data, const char* label)
+BOOST_DECIMAL_NOINLINE void test_comparisons_32(Decimal32* data, const char* label)
 {
     struct timespec t1, t2;
     clock_gettime(CLOCK_MONOTONIC, &t1);
@@ -82,7 +89,7 @@ __attribute__ ((noinline)) void test_comparisons_32(Decimal32* data, const char*
     printf("Comparisons    <%-10s >: %-10" PRIu64 " us (s=%zu)\n", label, elapsed_time_us, s);
 }
 
-__attribute__ ((noinline)) void generate_vector_64(Decimal64* buffer, size_t buffer_len)
+BOOST_DECIMAL_NOINLINE void generate_vector_64(Decimal64* buffer, size_t buffer_len)
 {
     for (size_t i = 0; i < buffer_len; ++i)
     {
@@ -90,7 +97,7 @@ __attribute__ ((noinline)) void generate_vector_64(Decimal64* buffer, size_t buf
     }
 }
 
-__attribute__ ((noinline)) void test_comparisons_64(Decimal64* data, const char* label)
+BOOST_DECIMAL_NOINLINE void test_comparisons_64(Decimal64* data, const char* label)
 {
     struct timespec t1, t2;
     clock_gettime(CLOCK_MONOTONIC, &t1);
@@ -153,12 +160,12 @@ Decimal128 random_decimal128(void)
 
     // 5. Parse to decimal128
     _IDEC_flags flags = 0;
-    Decimal128 result = bid128_from_string(str, &flags);
+    Decimal128 result = bid128_from_string(str, BID_ROUNDING_TO_NEAREST, &flags);
 
     return result;
 }
 
-__attribute__ ((__noinline__)) void generate_vector_128(Decimal128* buffer, size_t buffer_len)
+BOOST_DECIMAL_NOINLINE void generate_vector_128(Decimal128* buffer, size_t buffer_len)
 {
     size_t i = 0;
     while (i < buffer_len)
@@ -168,7 +175,7 @@ __attribute__ ((__noinline__)) void generate_vector_128(Decimal128* buffer, size
     }
 }
 
-__attribute__ ((__noinline__)) void test_comparisons_128(Decimal128* data, const char* label)
+BOOST_DECIMAL_NOINLINE void test_comparisons_128(Decimal128* data, const char* label)
 {
     struct timespec t1, t2;
     clock_gettime(CLOCK_MONOTONIC, &t1);
@@ -200,26 +207,26 @@ __attribute__ ((__noinline__)) void test_comparisons_128(Decimal128* data, const
 
 typedef Decimal32 (*operation_32)(Decimal32, Decimal32);
 
-__attribute__ ((noinline)) Decimal32 add_32(Decimal32 a, Decimal32 b)
+BOOST_DECIMAL_NOINLINE Decimal32 add_32(Decimal32 a, Decimal32 b)
 {
     return bid32_add(a, b, BID_ROUNDING_TO_NEAREST, &flag);
 }
-__attribute__ ((noinline)) Decimal32 sub_32(Decimal32 a, Decimal32 b)
+BOOST_DECIMAL_NOINLINE Decimal32 sub_32(Decimal32 a, Decimal32 b)
 {
     return bid32_sub(a, b, BID_ROUNDING_TO_NEAREST, &flag);
 }
 
-__attribute__ ((noinline)) Decimal32 mul_32(Decimal32 a, Decimal32 b)
+BOOST_DECIMAL_NOINLINE Decimal32 mul_32(Decimal32 a, Decimal32 b)
 {
     return bid32_mul(a, b, BID_ROUNDING_TO_NEAREST, &flag);
 }
 
-__attribute__ ((noinline)) Decimal32 div_32(Decimal32 a, Decimal32 b)
+BOOST_DECIMAL_NOINLINE Decimal32 div_32(Decimal32 a, Decimal32 b)
 {
     return bid32_div(a, b, BID_ROUNDING_TO_NEAREST, &flag);
 }
 
-__attribute__ ((noinline)) void test_two_element_operation_32(Decimal32* data, operation_32 op, const char* label, const char* op_label)
+BOOST_DECIMAL_NOINLINE void test_two_element_operation_32(Decimal32* data, operation_32 op, const char* label, const char* op_label)
 {
     struct timespec t1, t2;
     clock_gettime(CLOCK_MONOTONIC, &t1);
@@ -245,27 +252,27 @@ __attribute__ ((noinline)) void test_two_element_operation_32(Decimal32* data, o
 
 typedef Decimal64 (*operation_64)(Decimal64, Decimal64);
 
-__attribute__ ((noinline)) Decimal64 add_64(Decimal64 a, Decimal64 b)
+BOOST_DECIMAL_NOINLINE Decimal64 add_64(Decimal64 a, Decimal64 b)
 {
     return bid64_add(a, b, BID_ROUNDING_TO_NEAREST, &flag);
 }
 
-__attribute__ ((noinline)) Decimal64 sub_64(Decimal64 a, Decimal64 b)
+BOOST_DECIMAL_NOINLINE Decimal64 sub_64(Decimal64 a, Decimal64 b)
 {
     return bid64_sub(a, b, BID_ROUNDING_TO_NEAREST, &flag);
 }
 
-__attribute__ ((noinline)) Decimal64 mul_64(Decimal64 a, Decimal64 b)
+BOOST_DECIMAL_NOINLINE Decimal64 mul_64(Decimal64 a, Decimal64 b)
 {
     return bid64_mul(a, b, BID_ROUNDING_TO_NEAREST, &flag);
 }
 
-__attribute__ ((noinline)) Decimal64 div_64(Decimal64 a, Decimal64 b)
+BOOST_DECIMAL_NOINLINE Decimal64 div_64(Decimal64 a, Decimal64 b)
 {
     return bid64_div(a, b, BID_ROUNDING_TO_NEAREST, &flag);
 }
 
-__attribute__ ((noinline)) void test_two_element_operation_64(Decimal64* data, operation_64 op, const char* label, const char* op_label)
+BOOST_DECIMAL_NOINLINE void test_two_element_operation_64(Decimal64* data, operation_64 op, const char* label, const char* op_label)
 {
     struct timespec t1, t2;
     clock_gettime(CLOCK_MONOTONIC, &t1);
@@ -292,27 +299,27 @@ __attribute__ ((noinline)) void test_two_element_operation_64(Decimal64* data, o
 
 typedef Decimal128 (*operation_128)(Decimal128, Decimal128);
 
-__attribute__ ((__noinline__)) Decimal128 add_128(Decimal128 a, Decimal128 b)
+BOOST_DECIMAL_NOINLINE Decimal128 add_128(Decimal128 a, Decimal128 b)
 {
     return bid128_add(a, b, BID_ROUNDING_TO_NEAREST, &flag);
 }
 
-__attribute__ ((__noinline__)) Decimal128 sub_128(Decimal128 a, Decimal128 b)
+BOOST_DECIMAL_NOINLINE Decimal128 sub_128(Decimal128 a, Decimal128 b)
 {
     return bid128_sub(a, b, BID_ROUNDING_TO_NEAREST, &flag);
 }
 
-__attribute__ ((__noinline__)) Decimal128 mul_128(Decimal128 a, Decimal128 b)
+BOOST_DECIMAL_NOINLINE Decimal128 mul_128(Decimal128 a, Decimal128 b)
 {
     return bid128_mul(a, b, BID_ROUNDING_TO_NEAREST, &flag);
 }
 
-__attribute__ ((__noinline__)) Decimal128 div_128(Decimal128 a, Decimal128 b)
+BOOST_DECIMAL_NOINLINE Decimal128 div_128(Decimal128 a, Decimal128 b)
 {
     return bid128_div(a, b, BID_ROUNDING_TO_NEAREST, &flag);
 }
 
-__attribute__ ((__noinline__)) void test_two_element_operation_128(Decimal128* data, operation_128 op, const char* label, const char* op_label)
+BOOST_DECIMAL_NOINLINE void test_two_element_operation_128(Decimal128* data, operation_128 op, const char* label, const char* op_label)
 {
     struct timespec t1, t2;
     clock_gettime(CLOCK_MONOTONIC, &t1);

From 8ea4f1d10796449831ada87ee948d828f2826721 Mon Sep 17 00:00:00 2001
From: mborland <matt@mattborland.com>
Date: Tue, 16 Dec 2025 16:16:00 -0500
Subject: [PATCH 2/4] Don't access floating point environment on windows
 platform

---
 test/benchmark_libbid.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/test/benchmark_libbid.c b/test/benchmark_libbid.c
index 7ada30d9b..dda1f8ce9 100644
--- a/test/benchmark_libbid.c
+++ b/test/benchmark_libbid.c
@@ -349,8 +349,10 @@ int main()
     // One time init of random number generator
     srand(time(NULL));
 
+    #ifndef _WIN32
     fedisableexcept(FE_ALL_EXCEPT);
-    
+    #endif
+
     Decimal32* d32_array = malloc(K * sizeof(Decimal32));
     Decimal64* d64_array = malloc(K * sizeof(Decimal64));
     Decimal128* d128_array = malloc(K * sizeof(Decimal128));

From a930b39deef235e90b975189f385ffc9c49f7404 Mon Sep 17 00:00:00 2001
From: mborland <matt@mattborland.com>
Date: Tue, 16 Dec 2025 16:16:26 -0500
Subject: [PATCH 3/4] Add windows friendly CLOCK_MONOTONIC definition

---
 test/benchmark_libbid.c | 47 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 43 insertions(+), 4 deletions(-)

diff --git a/test/benchmark_libbid.c b/test/benchmark_libbid.c
index dda1f8ce9..d3464284a 100644
--- a/test/benchmark_libbid.c
+++ b/test/benchmark_libbid.c
@@ -2,20 +2,25 @@
 // Distributed under the Boost Software License, Version 1.0.
 // https://www.boost.org/LICENSE_1_0.txt
 
-#define _POSIX_C_SOURCE 199309L
+#ifdef _WIN32
+#  define WIN32_LEAN_AND_MEAN
+#  include <windows.h>
+#else
+#  define _POSIX_C_SOURCE 199309L
+#endif
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
-#include <time.h>
 #include <inttypes.h>
 #include <float.h>
 #include <fenv.h>
 
+#include "../LIBRARY/src/bid_conf.h"      // forward slashes: accepted by MSVC and required by POSIX toolchains
+#include "../LIBRARY/src/bid_functions.h"
+
 typedef BID_UINT32 Decimal32;
 typedef BID_UINT64 Decimal64;
-#include "../LIBRARY/src/bid_conf.h"
-#include "../LIBRARY/src/bid_functions.h"
 typedef BID_UINT128 Decimal128;
 
 #define K 20000000
@@ -26,6 +31,40 @@ typedef BID_UINT128 Decimal128;
 #else
 #  define BOOST_DECIMAL_NOINLINE __attribute__ ((noinline))
 #endif
+
+#ifdef _WIN32
+#include <windows.h>
+
+#define CLOCK_MONOTONIC 1
+
+struct timespec // local replacement: <time.h> is only included in the non-Windows branch of this #ifdef
+{
+    long tv_sec;  // seconds part, filled in by the clock_gettime shim below
+    long tv_nsec; // nanoseconds part, filled in by the clock_gettime shim below
+}; // NOTE(review): UCRT <time.h> declares struct timespec too, and main() calls time() - confirm neither breaks the _WIN32 build
+
+int clock_gettime(int clock_id, struct timespec* tp) // Windows stand-in for POSIX clock_gettime, built on QueryPerformanceCounter
+{
+    (void)clock_id;  // Ignore clock_id, always use QPC
+
+    static LARGE_INTEGER frequency = { 0 }; // kept across calls; zero means it has not been queried yet
+    LARGE_INTEGER counter;
+
+    if (frequency.QuadPart == 0) // only query while the cached value is still zero
+    {
+        QueryPerformanceFrequency(&frequency);
+    }
+
+    QueryPerformanceCounter(&counter);
+
+    tp->tv_sec = (long)(counter.QuadPart / frequency.QuadPart); // whole seconds: ticks divided by ticks-per-second
+    tp->tv_nsec = (long)(((counter.QuadPart % frequency.QuadPart) * 1000000000LL) / frequency.QuadPart); // leftover ticks scaled to nanoseconds
+
+    return 0; // always reports success; the results of the two Query* calls are not checked
+}
+
+#else
+#include <time.h>
 #endif
 
 uint32_t flag = 0;

From a3058ddb0b0db10a03787f18a85813d5aa933b0e Mon Sep 17 00:00:00 2001
From: mborland <matt@mattborland.com>
Date: Tue, 16 Dec 2025 14:59:50 -0500
Subject: [PATCH 4/4] Add benchmark data of Intel libbid run with MSVC

---
 doc/modules/ROOT/pages/benchmarks.adoc | 46 ++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/doc/modules/ROOT/pages/benchmarks.adoc b/doc/modules/ROOT/pages/benchmarks.adoc
index bea3f4617..52936c83e 100644
--- a/doc/modules/ROOT/pages/benchmarks.adoc
+++ b/doc/modules/ROOT/pages/benchmarks.adoc
@@ -29,6 +29,7 @@ To run the GCC benchmarks you can use the following command: `gcc benchmark_libd
 To run the Intel benchmarks you will need both the https://www.intel.com/content/www/us/en/developer/tools/oneapi/overview.html[Intel Compiler], and the https://www.intel.com/content/www/us/en/developer/articles/tool/intel-decimal-floating-point-math-library.html[library].
 You can the use the following command: `icx benchmark_libbid.c -O3 $PATH_TO_LIBBID/libbid.a -std=c17` followed by: `./a.out`
 You can also use `gcc` instead of `icx`.
+On Windows, the command is similar: `cl benchmark_libbid.c /O2 /std:c17 ..\PATH_TO_LIBBID\cl000libbid.lib`, followed by: `.\benchmark_libbid.exe`.
 
 NOTE: The Intel benchmarks can only be run on one of their supported architectures: IA-32, IA-64, and Intel x64
 
@@ -725,6 +726,15 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14.
 | `decimal_fast128_t`
 | 801,708
 | 4.300
+| Intel `BID_UINT32`
+| 4,372,973
+| 23.457
+| Intel `BID_UINT64`
+| 9,345,300
+| 50.129
+| Intel `BID_UINT128`
+| 11,504,914
+| 61.714
 |===
 
 === Addition
@@ -755,6 +765,15 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14.
 | `decimal_fast128_t`
 | 3,109,101
 | 38.914
+| Intel `BID_UINT32`
+| 4,967,728
+| 62.177
+| Intel `BID_UINT64`
+| 6,268,077
+| 78.452
+| Intel `BID_UINT128`
+| 4,847,330
+| 60.670
 |===
 
 === Subtraction
@@ -785,6 +804,15 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14.
 | `decimal_fast128_t`
 | 2,963,570
 | 9.167
+| Intel `BID_UINT32`
+| 4,603,462
+| 14.240
+| Intel `BID_UINT64`
+| 5,627,305
+| 17.407
+| Intel `BID_UINT128`
+| 5,824,263
+| 18.016
 |===
 
 === Multiplication
@@ -815,6 +843,15 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14.
 | `decimal_fast128_t`
 | 9,236,110
 | 117.434
+| Intel `BID_UINT32`
+| 3,833,363
+| 48.740
+| Intel `BID_UINT64`
+| 11,671,369
+| 148.398
+| Intel `BID_UINT128`
+| 62,036,577
+| 788.778
 |===
 
 === Division
@@ -845,6 +882,15 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14.
 | `decimal_fast128_t`
 | 11,587,763
 | 129,737
+| Intel `BID_UINT32`
+| 5,037,576
+| 46.401
+| Intel `BID_UINT64`
+| 8,768,259
+| 98.170
+| Intel `BID_UINT128`
+| 38,519,644
+| 431.269
 |===
 
 === `from_chars`