Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions doc/modules/ROOT/pages/benchmarks.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ To run the GCC benchmarks you can use the following command: `gcc benchmark_libd
To run the Intel benchmarks you will need both the https://www.intel.com/content/www/us/en/developer/tools/oneapi/overview.html[Intel Compiler], and the https://www.intel.com/content/www/us/en/developer/articles/tool/intel-decimal-floating-point-math-library.html[library].
You can then use the following command: `icx benchmark_libbid.c -O3 $PATH_TO_LIBBID/libbid.a -std=c17` followed by: `./a.out`
You can also use `gcc` instead of `icx`.
On Windows the command is similar: `cl benchmark_libbid.c /O2 /std:c17 ..\PATH_TO_LIBBID\cl000libbid.lib`, followed by: `.\benchmark_libbid.exe`.

NOTE: The Intel benchmarks can only be run on one of their supported architectures: IA-32, IA-64, and Intel x64

Expand Down Expand Up @@ -725,6 +726,15 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14.
| `decimal_fast128_t`
| 801,708
| 4.300
| Intel `BID_UINT32`
| 4,372,973
| 23.457
| Intel `BID_UINT64`
| 9,345,300
| 50.129
| Intel `BID_UINT128`
| 11,504,914
| 61.714
|===

=== Addition
Expand Down Expand Up @@ -755,6 +765,15 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14.
| `decimal_fast128_t`
| 3,109,101
| 38.914
| Intel `BID_UINT32`
| 4,967,728
| 62.177
| Intel `BID_UINT64`
| 6,268,077
| 78.452
| Intel `BID_UINT128`
| 4,847,330
| 60.670
|===

=== Subtraction
Expand Down Expand Up @@ -785,6 +804,15 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14.
| `decimal_fast128_t`
| 2,963,570
| 9.167
| Intel `BID_UINT32`
| 4,603,462
| 14.240
| Intel `BID_UINT64`
| 5,627,305
| 17.407
| Intel `BID_UINT128`
| 5,824,263
| 18.016
|===

=== Multiplication
Expand Down Expand Up @@ -815,6 +843,15 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14.
| `decimal_fast128_t`
| 9,236,110
| 117.434
| Intel `BID_UINT32`
| 3,833,363
| 48.740
| Intel `BID_UINT64`
| 11,671,369
| 148.398
| Intel `BID_UINT128`
| 62,036,577
| 788.778
|===

=== Division
Expand Down Expand Up @@ -845,6 +882,15 @@ Run using an Intel i9-11900k chipset running Windows 11 and Visual Studio 17.14.
| `decimal_fast128_t`
| 11,587,763
| 129.737
| Intel `BID_UINT32`
| 5,037,576
| 46.401
| Intel `BID_UINT64`
| 8,768,259
| 98.170
| Intel `BID_UINT128`
| 38,519,644
| 431.269
|===

=== `from_chars`
Expand Down
102 changes: 75 additions & 27 deletions test/benchmark_libbid.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,71 @@
// Distributed under the Boost Software License, Version 1.0.
// https://www.boost.org/LICENSE_1_0.txt

#define _POSIX_C_SOURCE 199309L
#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
#else
# define _POSIX_C_SOURCE 199309L
#endif

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <time.h>
#include <inttypes.h>
#include <float.h>
#include <fenv.h>

#include "..\LIBRARY\src\bid_conf.h"
#include "..\LIBRARY\src\bid_functions.h"

typedef BID_UINT32 Decimal32;
typedef BID_UINT64 Decimal64;
#include "../LIBRARY/src/bid_conf.h"
#include "../LIBRARY/src/bid_functions.h"
typedef BID_UINT128 Decimal128;

#define K 20000000
#define N 5

// Compiler-neutral spelling of "do not inline this function".
// MSVC takes a __declspec; every other compiler is given the GNU-style attribute.
#if !defined(_MSC_VER)
#  define BOOST_DECIMAL_NOINLINE __attribute__ ((noinline))
#else
#  define BOOST_DECIMAL_NOINLINE __declspec(noinline)
#endif

#ifdef _WIN32
#include <windows.h>

#define CLOCK_MONOTONIC 1

// Windows-only stand-in for the POSIX timespec record, declared here so the
// timing code can use the same struct name on every platform. It is filled in
// by the clock_gettime replacement defined in this file.
struct timespec
{
long tv_sec; // whole seconds of the timestamp
long tv_nsec; // nanoseconds past tv_sec
};

// Windows replacement for POSIX clock_gettime, built on the performance counter.
//
// clock_id is accepted only for signature compatibility and is never consulted.
// tp receives the current counter reading, split into whole seconds and the
// nanosecond remainder. Always returns 0.
int clock_gettime(int clock_id, struct timespec* tp)
{
    static LARGE_INTEGER ticks_per_second = { 0 };
    LARGE_INTEGER now;

    (void)clock_id; // every clock id maps onto the performance counter

    // Query the counter frequency on first use and cache it for later calls.
    if (ticks_per_second.QuadPart == 0)
    {
        QueryPerformanceFrequency(&ticks_per_second);
    }

    QueryPerformanceCounter(&now);

    // Split the tick count so that only the sub-second part is scaled to
    // nanoseconds; the arithmetic is done in 64 bits before narrowing to long.
    const long long whole_seconds = now.QuadPart / ticks_per_second.QuadPart;
    const long long leftover_ticks = now.QuadPart % ticks_per_second.QuadPart;

    tp->tv_sec = (long)(whole_seconds);
    tp->tv_nsec = (long)((leftover_ticks * 1000000000LL) / ticks_per_second.QuadPart);

    return 0;
}

#else
#include <time.h>
#endif

uint32_t flag = 0;

uint32_t random_uint32(void)
Expand All @@ -45,15 +91,15 @@ uint64_t random_uint64(void)
return r;
}

__attribute__ ((noinline)) void generate_vector_32(Decimal32* buffer, size_t buffer_len)
// Fills buffer[0 .. buffer_len) in ascending order. Each element is produced by
// converting one random_uint32() draw with bid32_from_uint32, using the
// BID_ROUNDING_TO_NEAREST mode and the file-level `flag` as the status word.
BOOST_DECIMAL_NOINLINE void generate_vector_32(Decimal32* buffer, size_t buffer_len)
{
    Decimal32* slot = buffer;
    size_t remaining = buffer_len;

    while (remaining > 0)
    {
        *slot = bid32_from_uint32(random_uint32(), BID_ROUNDING_TO_NEAREST, &flag);
        ++slot;
        --remaining;
    }
}

__attribute__ ((noinline)) void test_comparisons_32(Decimal32* data, const char* label)
BOOST_DECIMAL_NOINLINE void test_comparisons_32(Decimal32* data, const char* label)
{
struct timespec t1, t2;
clock_gettime(CLOCK_MONOTONIC, &t1);
Expand Down Expand Up @@ -82,15 +128,15 @@ __attribute__ ((noinline)) void test_comparisons_32(Decimal32* data, const char*
printf("Comparisons <%-10s >: %-10" PRIu64 " us (s=%zu)\n", label, elapsed_time_us, s);
}

__attribute__ ((noinline)) void generate_vector_64(Decimal64* buffer, size_t buffer_len)
// Fills buffer[0 .. buffer_len) in ascending order. Each element is produced by
// converting one random_uint64() draw with bid64_from_uint64, using the
// BID_ROUNDING_TO_NEAREST mode and the file-level `flag` as the status word.
BOOST_DECIMAL_NOINLINE void generate_vector_64(Decimal64* buffer, size_t buffer_len)
{
    Decimal64* slot = buffer;
    size_t remaining = buffer_len;

    while (remaining > 0)
    {
        *slot = bid64_from_uint64(random_uint64(), BID_ROUNDING_TO_NEAREST, &flag);
        ++slot;
        --remaining;
    }
}

__attribute__ ((noinline)) void test_comparisons_64(Decimal64* data, const char* label)
BOOST_DECIMAL_NOINLINE void test_comparisons_64(Decimal64* data, const char* label)
{
struct timespec t1, t2;
clock_gettime(CLOCK_MONOTONIC, &t1);
Expand Down Expand Up @@ -153,12 +199,12 @@ Decimal128 random_decimal128(void)

// 5. Parse to decimal128
_IDEC_flags flags = 0;
Decimal128 result = bid128_from_string(str, &flags);
Decimal128 result = bid128_from_string(str, BID_ROUNDING_TO_NEAREST, &flags);

return result;
}

__attribute__ ((__noinline__)) void generate_vector_128(Decimal128* buffer, size_t buffer_len)
BOOST_DECIMAL_NOINLINE void generate_vector_128(Decimal128* buffer, size_t buffer_len)
{
size_t i = 0;
while (i < buffer_len)
Expand All @@ -168,7 +214,7 @@ __attribute__ ((__noinline__)) void generate_vector_128(Decimal128* buffer, size
}
}

__attribute__ ((__noinline__)) void test_comparisons_128(Decimal128* data, const char* label)
BOOST_DECIMAL_NOINLINE void test_comparisons_128(Decimal128* data, const char* label)
{
struct timespec t1, t2;
clock_gettime(CLOCK_MONOTONIC, &t1);
Expand Down Expand Up @@ -200,26 +246,26 @@ __attribute__ ((__noinline__)) void test_comparisons_128(Decimal128* data, const

typedef Decimal32 (*operation_32)(Decimal32, Decimal32);

__attribute__ ((noinline)) Decimal32 add_32(Decimal32 a, Decimal32 b)
// Two-argument adapter around bid32_add with the operation_32 signature.
// Supplies BID_ROUNDING_TO_NEAREST and the file-level `flag` status word.
BOOST_DECIMAL_NOINLINE Decimal32 add_32(Decimal32 a, Decimal32 b)
{
    const Decimal32 sum = bid32_add(a, b, BID_ROUNDING_TO_NEAREST, &flag);
    return sum;
}
__attribute__ ((noinline)) Decimal32 sub_32(Decimal32 a, Decimal32 b)
// Two-argument adapter around bid32_sub with the operation_32 signature.
// Supplies BID_ROUNDING_TO_NEAREST and the file-level `flag` status word.
BOOST_DECIMAL_NOINLINE Decimal32 sub_32(Decimal32 a, Decimal32 b)
{
    const Decimal32 difference = bid32_sub(a, b, BID_ROUNDING_TO_NEAREST, &flag);
    return difference;
}

__attribute__ ((noinline)) Decimal32 mul_32(Decimal32 a, Decimal32 b)
// Two-argument adapter around bid32_mul with the operation_32 signature.
// Supplies BID_ROUNDING_TO_NEAREST and the file-level `flag` status word.
BOOST_DECIMAL_NOINLINE Decimal32 mul_32(Decimal32 a, Decimal32 b)
{
    const Decimal32 product = bid32_mul(a, b, BID_ROUNDING_TO_NEAREST, &flag);
    return product;
}

__attribute__ ((noinline)) Decimal32 div_32(Decimal32 a, Decimal32 b)
// Two-argument adapter around bid32_div with the operation_32 signature.
// Supplies BID_ROUNDING_TO_NEAREST and the file-level `flag` status word.
BOOST_DECIMAL_NOINLINE Decimal32 div_32(Decimal32 a, Decimal32 b)
{
    const Decimal32 quotient = bid32_div(a, b, BID_ROUNDING_TO_NEAREST, &flag);
    return quotient;
}

__attribute__ ((noinline)) void test_two_element_operation_32(Decimal32* data, operation_32 op, const char* label, const char* op_label)
BOOST_DECIMAL_NOINLINE void test_two_element_operation_32(Decimal32* data, operation_32 op, const char* label, const char* op_label)
{
struct timespec t1, t2;
clock_gettime(CLOCK_MONOTONIC, &t1);
Expand All @@ -245,27 +291,27 @@ __attribute__ ((noinline)) void test_two_element_operation_32(Decimal32* data, o

typedef Decimal64 (*operation_64)(Decimal64, Decimal64);

__attribute__ ((noinline)) Decimal64 add_64(Decimal64 a, Decimal64 b)
// Two-argument adapter around bid64_add with the operation_64 signature.
// Supplies BID_ROUNDING_TO_NEAREST and the file-level `flag` status word.
BOOST_DECIMAL_NOINLINE Decimal64 add_64(Decimal64 a, Decimal64 b)
{
    const Decimal64 sum = bid64_add(a, b, BID_ROUNDING_TO_NEAREST, &flag);
    return sum;
}

__attribute__ ((noinline)) Decimal64 sub_64(Decimal64 a, Decimal64 b)
// Two-argument adapter around bid64_sub with the operation_64 signature.
// Supplies BID_ROUNDING_TO_NEAREST and the file-level `flag` status word.
BOOST_DECIMAL_NOINLINE Decimal64 sub_64(Decimal64 a, Decimal64 b)
{
    const Decimal64 difference = bid64_sub(a, b, BID_ROUNDING_TO_NEAREST, &flag);
    return difference;
}

__attribute__ ((noinline)) Decimal64 mul_64(Decimal64 a, Decimal64 b)
// Two-argument adapter around bid64_mul with the operation_64 signature.
// Supplies BID_ROUNDING_TO_NEAREST and the file-level `flag` status word.
BOOST_DECIMAL_NOINLINE Decimal64 mul_64(Decimal64 a, Decimal64 b)
{
    const Decimal64 product = bid64_mul(a, b, BID_ROUNDING_TO_NEAREST, &flag);
    return product;
}

__attribute__ ((noinline)) Decimal64 div_64(Decimal64 a, Decimal64 b)
// Two-argument adapter around bid64_div with the operation_64 signature.
// Supplies BID_ROUNDING_TO_NEAREST and the file-level `flag` status word.
BOOST_DECIMAL_NOINLINE Decimal64 div_64(Decimal64 a, Decimal64 b)
{
    const Decimal64 quotient = bid64_div(a, b, BID_ROUNDING_TO_NEAREST, &flag);
    return quotient;
}

__attribute__ ((noinline)) void test_two_element_operation_64(Decimal64* data, operation_64 op, const char* label, const char* op_label)
BOOST_DECIMAL_NOINLINE void test_two_element_operation_64(Decimal64* data, operation_64 op, const char* label, const char* op_label)
{
struct timespec t1, t2;
clock_gettime(CLOCK_MONOTONIC, &t1);
Expand All @@ -292,27 +338,27 @@ __attribute__ ((noinline)) void test_two_element_operation_64(Decimal64* data, o

typedef Decimal128 (*operation_128)(Decimal128, Decimal128);

__attribute__ ((__noinline__)) Decimal128 add_128(Decimal128 a, Decimal128 b)
// Two-argument adapter around bid128_add with the operation_128 signature.
// Supplies BID_ROUNDING_TO_NEAREST and the file-level `flag` status word.
BOOST_DECIMAL_NOINLINE Decimal128 add_128(Decimal128 a, Decimal128 b)
{
    const Decimal128 sum = bid128_add(a, b, BID_ROUNDING_TO_NEAREST, &flag);
    return sum;
}

__attribute__ ((__noinline__)) Decimal128 sub_128(Decimal128 a, Decimal128 b)
// Two-argument adapter around bid128_sub with the operation_128 signature.
// Supplies BID_ROUNDING_TO_NEAREST and the file-level `flag` status word.
BOOST_DECIMAL_NOINLINE Decimal128 sub_128(Decimal128 a, Decimal128 b)
{
    const Decimal128 difference = bid128_sub(a, b, BID_ROUNDING_TO_NEAREST, &flag);
    return difference;
}

__attribute__ ((__noinline__)) Decimal128 mul_128(Decimal128 a, Decimal128 b)
// Two-argument adapter around bid128_mul with the operation_128 signature.
// Supplies BID_ROUNDING_TO_NEAREST and the file-level `flag` status word.
BOOST_DECIMAL_NOINLINE Decimal128 mul_128(Decimal128 a, Decimal128 b)
{
    const Decimal128 product = bid128_mul(a, b, BID_ROUNDING_TO_NEAREST, &flag);
    return product;
}

__attribute__ ((__noinline__)) Decimal128 div_128(Decimal128 a, Decimal128 b)
// Two-argument adapter around bid128_div with the operation_128 signature.
// Supplies BID_ROUNDING_TO_NEAREST and the file-level `flag` status word.
BOOST_DECIMAL_NOINLINE Decimal128 div_128(Decimal128 a, Decimal128 b)
{
    const Decimal128 quotient = bid128_div(a, b, BID_ROUNDING_TO_NEAREST, &flag);
    return quotient;
}

__attribute__ ((__noinline__)) void test_two_element_operation_128(Decimal128* data, operation_128 op, const char* label, const char* op_label)
BOOST_DECIMAL_NOINLINE void test_two_element_operation_128(Decimal128* data, operation_128 op, const char* label, const char* op_label)
{
struct timespec t1, t2;
clock_gettime(CLOCK_MONOTONIC, &t1);
Expand Down Expand Up @@ -342,8 +388,10 @@ int main()
// One time init of random number generator
srand(time(NULL));

#ifndef _WIN32
fedisableexcept(FE_ALL_EXCEPT);

#endif

Decimal32* d32_array = malloc(K * sizeof(Decimal32));
Decimal64* d64_array = malloc(K * sizeof(Decimal64));
Decimal128* d128_array = malloc(K * sizeof(Decimal128));
Expand Down