From 79a7e7f63f598acade7c975ee397edfa53823c42 Mon Sep 17 00:00:00 2001 From: Steve Gerbino Date: Wed, 4 Feb 2026 14:27:14 +0100 Subject: [PATCH 1/2] Consolidate benchmarks into single executables Combine four separate benchmark executables into one for each library: - asio_bench: unified Asio benchmarks with --category and --bench filters - corosio_bench: unified Corosio benchmarks with --backend, --category, and --bench filters Extract make_socket_pair into shared socket_utils.hpp to reduce duplication. --- bench/asio/CMakeLists.txt | 43 +-- bench/asio/benchmarks.hpp | 59 ++++ bench/asio/http_server_bench.cpp | 391 ++++++++------------- bench/asio/io_context_bench.cpp | 270 +++++--------- bench/asio/main.cpp | 138 ++++++++ bench/asio/socket_latency_bench.cpp | 256 ++++---------- bench/asio/socket_throughput_bench.cpp | 294 +++++----------- bench/asio/socket_utils.hpp | 44 +++ bench/corosio/CMakeLists.txt | 31 +- bench/corosio/benchmarks.hpp | 63 ++++ bench/corosio/http_server_bench.cpp | 410 ++++++++-------------- bench/corosio/io_context_bench.cpp | 334 ++++++------------ bench/corosio/main.cpp | 175 +++++++++ bench/corosio/socket_latency_bench.cpp | 286 +++++---------- bench/corosio/socket_throughput_bench.cpp | 321 ++++++----------- 15 files changed, 1385 insertions(+), 1730 deletions(-) create mode 100644 bench/asio/benchmarks.hpp create mode 100644 bench/asio/main.cpp create mode 100644 bench/asio/socket_utils.hpp create mode 100644 bench/corosio/benchmarks.hpp create mode 100644 bench/corosio/main.cpp diff --git a/bench/asio/CMakeLists.txt b/bench/asio/CMakeLists.txt index f68d705d..fd563510 100644 --- a/bench/asio/CMakeLists.txt +++ b/bench/asio/CMakeLists.txt @@ -8,25 +8,28 @@ # Official repository: https://github.com/cppalliance/corosio # -# Asio benchmark executables for comparison +# Asio benchmark executable for comparison -function(asio_add_benchmark name source) - add_executable(${name} ${source}) - target_link_libraries(${name} - PRIVATE - Boost::asio - Threads::Threads) - target_compile_features(${name} PUBLIC cxx_std_20) - target_compile_options(${name} - PRIVATE - $<$:-fcoroutines>) - set_property(TARGET ${name} PROPERTY FOLDER "benchmarks/asio") - if (COROSIO_BENCH_LTO_SUPPORTED) - set_property(TARGET ${name} PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) - endif () -endfunction() +add_executable(asio_bench + main.cpp + io_context_bench.cpp + socket_throughput_bench.cpp + socket_latency_bench.cpp + http_server_bench.cpp) -asio_add_benchmark(asio_bench_io_context io_context_bench.cpp) -asio_add_benchmark(asio_bench_socket_throughput socket_throughput_bench.cpp) -asio_add_benchmark(asio_bench_socket_latency socket_latency_bench.cpp) -asio_add_benchmark(asio_bench_http_server http_server_bench.cpp) +target_link_libraries(asio_bench + PRIVATE + Boost::asio + Threads::Threads) + +target_compile_features(asio_bench PUBLIC cxx_std_20) + +target_compile_options(asio_bench + PRIVATE + $<$:-fcoroutines>) + +set_property(TARGET asio_bench PROPERTY FOLDER "benchmarks/asio") + +if (COROSIO_BENCH_LTO_SUPPORTED) + set_property(TARGET asio_bench PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) +endif () diff --git a/bench/asio/benchmarks.hpp b/bench/asio/benchmarks.hpp new file mode 100644 index 00000000..17557f50 --- /dev/null +++ b/bench/asio/benchmarks.hpp @@ -0,0 +1,59 @@ +// +// Copyright (c) 2026 Steve Gerbino +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/cppalliance/corosio +// + +#ifndef ASIO_BENCH_BENCHMARKS_HPP +#define ASIO_BENCH_BENCHMARKS_HPP + +#include "../common/benchmark.hpp" + +namespace asio_bench { + +/** Run io_context benchmarks. + + @param collector Results collector. + @param filter Optional filter: nullptr or "all" runs all, or a specific + benchmark name (single_threaded, multithreaded, interleaved, concurrent). +*/ +void run_io_context_benchmarks( + bench::result_collector& collector, + char const* filter ); + +/** Run socket throughput benchmarks. + + @param collector Results collector. + @param filter Optional filter: nullptr or "all" runs all, or a specific + benchmark name (unidirectional, bidirectional). +*/ +void run_socket_throughput_benchmarks( + bench::result_collector& collector, + char const* filter ); + +/** Run socket latency benchmarks. + + @param collector Results collector. + @param filter Optional filter: nullptr or "all" runs all, or a specific + benchmark name (pingpong, concurrent). +*/ +void run_socket_latency_benchmarks( + bench::result_collector& collector, + char const* filter ); + +/** Run HTTP server benchmarks. + + @param collector Results collector. + @param filter Optional filter: nullptr or "all" runs all, or a specific + benchmark name (single_conn, concurrent, multithread). +*/ +void run_http_server_benchmarks( + bench::result_collector& collector, + char const* filter ); + +} // namespace asio_bench + +#endif diff --git a/bench/asio/http_server_bench.cpp b/bench/asio/http_server_bench.cpp index cfb5b5c4..17da83f1 100644 --- a/bench/asio/http_server_bench.cpp +++ b/bench/asio/http_server_bench.cpp @@ -7,8 +7,9 @@ // Official repository: https://github.com/cppalliance/corosio // -#include -#include +#include "benchmarks.hpp" +#include "socket_utils.hpp" + #include #include #include @@ -28,167 +29,138 @@ #include "../common/benchmark.hpp" #include "../common/http_protocol.hpp" -namespace asio = boost::asio; -using tcp = asio::ip::tcp; - -// Create a connected socket pair using TCP loopback -std::pair make_socket_pair(asio::io_context& ioc) -{ - tcp::acceptor acceptor(ioc, tcp::endpoint(tcp::v4(), 0)); - acceptor.set_option(tcp::acceptor::reuse_address(true)); - - tcp::socket client(ioc); - tcp::socket server(ioc); - - auto endpoint = acceptor.local_endpoint(); - client.connect(tcp::endpoint(asio::ip::address_v4::loopback(), endpoint.port())); - server = acceptor.accept(); +namespace asio_bench { +namespace { - client.set_option(tcp::no_delay(true)); - server.set_option(tcp::no_delay(true)); - - return {std::move(client), std::move(server)}; -} - -// Server coroutine: reads requests and sends responses asio::awaitable server_task( tcp::socket& sock, int num_requests, - int& completed_requests) + int& completed_requests ) { std::string buf; try { - while (completed_requests < num_requests) + while( completed_requests < num_requests ) { - // Read until end of HTTP headers std::size_t n = co_await asio::async_read_until( sock, - asio::dynamic_buffer(buf), + asio::dynamic_buffer( buf ), "\r\n\r\n", - asio::use_awaitable); + asio::use_awaitable ); - // Send response co_await asio::async_write( sock, - asio::buffer(bench::http::small_response, bench::http::small_response_size), - asio::use_awaitable); + asio::buffer( bench::http::small_response, bench::http::small_response_size ), + asio::use_awaitable ); ++completed_requests; - buf.erase(0, n); + buf.erase( 0, n ); } } - catch (std::exception const&) {} + catch( std::exception const& ) {} } -// Client coroutine: sends requests and reads responses asio::awaitable client_task( tcp::socket& sock, int num_requests, - bench::statistics& latency_stats) + bench::statistics& latency_stats ) { std::string buf; try { - for (int i = 0; i < num_requests; ++i) + for( int i = 0; i < num_requests; ++i ) { bench::stopwatch sw; - // Send request co_await asio::async_write( sock, - asio::buffer(bench::http::small_request, bench::http::small_request_size), - asio::use_awaitable); + asio::buffer( bench::http::small_request, bench::http::small_request_size ), + asio::use_awaitable ); - // Read response headers std::size_t header_end = co_await asio::async_read_until( sock, - asio::dynamic_buffer(buf), + asio::dynamic_buffer( buf ), "\r\n\r\n", - asio::use_awaitable); + asio::use_awaitable ); - // Parse Content-Length from headers and read body if needed - std::string_view headers(buf.data(), header_end); + std::string_view headers( buf.data(), header_end ); std::size_t content_length = 0; - auto pos = headers.find("Content-Length: "); - if (pos != std::string_view::npos) + auto pos = headers.find( "Content-Length: " ); + if( pos != std::string_view::npos ) { pos += 16; - while (pos < headers.size() && headers[pos] >= '0' && headers[pos] <= '9') + while( pos < headers.size() && headers[pos] >= '0' && headers[pos] <= '9' ) { - content_length = content_length * 10 + (headers[pos] - '0'); + content_length = content_length * 10 + ( headers[pos] - '0' ); ++pos; } } - // Read body if not already in buffer std::size_t total_size = header_end + content_length; - if (buf.size() < total_size) + if( buf.size() < total_size ) { std::size_t need = total_size - buf.size(); std::size_t old_size = buf.size(); - buf.resize(total_size); + buf.resize( total_size ); co_await asio::async_read( sock, - asio::buffer(buf.data() + old_size, need), - asio::use_awaitable); + asio::buffer( buf.data() + old_size, need ), + asio::use_awaitable ); } double latency_us = sw.elapsed_us(); - latency_stats.add(latency_us); + latency_stats.add( latency_us ); - buf.erase(0, total_size); + buf.erase( 0, total_size ); } } - catch (std::exception const&) {} + catch( std::exception const& ) {} } -// Single connection benchmark -bench::benchmark_result bench_single_connection(int num_requests) +bench::benchmark_result bench_single_connection( int num_requests ) { std::cout << " Requests: " << num_requests << "\n"; asio::io_context ioc; - auto [client, server] = make_socket_pair(ioc); + auto [client, server] = make_socket_pair( ioc ); int completed_requests = 0; bench::statistics latency_stats; bench::stopwatch total_sw; - asio::co_spawn(ioc, - server_task(server, num_requests, completed_requests), - asio::detached); - asio::co_spawn(ioc, - client_task(client, num_requests, latency_stats), - asio::detached); + asio::co_spawn( ioc, + server_task( server, num_requests, completed_requests ), + asio::detached ); + asio::co_spawn( ioc, + client_task( client, num_requests, latency_stats ), + asio::detached ); ioc.run(); double elapsed = total_sw.elapsed_seconds(); - double requests_per_sec = static_cast(num_requests) / elapsed; + double requests_per_sec = static_cast( num_requests ) / elapsed; std::cout << " Completed: " << num_requests << " requests\n"; - std::cout << " Elapsed: " << std::fixed << std::setprecision(3) + std::cout << " Elapsed: " << std::fixed << std::setprecision( 3 ) << elapsed << " s\n"; - std::cout << " Throughput: " << bench::format_rate(requests_per_sec) << "\n"; - bench::print_latency_stats(latency_stats, "Request latency"); + std::cout << " Throughput: " << bench::format_rate( requests_per_sec ) << "\n"; + bench::print_latency_stats( latency_stats, "Request latency" ); std::cout << "\n"; client.close(); server.close(); - return bench::benchmark_result("single_conn") - .add("num_requests", num_requests) - .add("num_connections", 1) - .add("requests_per_sec", requests_per_sec) - .add_latency_stats("request_latency", latency_stats); + return bench::benchmark_result( "single_conn" ) + .add( "num_requests", num_requests ) + .add( "num_connections", 1 ) + .add( "requests_per_sec", requests_per_sec ) + .add_latency_stats( "request_latency", latency_stats ); } -// Concurrent connections benchmark -bench::benchmark_result bench_concurrent_connections(int num_connections, int requests_per_conn) +bench::benchmark_result bench_concurrent_connections( int num_connections, int requests_per_conn ) { int total_requests = num_connections * requests_per_conn; std::cout << " Connections: " << num_connections @@ -199,70 +171,68 @@ bench::benchmark_result bench_concurrent_connections(int num_connections, int re std::vector clients; std::vector servers; - std::vector completed(num_connections, 0); - std::vector stats(num_connections); + std::vector completed( num_connections, 0 ); + std::vector stats( num_connections ); - clients.reserve(num_connections); - servers.reserve(num_connections); + clients.reserve( num_connections ); + servers.reserve( num_connections ); - for (int i = 0; i < num_connections; ++i) + for( int i = 0; i < num_connections; ++i ) { - auto [c, s] = make_socket_pair(ioc); - clients.push_back(std::move(c)); - servers.push_back(std::move(s)); + auto [c, s] = make_socket_pair( ioc ); + clients.push_back( std::move( c ) ); + servers.push_back( std::move( s ) ); } bench::stopwatch total_sw; - for (int i = 0; i < num_connections; ++i) + for( int i = 0; i < num_connections; ++i ) { - asio::co_spawn(ioc, - server_task(servers[i], requests_per_conn, completed[i]), - asio::detached); - asio::co_spawn(ioc, - client_task(clients[i], requests_per_conn, stats[i]), - asio::detached); + asio::co_spawn( ioc, + server_task( servers[i], requests_per_conn, completed[i] ), + asio::detached ); + asio::co_spawn( ioc, + client_task( clients[i], requests_per_conn, stats[i] ), + asio::detached ); } ioc.run(); double elapsed = total_sw.elapsed_seconds(); - double requests_per_sec = static_cast(total_requests) / elapsed; + double requests_per_sec = static_cast( total_requests ) / elapsed; - // Aggregate latency stats double total_mean = 0; double total_p99 = 0; - for (auto& s : stats) + for( auto& s : stats ) { total_mean += s.mean(); total_p99 += s.p99(); } std::cout << " Completed: " << total_requests << " requests\n"; - std::cout << " Elapsed: " << std::fixed << std::setprecision(3) + std::cout << " Elapsed: " << std::fixed << std::setprecision( 3 ) << elapsed << " s\n"; - std::cout << " Throughput: " << bench::format_rate(requests_per_sec) << "\n"; + std::cout << " Throughput: " << bench::format_rate( requests_per_sec ) << "\n"; std::cout << " Avg mean latency: " - << bench::format_latency(total_mean / num_connections) << "\n"; + << bench::format_latency( total_mean / num_connections ) << "\n"; std::cout << " Avg p99 latency: " - << bench::format_latency(total_p99 / num_connections) << "\n\n"; + << bench::format_latency( total_p99 / num_connections ) << "\n\n"; - for (auto& c : clients) + for( auto& c : clients ) c.close(); - for (auto& s : servers) + for( auto& s : servers ) s.close(); - return bench::benchmark_result("concurrent_" + std::to_string(num_connections)) - .add("num_connections", num_connections) - .add("requests_per_conn", requests_per_conn) - .add("total_requests", total_requests) - .add("requests_per_sec", requests_per_sec) - .add("avg_mean_latency_us", total_mean / num_connections) - .add("avg_p99_latency_us", total_p99 / num_connections); + return bench::benchmark_result( "concurrent_" + std::to_string( num_connections ) ) + .add( "num_connections", num_connections ) + .add( "requests_per_conn", requests_per_conn ) + .add( "total_requests", total_requests ) + .add( "requests_per_sec", requests_per_sec ) + .add( "avg_mean_latency_us", total_mean / num_connections ) + .add( "avg_p99_latency_us", total_p99 / num_connections ); } -// Multi-threaded benchmark: multiple threads calling run() -bench::benchmark_result bench_multithread(int num_threads, int num_connections, int requests_per_conn) +bench::benchmark_result bench_multithread( int num_threads, int num_connections, int requests_per_conn ) { int total_requests = num_connections * requests_per_conn; std::cout << " Threads: " << num_threads @@ -270,192 +240,117 @@ bench::benchmark_result bench_multithread(int num_threads, int num_connections, << ", Requests per connection: " << requests_per_conn << ", Total: " << total_requests << "\n"; - asio::io_context ioc(num_threads); + asio::io_context ioc( num_threads ); std::vector clients; std::vector servers; - std::vector completed(num_connections, 0); - std::vector stats(num_connections); + std::vector completed( num_connections, 0 ); + std::vector stats( num_connections ); - clients.reserve(num_connections); - servers.reserve(num_connections); + clients.reserve( num_connections ); + servers.reserve( num_connections ); - for (int i = 0; i < num_connections; ++i) + for( int i = 0; i < num_connections; ++i ) { - auto [c, s] = make_socket_pair(ioc); - clients.push_back(std::move(c)); - servers.push_back(std::move(s)); + auto [c, s] = make_socket_pair( ioc ); + clients.push_back( std::move( c ) ); + servers.push_back( std::move( s ) ); } - // Spawn all coroutines before starting threads - for (int i = 0; i < num_connections; ++i) + for( int i = 0; i < num_connections; ++i ) { - asio::co_spawn(ioc, - server_task(servers[i], requests_per_conn, completed[i]), - asio::detached); - asio::co_spawn(ioc, - client_task(clients[i], requests_per_conn, stats[i]), - asio::detached); + asio::co_spawn( ioc, + server_task( servers[i], requests_per_conn, completed[i] ), + asio::detached ); + asio::co_spawn( ioc, + client_task( clients[i], requests_per_conn, stats[i] ), + asio::detached ); } bench::stopwatch total_sw; - // Launch worker threads std::vector threads; - threads.reserve(num_threads - 1); - for (int i = 1; i < num_threads; ++i) - threads.emplace_back([&ioc] { ioc.run(); }); + threads.reserve( num_threads - 1 ); + for( int i = 1; i < num_threads; ++i ) + threads.emplace_back( [&ioc] { ioc.run(); } ); - // Main thread also runs ioc.run(); - // Wait for all threads - for (auto& t : threads) + for( auto& t : threads ) t.join(); double elapsed = total_sw.elapsed_seconds(); - double requests_per_sec = static_cast(total_requests) / elapsed; + double requests_per_sec = static_cast( total_requests ) / elapsed; - // Aggregate latency stats double total_mean = 0; double total_p99 = 0; - for (auto& s : stats) + for( auto& s : stats ) { total_mean += s.mean(); total_p99 += s.p99(); } std::cout << " Completed: " << total_requests << " requests\n"; - std::cout << " Elapsed: " << std::fixed << std::setprecision(3) + std::cout << " Elapsed: " << std::fixed << std::setprecision( 3 ) << elapsed << " s\n"; - std::cout << " Throughput: " << bench::format_rate(requests_per_sec) << "\n"; + std::cout << " Throughput: " << bench::format_rate( requests_per_sec ) << "\n"; std::cout << " Avg mean latency: " - << bench::format_latency(total_mean / num_connections) << "\n"; + << bench::format_latency( total_mean / num_connections ) << "\n"; std::cout << " Avg p99 latency: " - << bench::format_latency(total_p99 / num_connections) << "\n\n"; + << bench::format_latency( total_p99 / num_connections ) << "\n\n"; - for (auto& c : clients) + for( auto& c : clients ) c.close(); - for (auto& s : servers) + for( auto& s : servers ) s.close(); - return bench::benchmark_result("multithread_" + std::to_string(num_threads) + "t") - .add("num_threads", num_threads) - .add("num_connections", num_connections) - .add("requests_per_conn", requests_per_conn) - .add("total_requests", total_requests) - .add("requests_per_sec", requests_per_sec) - .add("avg_mean_latency_us", total_mean / num_connections) - .add("avg_p99_latency_us", total_p99 / num_connections); + return bench::benchmark_result( "multithread_" + std::to_string( num_threads ) + "t" ) + .add( "num_threads", num_threads ) + .add( "num_connections", num_connections ) + .add( "requests_per_conn", requests_per_conn ) + .add( "total_requests", total_requests ) + .add( "requests_per_sec", requests_per_sec ) + .add( "avg_mean_latency_us", total_mean / num_connections ) + .add( "avg_p99_latency_us", total_p99 / num_connections ); } -void run_benchmarks(char const* output_file, char const* bench_filter) -{ - std::cout << "Boost.Asio HTTP Server Benchmarks\n"; - std::cout << "=================================\n"; - - bench::result_collector collector("asio"); +} // anonymous namespace - bool run_all = !bench_filter || std::strcmp(bench_filter, "all") == 0; +void run_http_server_benchmarks( + bench::result_collector& collector, + char const* filter ) +{ + std::cout << "\n>>> HTTP Server Benchmarks (Asio) <<<\n"; - if (run_all || std::strcmp(bench_filter, "single_conn") == 0) - { - bench::print_header("Single Connection (Sequential Requests)"); - collector.add(bench_single_connection(10000)); - } + bool run_all = !filter || std::strcmp( filter, "all" ) == 0; - if (run_all || std::strcmp(bench_filter, "concurrent") == 0) + if( run_all || std::strcmp( filter, "single_conn" ) == 0 ) { - if (run_all) - std::this_thread::sleep_for(std::chrono::seconds(5)); - bench::print_header("Concurrent Connections"); - collector.add(bench_concurrent_connections(1, 10000)); - collector.add(bench_concurrent_connections(4, 2500)); - collector.add(bench_concurrent_connections(16, 625)); - collector.add(bench_concurrent_connections(32, 312)); + bench::print_header( "Single Connection (Sequential Requests)" ); + collector.add( bench_single_connection( 10000 ) ); } - if (run_all || std::strcmp(bench_filter, "multithread") == 0) + if( run_all || std::strcmp( filter, "concurrent" ) == 0 ) { - if (run_all) - std::this_thread::sleep_for(std::chrono::seconds(5)); - bench::print_header("Multi-threaded (32 connections, varying threads)"); - collector.add(bench_multithread(1, 32, 312)); - collector.add(bench_multithread(2, 32, 312)); - collector.add(bench_multithread(4, 32, 312)); - collector.add(bench_multithread(8, 32, 312)); + if( run_all ) + std::this_thread::sleep_for( std::chrono::seconds( 5 ) ); + bench::print_header( "Concurrent Connections" ); + collector.add( bench_concurrent_connections( 1, 10000 ) ); + collector.add( bench_concurrent_connections( 4, 2500 ) ); + collector.add( bench_concurrent_connections( 16, 625 ) ); + collector.add( bench_concurrent_connections( 32, 312 ) ); } - std::cout << "\nBenchmarks complete.\n"; - - if (output_file) + if( run_all || std::strcmp( filter, "multithread" ) == 0 ) { - if (collector.write_json(output_file)) - std::cout << "Results written to: " << output_file << "\n"; - else - std::cerr << "Error: Failed to write results to: " << output_file << "\n"; + if( run_all ) + std::this_thread::sleep_for( std::chrono::seconds( 5 ) ); + bench::print_header( "Multi-threaded (32 connections, varying threads)" ); + collector.add( bench_multithread( 1, 32, 312 ) ); + collector.add( bench_multithread( 2, 32, 312 ) ); + collector.add( bench_multithread( 4, 32, 312 ) ); + collector.add( bench_multithread( 8, 32, 312 ) ); } } -void print_usage(char const* program_name) -{ - std::cout << "Usage: " << program_name << " [OPTIONS]\n\n"; - std::cout << "Options:\n"; - std::cout << " --bench Run only the specified benchmark\n"; - std::cout << " --output Write JSON results to file\n"; - std::cout << " --help Show this help message\n"; - std::cout << "\n"; - std::cout << "Available benchmarks:\n"; - std::cout << " single_conn Single connection, sequential requests\n"; - std::cout << " concurrent Multiple concurrent connections\n"; - std::cout << " multithread Multi-threaded with varying thread counts\n"; - std::cout << " all Run all benchmarks (default)\n"; -} - -int main(int argc, char* argv[]) -{ - char const* output_file = nullptr; - char const* bench_filter = nullptr; - - for (int i = 1; i < argc; ++i) - { - if (std::strcmp(argv[i], "--bench") == 0) - { - if (i + 1 < argc) - { - bench_filter = argv[++i]; - } - else - { - std::cerr << "Error: --bench requires an argument\n"; - return 1; - } - } - else if (std::strcmp(argv[i], "--output") == 0) - { - if (i + 1 < argc) - { - output_file = argv[++i]; - } - else - { - std::cerr << "Error: --output requires an argument\n"; - return 1; - } - } - else if (std::strcmp(argv[i], "--help") == 0 || std::strcmp(argv[i], "-h") == 0) - { - print_usage(argv[0]); - return 0; - } - else - { - std::cerr << "Unknown option: " << argv[i] << "\n"; - print_usage(argv[0]); - return 1; - } - } - - run_benchmarks(output_file, bench_filter); - return 0; -} +} // namespace asio_bench diff --git a/bench/asio/io_context_bench.cpp b/bench/asio/io_context_bench.cpp index 616645c6..987768bd 100644 --- a/bench/asio/io_context_bench.cpp +++ b/bench/asio/io_context_bench.cpp @@ -7,8 +7,7 @@ // Official repository: https://github.com/cppalliance/corosio // -// This benchmark uses coroutines (like Corosio) for a fair comparison, -// rather than plain callbacks. +#include "benchmarks.hpp" #include #include @@ -26,108 +25,100 @@ namespace asio = boost::asio; -// Coroutine that increments a counter -asio::awaitable increment_task(int& counter) +namespace asio_bench { +namespace { + +asio::awaitable increment_task( int& counter ) { ++counter; co_return; } -// Coroutine that increments an atomic counter -asio::awaitable atomic_increment_task(std::atomic& counter) +asio::awaitable atomic_increment_task( std::atomic& counter ) { - counter.fetch_add(1, std::memory_order_relaxed); + counter.fetch_add( 1, std::memory_order_relaxed ); co_return; } -// Measures single-threaded coroutine throughput using Asio's awaitable/co_spawn. -// This is a direct apples-to-apples comparison with Corosio since both use C++20 -// coroutines. Differences reveal the overhead of each framework's coroutine -// integration rather than callback vs. coroutine differences. -bench::benchmark_result bench_single_threaded_post(int num_handlers) +bench::benchmark_result bench_single_threaded_post( int num_handlers ) { - bench::print_header("Single-threaded Handler Post (Asio)"); + bench::print_header( "Single-threaded Handler Post (Asio)" ); asio::io_context ioc; int counter = 0; bench::stopwatch sw; - for (int i = 0; i < num_handlers; ++i) - asio::co_spawn(ioc, increment_task(counter), asio::detached); + for( int i = 0; i < num_handlers; ++i ) + asio::co_spawn( ioc, increment_task( counter ), asio::detached ); ioc.run(); double elapsed = sw.elapsed_seconds(); - double ops_per_sec = static_cast(num_handlers) / elapsed; + double ops_per_sec = static_cast( num_handlers ) / elapsed; std::cout << " Handlers: " << num_handlers << "\n"; - std::cout << " Elapsed: " << std::fixed << std::setprecision(3) + std::cout << " Elapsed: " << std::fixed << std::setprecision( 3 ) << elapsed << " s\n"; - std::cout << " Throughput: " << bench::format_rate(ops_per_sec) << "\n"; + std::cout << " Throughput: " << bench::format_rate( ops_per_sec ) << "\n"; - if (counter != num_handlers) + if( counter != num_handlers ) { std::cerr << " ERROR: counter mismatch! Expected " << num_handlers << ", got " << counter << "\n"; } - return bench::benchmark_result("single_threaded_post") - .add("handlers", num_handlers) - .add("elapsed_s", elapsed) - .add("ops_per_sec", ops_per_sec); + return bench::benchmark_result( "single_threaded_post" ) + .add( "handlers", num_handlers ) + .add( "elapsed_s", elapsed ) + .add( "ops_per_sec", ops_per_sec ); } -// Measures multi-threaded scaling using Asio coroutines. Tests how Asio's -// scheduler handles coroutine resumption across threads. Compare against Corosio -// to evaluate coroutine dispatch efficiency under thread contention. -bench::benchmark_result bench_multithreaded_scaling(int num_handlers, int max_threads) +bench::benchmark_result bench_multithreaded_scaling( int num_handlers, int max_threads ) { - bench::print_header("Multi-threaded Scaling (Asio Coroutines)"); + bench::print_header( "Multi-threaded Scaling (Asio Coroutines)" ); std::cout << " Handlers per test: " << num_handlers << "\n\n"; - bench::benchmark_result result("multithreaded_scaling"); - result.add("handlers", num_handlers); + bench::benchmark_result result( "multithreaded_scaling" ); + result.add( "handlers", num_handlers ); double baseline_ops = 0; - for (int num_threads = 1; num_threads <= max_threads; num_threads *= 2) + for( int num_threads = 1; num_threads <= max_threads; num_threads *= 2 ) { asio::io_context ioc; - std::atomic counter{0}; + std::atomic counter{ 0 }; - // Post all coroutines first - for (int i = 0; i < num_handlers; ++i) - asio::co_spawn(ioc, atomic_increment_task(counter), asio::detached); + for( int i = 0; i < num_handlers; ++i ) + asio::co_spawn( ioc, atomic_increment_task( counter ), asio::detached ); bench::stopwatch sw; - // Run with multiple threads std::vector runners; - for (int t = 0; t < num_threads; ++t) - runners.emplace_back([&ioc]() { ioc.run(); }); + for( int t = 0; t < num_threads; ++t ) + runners.emplace_back( [&ioc]() { ioc.run(); } ); - for (auto& t : runners) + for( auto& t : runners ) t.join(); double elapsed = sw.elapsed_seconds(); - double ops_per_sec = static_cast(num_handlers) / elapsed; + double ops_per_sec = static_cast( num_handlers ) / elapsed; std::cout << " " << num_threads << " thread(s): " - << bench::format_rate(ops_per_sec); + << bench::format_rate( ops_per_sec ); - if (num_threads == 1) + if( num_threads == 1 ) baseline_ops = ops_per_sec; - else if (baseline_ops > 0) - std::cout << " (speedup: " << std::fixed << std::setprecision(2) - << (ops_per_sec / baseline_ops) << "x)"; + else if( baseline_ops > 0 ) + std::cout << " (speedup: " << std::fixed << std::setprecision( 2 ) + << ( ops_per_sec / baseline_ops ) << "x)"; std::cout << "\n"; - result.add("threads_" + std::to_string(num_threads) + "_ops_per_sec", ops_per_sec); + result.add( "threads_" + std::to_string( num_threads ) + "_ops_per_sec", ops_per_sec ); - if (counter.load() != num_handlers) + if( counter.load() != num_handlers ) { std::cerr << " ERROR: counter mismatch! Expected " << num_handlers << ", got " << counter.load() << "\n"; @@ -137,12 +128,9 @@ bench::benchmark_result bench_multithreaded_scaling(int num_handlers, int max_th return result; } -// Measures poll() efficiency with Asio coroutines in a game-loop pattern. -// Tests how Asio handles frequent context restarts with coroutine-based work. -// Compare against Corosio for latency-sensitive polling scenarios. -bench::benchmark_result bench_interleaved_post_run(int iterations, int handlers_per_iteration) +bench::benchmark_result bench_interleaved_post_run( int iterations, int handlers_per_iteration ) { - bench::print_header("Interleaved Post/Run (Asio Coroutines)"); + bench::print_header( "Interleaved Post/Run (Asio Coroutines)" ); asio::io_context ioc; int counter = 0; @@ -150,197 +138,119 @@ bench::benchmark_result bench_interleaved_post_run(int iterations, int handlers_ bench::stopwatch sw; - for (int iter = 0; iter < iterations; ++iter) + for( int iter = 0; iter < iterations; ++iter ) { - for (int i = 0; i < handlers_per_iteration; ++i) - asio::co_spawn(ioc, increment_task(counter), asio::detached); + for( int i = 0; i < handlers_per_iteration; ++i ) + asio::co_spawn( ioc, increment_task( counter ), asio::detached ); ioc.poll(); ioc.restart(); } - // Run any remaining handlers ioc.run(); double elapsed = sw.elapsed_seconds(); - double ops_per_sec = static_cast(total_handlers) / elapsed; + double ops_per_sec = static_cast( total_handlers ) / elapsed; std::cout << " Iterations: " << iterations << "\n"; std::cout << " Handlers/iter: " << handlers_per_iteration << "\n"; std::cout << " Total handlers: " << total_handlers << "\n"; - std::cout << " Elapsed: " << std::fixed << std::setprecision(3) + std::cout << " Elapsed: " << std::fixed << std::setprecision( 3 ) << elapsed << " s\n"; - std::cout << " Throughput: " << bench::format_rate(ops_per_sec) << "\n"; + std::cout << " Throughput: " << bench::format_rate( ops_per_sec ) << "\n"; - if (counter != total_handlers) + if( counter != total_handlers ) { std::cerr << " ERROR: counter mismatch! Expected " << total_handlers << ", got " << counter << "\n"; } - return bench::benchmark_result("interleaved_post_run") - .add("iterations", iterations) - .add("handlers_per_iteration", handlers_per_iteration) - .add("total_handlers", total_handlers) - .add("elapsed_s", elapsed) - .add("ops_per_sec", ops_per_sec); + return bench::benchmark_result( "interleaved_post_run" ) + .add( "iterations", iterations ) + .add( "handlers_per_iteration", handlers_per_iteration ) + .add( "total_handlers", total_handlers ) + .add( "elapsed_s", elapsed ) + .add( "ops_per_sec", ops_per_sec ); } -// Measures Asio coroutine performance under concurrent producer-consumer load. -// Multiple threads spawn and execute coroutines simultaneously. Compare against -// Corosio to evaluate coroutine dispatch under realistic server workloads. -bench::benchmark_result bench_concurrent_post_run(int num_threads, int handlers_per_thread) +bench::benchmark_result bench_concurrent_post_run( int num_threads, int handlers_per_thread ) { - bench::print_header("Concurrent Post and Run (Asio Coroutines)"); + bench::print_header( "Concurrent Post and Run (Asio Coroutines)" ); asio::io_context ioc; - std::atomic counter{0}; + std::atomic counter{ 0 }; int total_handlers = num_threads * handlers_per_thread; bench::stopwatch sw; - // Launch threads that both post and run std::vector workers; - for (int t = 0; t < num_threads; ++t) + for( int t = 0; t < num_threads; ++t ) { - workers.emplace_back([&ioc, &counter, handlers_per_thread]() + workers.emplace_back( [&ioc, &counter, handlers_per_thread]() { - for (int i = 0; i < handlers_per_thread; ++i) - asio::co_spawn(ioc, atomic_increment_task(counter), asio::detached); + for( int i = 0; i < handlers_per_thread; ++i ) + asio::co_spawn( ioc, atomic_increment_task( counter ), asio::detached ); ioc.run(); - }); + } ); } - for (auto& t : workers) + for( auto& t : workers ) t.join(); double elapsed = sw.elapsed_seconds(); - double ops_per_sec = static_cast(total_handlers) / elapsed; + double ops_per_sec = static_cast( total_handlers ) / elapsed; std::cout << " Threads: " << num_threads << "\n"; std::cout << " Handlers/thread: " << handlers_per_thread << "\n"; std::cout << " Total handlers: " << total_handlers << "\n"; - std::cout << " Elapsed: " << std::fixed << std::setprecision(3) + std::cout << " Elapsed: " << std::fixed << std::setprecision( 3 ) << elapsed << " s\n"; - std::cout << " Throughput: " << bench::format_rate(ops_per_sec) << "\n"; + std::cout << " Throughput: " << bench::format_rate( ops_per_sec ) << "\n"; - if (counter.load() != total_handlers) + if( counter.load() != total_handlers ) { std::cerr << " ERROR: counter mismatch! Expected " << total_handlers << ", got " << counter.load() << "\n"; } - return bench::benchmark_result("concurrent_post_run") - .add("threads", num_threads) - .add("handlers_per_thread", handlers_per_thread) - .add("total_handlers", total_handlers) - .add("elapsed_s", elapsed) - .add("ops_per_sec", ops_per_sec); + return bench::benchmark_result( "concurrent_post_run" ) + .add( "threads", num_threads ) + .add( "handlers_per_thread", handlers_per_thread ) + .add( "total_handlers", total_handlers ) + .add( "elapsed_s", elapsed ) + .add( "ops_per_sec", ops_per_sec ); } -// Run benchmarks -void run_benchmarks(const char* output_file, const char* bench_filter) -{ - std::cout << "Boost.Asio io_context Benchmarks\n"; - std::cout << "=================================\n\n"; +} // anonymous namespace - bench::result_collector collector("asio"); +void run_io_context_benchmarks( + bench::result_collector& collector, + char const* filter ) +{ + std::cout << "\n>>> io_context Benchmarks (Asio) <<<\n"; - bool run_all = !bench_filter || std::strcmp(bench_filter, "all") == 0; + bool run_all = !filter || std::strcmp( filter, "all" ) == 0; // Warm up { asio::io_context ioc; int counter = 0; - for (int i = 0; i < 1000; ++i) - asio::co_spawn(ioc, increment_task(counter), asio::detached); + for( int i = 0; i < 1000; ++i ) + asio::co_spawn( ioc, increment_task( counter ), asio::detached ); ioc.run(); } - // Run selected benchmarks - if (run_all || std::strcmp(bench_filter, "single_threaded") == 0) - collector.add(bench_single_threaded_post(1000000)); - - if (run_all || std::strcmp(bench_filter, "multithreaded") == 0) - collector.add(bench_multithreaded_scaling(1000000, 8)); - - if (run_all || std::strcmp(bench_filter, "interleaved") == 0) - collector.add(bench_interleaved_post_run(10000, 100)); - - if (run_all || std::strcmp(bench_filter, "concurrent") == 0) - collector.add(bench_concurrent_post_run(4, 250000)); + if( run_all || std::strcmp( filter, "single_threaded" ) == 0 ) + collector.add( bench_single_threaded_post( 1000000 ) ); - std::cout << "\nBenchmarks complete.\n"; + if( run_all || std::strcmp( filter, "multithreaded" ) == 0 ) + collector.add( bench_multithreaded_scaling( 1000000, 8 ) ); - if (output_file) - { - if (collector.write_json(output_file)) - std::cout << "Results written to: " << output_file << "\n"; - else - std::cerr << "Error: Failed to write results to: " << output_file << "\n"; - } -} + if( run_all || std::strcmp( filter, "interleaved" ) == 0 ) + collector.add( bench_interleaved_post_run( 10000, 100 ) ); -void print_usage(const char* program_name) -{ - std::cout << "Usage: " << program_name << " [OPTIONS]\n\n"; - std::cout << "Options:\n"; - std::cout << " --bench Run only the specified benchmark\n"; - std::cout << " --output Write JSON results to file\n"; - std::cout << " --help Show this help message\n"; - std::cout << "\n"; - std::cout << "Available benchmarks:\n"; - std::cout << " single_threaded Single-threaded coroutine post throughput\n"; - std::cout << " multithreaded Multi-threaded scaling test\n"; - std::cout << " interleaved Interleaved post/poll pattern\n"; - std::cout << " concurrent Concurrent post and run\n"; - std::cout << " all Run all benchmarks (default)\n"; + if( run_all || std::strcmp( filter, "concurrent" ) == 0 ) + collector.add( bench_concurrent_post_run( 4, 250000 ) ); } -int main(int argc, char* argv[]) -{ - const char* output_file = nullptr; - const char* bench_filter = nullptr; - - for (int i = 1; i < argc; ++i) - { - if (std::strcmp(argv[i], "--bench") == 0) - { - if (i + 1 < argc) - { - bench_filter = argv[++i]; - } - else - { - std::cerr << "Error: --bench requires an argument\n"; - return 1; - } - } - else if (std::strcmp(argv[i], "--output") == 0) - { - if (i + 1 < argc) - { - output_file = argv[++i]; - } - else - { - std::cerr << "Error: --output requires an argument\n"; - return 1; - } - } - else if (std::strcmp(argv[i], "--help") == 0 || std::strcmp(argv[i], "-h") == 0) - { - print_usage(argv[0]); - return 0; - } - else - { - std::cerr << "Unknown option: " << argv[i] << "\n"; - print_usage(argv[0]); - return 1; - } - } - - run_benchmarks(output_file, bench_filter); - return 0; -} +} // namespace asio_bench diff --git a/bench/asio/main.cpp b/bench/asio/main.cpp new file mode 100644 index 00000000..c124cb21 --- /dev/null +++ b/bench/asio/main.cpp @@ -0,0 +1,138 @@ +// +// Copyright (c) 2026 Steve Gerbino +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/cppalliance/corosio +// + +#include "benchmarks.hpp" + +#include +#include + +#include "../common/benchmark.hpp" + +namespace { + +void run_benchmarks( + char const* output_file, + char const* category_filter, + char const* bench_filter ) +{ + std::cout << "Boost.Asio Benchmarks\n"; + std::cout << "=====================\n"; + + bench::result_collector collector( "asio" ); + + bool run_all = !category_filter || std::strcmp( category_filter, "all" ) == 0; + + if( run_all || std::strcmp( category_filter, "io_context" ) == 0 ) + asio_bench::run_io_context_benchmarks( collector, bench_filter ); + + if( run_all || std::strcmp( category_filter, "socket_throughput" ) == 0 ) + asio_bench::run_socket_throughput_benchmarks( collector, bench_filter ); + + if( run_all || std::strcmp( category_filter, "socket_latency" ) == 0 ) + asio_bench::run_socket_latency_benchmarks( collector, bench_filter ); + + if( run_all || std::strcmp( category_filter, "http_server" ) == 0 ) + asio_bench::run_http_server_benchmarks( collector, bench_filter ); + + std::cout << "\nBenchmarks complete.\n"; + + if( output_file ) + { + if( collector.write_json( output_file ) ) + std::cout << "Results written to: " << output_file << "\n"; + else + std::cerr << "Error: Failed to write results to: " << output_file << "\n"; + } +} + +void print_usage( char const* program_name ) +{ + std::cout << "Usage: " << program_name << " [OPTIONS]\n\n"; + std::cout << "Options:\n"; + std::cout << " --category Run only the specified benchmark category\n"; + std::cout << " --bench Run only the specified benchmark within category\n"; + std::cout << " --output Write JSON results to file\n"; + std::cout << " --help Show this help message\n"; + std::cout << "\n"; + std::cout << "Benchmark categories:\n"; + std::cout << " io_context io_context handler throughput tests\n"; + std::cout << " socket_throughput Socket throughput tests\n"; + std::cout << " socket_latency Socket latency tests\n"; + std::cout << " http_server HTTP server benchmarks\n"; + std::cout << " all Run all categories (default)\n"; + std::cout << "\n"; + std::cout << "Individual benchmarks (--bench):\n"; + std::cout << " io_context: single_threaded, multithreaded, interleaved, concurrent\n"; + std::cout << " socket_throughput: unidirectional, bidirectional\n"; + std::cout << " socket_latency: pingpong, concurrent\n"; + std::cout << " http_server: single_conn, concurrent, multithread\n"; +} + +} // anonymous namespace + +int main( int argc, char* argv[] ) +{ + char const* output_file = nullptr; + char const* category_filter = nullptr; + char const* bench_filter = nullptr; + + for( int i = 1; i < argc; ++i ) + { + if( std::strcmp( argv[i], "--category" ) == 0 ) + { + if( i + 1 < argc ) + { + category_filter = argv[++i]; + } + else + { + std::cerr << "Error: --category requires an argument\n"; + return 1; + } + } + else if( std::strcmp( argv[i], "--bench" ) == 0 ) + { + if( i + 1 < argc ) + { + bench_filter = argv[++i]; + } + else + { + std::cerr << "Error: --bench requires an argument\n"; + return 1; + } + } + else if( std::strcmp( argv[i], "--output" ) == 0 ) + { + if( i + 1 < argc ) + { + output_file = argv[++i]; + } + else + { + std::cerr << "Error: --output requires an argument\n"; + return 1; + } + } + else if( std::strcmp( argv[i], "--help" ) == 0 || std::strcmp( argv[i], "-h" ) == 0 ) + { + print_usage( argv[0] ); + return 0; + } + else + { + std::cerr << "Unknown option: " << argv[i] << "\n"; + print_usage( argv[0] ); + return 1; + } + } + + run_benchmarks( output_file, category_filter, bench_filter ); + return 0; +} diff --git a/bench/asio/socket_latency_bench.cpp b/bench/asio/socket_latency_bench.cpp index 51f52d39..a8a73453 100644 --- a/bench/asio/socket_latency_bench.cpp +++ b/bench/asio/socket_latency_bench.cpp @@ -7,8 +7,9 @@ // Official repository: https://github.com/cppalliance/corosio // -#include -#include +#include "benchmarks.hpp" +#include "socket_utils.hpp" + #include #include #include @@ -23,113 +24,80 @@ #include "../common/benchmark.hpp" -namespace asio = boost::asio; -using tcp = asio::ip::tcp; - -// Create a connected socket pair using TCP loopback -std::pair make_socket_pair(asio::io_context& ioc) -{ - tcp::acceptor acceptor(ioc, tcp::endpoint(tcp::v4(), 0)); - acceptor.set_option(tcp::acceptor::reuse_address(true)); - - tcp::socket client(ioc); - tcp::socket server(ioc); - - auto endpoint = acceptor.local_endpoint(); - client.connect(tcp::endpoint(asio::ip::address_v4::loopback(), endpoint.port())); - server = acceptor.accept(); - - // Disable Nagle's algorithm for low latency - client.set_option(tcp::no_delay(true)); - server.set_option(tcp::no_delay(true)); - - return {std::move(client), std::move(server)}; -} +namespace asio_bench { +namespace { -// Ping-pong coroutine task asio::awaitable pingpong_task( tcp::socket& client, tcp::socket& server, std::size_t message_size, int iterations, - bench::statistics& stats) + bench::statistics& stats ) { - std::vector send_buf(message_size, 'P'); - std::vector recv_buf(message_size); + std::vector send_buf( message_size, 'P' ); + std::vector recv_buf( message_size ); try { - for (int i = 0; i < iterations; ++i) + for( int i = 0; i < iterations; ++i ) { bench::stopwatch sw; - // Client sends ping co_await asio::async_write( client, - asio::buffer(send_buf.data(), send_buf.size()), - asio::use_awaitable); + asio::buffer( send_buf.data(), send_buf.size() ), + asio::use_awaitable ); - // Server receives ping co_await asio::async_read( server, - asio::buffer(recv_buf.data(), recv_buf.size()), - asio::use_awaitable); + asio::buffer( recv_buf.data(), recv_buf.size() ), + asio::use_awaitable ); - // Server sends pong co_await asio::async_write( server, - asio::buffer(recv_buf.data(), recv_buf.size()), - asio::use_awaitable); + asio::buffer( recv_buf.data(), recv_buf.size() ), + asio::use_awaitable ); - // Client receives pong co_await asio::async_read( client, - asio::buffer(recv_buf.data(), recv_buf.size()), - asio::use_awaitable); + asio::buffer( recv_buf.data(), recv_buf.size() ), + asio::use_awaitable ); double rtt_us = sw.elapsed_us(); - stats.add(rtt_us); + stats.add( rtt_us ); } } - catch (std::exception const&) {} + catch( std::exception const& ) {} } -// Measures Asio's round-trip latency for request-response patterns. Uses coroutines -// for fair comparison with Corosio. Reports mean and tail latencies (p99, p99.9). -// Compare against Corosio to evaluate which framework achieves lower latency for -// RPC-style protocols. -bench::benchmark_result bench_pingpong_latency(std::size_t message_size, int iterations) +bench::benchmark_result bench_pingpong_latency( std::size_t message_size, int iterations ) { std::cout << " Message size: " << message_size << " bytes, "; std::cout << "Iterations: " << iterations << "\n"; asio::io_context ioc; - auto [client, server] = make_socket_pair(ioc); + auto [client, server] = make_socket_pair( ioc ); bench::statistics latency_stats; - asio::co_spawn(ioc, - pingpong_task(client, server, message_size, iterations, latency_stats), - asio::detached); + asio::co_spawn( ioc, + pingpong_task( client, server, message_size, iterations, latency_stats ), + asio::detached ); ioc.run(); - bench::print_latency_stats(latency_stats, "Round-trip latency"); + bench::print_latency_stats( latency_stats, "Round-trip latency" ); std::cout << "\n"; client.close(); server.close(); - return bench::benchmark_result("pingpong_" + std::to_string(message_size)) - .add("message_size", static_cast(message_size)) - .add("iterations", iterations) - .add_latency_stats("rtt", latency_stats); + return bench::benchmark_result( "pingpong_" + std::to_string( message_size ) ) + .add( "message_size", static_cast( message_size ) ) + .add( "iterations", iterations ) + .add_latency_stats( "rtt", latency_stats ); } -// Measures Asio's latency degradation under concurrent connection load. Multiple -// socket pairs perform ping-pong simultaneously. Compare against Corosio to -// evaluate which framework maintains lower latency as connection count increases. -// Critical for understanding scalability limits. -bench::benchmark_result bench_concurrent_latency(int num_pairs, std::size_t message_size, int iterations) +bench::benchmark_result bench_concurrent_latency( int num_pairs, std::size_t message_size, int iterations ) { std::cout << " Concurrent pairs: " << num_pairs << ", "; std::cout << "Message size: " << message_size << " bytes, "; @@ -137,166 +105,92 @@ bench::benchmark_result bench_concurrent_latency(int num_pairs, std::size_t mess asio::io_context ioc; - // Store sockets and stats separately for safe reference passing std::vector clients; std::vector servers; - std::vector stats(num_pairs); + std::vector stats( num_pairs ); - clients.reserve(num_pairs); - servers.reserve(num_pairs); + clients.reserve( num_pairs ); + servers.reserve( num_pairs ); - for (int i = 0; i < num_pairs; ++i) + for( int i = 0; i < num_pairs; ++i ) { - auto [c, s] = make_socket_pair(ioc); - clients.push_back(std::move(c)); - servers.push_back(std::move(s)); + auto [c, s] = make_socket_pair( ioc ); + clients.push_back( std::move( c ) ); + servers.push_back( std::move( s ) ); } - // Launch concurrent ping-pong tasks - for (int p = 0; p < num_pairs; ++p) + for( int p = 0; p < num_pairs; ++p ) { - asio::co_spawn(ioc, - pingpong_task(clients[p], servers[p], message_size, iterations, stats[p]), - asio::detached); + asio::co_spawn( ioc, + pingpong_task( clients[p], servers[p], message_size, iterations, stats[p] ), + asio::detached ); } ioc.run(); std::cout << " Per-pair results:\n"; - for (int i = 0; i < num_pairs && i < 3; ++i) + for( int i = 0; i < num_pairs && i < 3; ++i ) { std::cout << " Pair " << i << ": mean=" - << bench::format_latency(stats[i].mean()) - << ", p99=" << bench::format_latency(stats[i].p99()) + << bench::format_latency( stats[i].mean() ) + << ", p99=" << bench::format_latency( stats[i].p99() ) << "\n"; } - if (num_pairs > 3) - std::cout << " ... (" << (num_pairs - 3) << " more pairs)\n"; + if( num_pairs > 3 ) + std::cout << " ... (" << ( num_pairs - 3 ) << " more pairs)\n"; - // Calculate average across all pairs double total_mean = 0; double total_p99 = 0; - for (auto& s : stats) + for( auto& s : stats ) { total_mean += s.mean(); total_p99 += s.p99(); } std::cout << " Average mean latency: " - << bench::format_latency(total_mean / num_pairs) << "\n"; + << bench::format_latency( total_mean / num_pairs ) << "\n"; std::cout << " Average p99 latency: " - << bench::format_latency(total_p99 / num_pairs) << "\n\n"; + << bench::format_latency( total_p99 / num_pairs ) << "\n\n"; - for (auto& c : clients) + for( auto& c : clients ) c.close(); - for (auto& s : servers) + for( auto& s : servers ) s.close(); - return bench::benchmark_result("concurrent_" + std::to_string(num_pairs) + "_pairs") - .add("num_pairs", num_pairs) - .add("message_size", static_cast(message_size)) - .add("iterations", iterations) - .add("avg_mean_latency_us", total_mean / num_pairs) - .add("avg_p99_latency_us", total_p99 / num_pairs); + return bench::benchmark_result( "concurrent_" + std::to_string( num_pairs ) + "_pairs" ) + .add( "num_pairs", num_pairs ) + .add( "message_size", static_cast( message_size ) ) + .add( "iterations", iterations ) + .add( "avg_mean_latency_us", total_mean / num_pairs ) + .add( "avg_p99_latency_us", total_p99 / num_pairs ); } -// Run benchmarks -void run_benchmarks(const char* output_file, const char* bench_filter) -{ - std::cout << "Boost.Asio Socket Latency Benchmarks\n"; - std::cout << "====================================\n"; +} // anonymous namespace - bench::result_collector collector("asio"); +void run_socket_latency_benchmarks( + bench::result_collector& collector, + char const* filter ) +{ + std::cout << "\n>>> Socket Latency Benchmarks (Asio) <<<\n"; - bool run_all = !bench_filter || std::strcmp(bench_filter, "all") == 0; + bool run_all = !filter || std::strcmp( filter, "all" ) == 0; - // Variable message sizes - std::vector message_sizes = {1, 64, 1024}; + std::vector message_sizes = { 1, 64, 1024 }; int iterations = 1000; - if (run_all || std::strcmp(bench_filter, "pingpong") == 0) - { - bench::print_header("Ping-Pong Round-Trip Latency (Asio)"); - for (auto size : message_sizes) - collector.add(bench_pingpong_latency(size, iterations)); - } - - if (run_all || std::strcmp(bench_filter, "concurrent") == 0) + if( run_all || std::strcmp( filter, "pingpong" ) == 0 ) { - bench::print_header("Concurrent Socket Pairs Latency (Asio)"); - collector.add(bench_concurrent_latency(1, 64, 1000)); - collector.add(bench_concurrent_latency(4, 64, 500)); - collector.add(bench_concurrent_latency(16, 64, 250)); + bench::print_header( "Ping-Pong Round-Trip Latency (Asio)" ); + for( auto size : message_sizes ) + collector.add( bench_pingpong_latency( size, iterations ) ); } - std::cout << "\nBenchmarks complete.\n"; - - if (output_file) + if( run_all || std::strcmp( filter, "concurrent" ) == 0 ) { - if (collector.write_json(output_file)) - std::cout << "Results written to: " << output_file << "\n"; - else - std::cerr << "Error: Failed to write results to: " << output_file << "\n"; + bench::print_header( "Concurrent Socket Pairs Latency (Asio)" ); + collector.add( bench_concurrent_latency( 1, 64, 1000 ) ); + collector.add( bench_concurrent_latency( 4, 64, 500 ) ); + collector.add( bench_concurrent_latency( 16, 64, 250 ) ); } } -void print_usage(const char* program_name) -{ - std::cout << "Usage: " << program_name << " [OPTIONS]\n\n"; - std::cout << "Options:\n"; - std::cout << " --bench Run only the specified benchmark\n"; - std::cout << " --output Write JSON results to file\n"; - std::cout << " --help Show this help message\n"; - std::cout << "\n"; - std::cout << "Available benchmarks:\n"; - std::cout << " pingpong Ping-pong round-trip latency (various message sizes)\n"; - std::cout << " concurrent Concurrent socket pairs latency\n"; - std::cout << " all Run all benchmarks (default)\n"; -} - -int main(int argc, char* argv[]) -{ - const char* output_file = nullptr; - const char* bench_filter = nullptr; - - for (int i = 1; i < argc; ++i) - { - if (std::strcmp(argv[i], "--bench") == 0) - { - if (i + 1 < argc) - { - bench_filter = argv[++i]; - } - else - { - std::cerr << "Error: --bench requires an argument\n"; - return 1; - } - } - else if (std::strcmp(argv[i], "--output") == 0) - { - if (i + 1 < argc) - { - output_file = argv[++i]; - } - else - { - std::cerr << "Error: --output requires an argument\n"; - return 1; - } - } - else if (std::strcmp(argv[i], "--help") == 0 || std::strcmp(argv[i], "-h") == 0) - { - print_usage(argv[0]); - return 0; - } - else - { - std::cerr << "Unknown option: " << argv[i] << "\n"; - print_usage(argv[0]); - return 1; - } - } - - run_benchmarks(output_file, bench_filter); - return 0; -} +} // namespace asio_bench diff --git a/bench/asio/socket_throughput_bench.cpp b/bench/asio/socket_throughput_bench.cpp index 59918efb..c2c51df3 100644 --- a/bench/asio/socket_throughput_bench.cpp +++ b/bench/asio/socket_throughput_bench.cpp @@ -7,8 +7,9 @@ // Official repository: https://github.com/cppalliance/corosio // -#include -#include +#include "benchmarks.hpp" +#include "socket_utils.hpp" + #include #include #include @@ -16,7 +17,6 @@ #include #include #include -#include #include #include @@ -24,329 +24,225 @@ #include "../common/benchmark.hpp" -namespace asio = boost::asio; -using tcp = asio::ip::tcp; - -// Create a connected socket pair using TCP loopback -std::pair make_socket_pair(asio::io_context& ioc) -{ - tcp::acceptor acceptor(ioc, tcp::endpoint(tcp::v4(), 0)); - acceptor.set_option(tcp::acceptor::reuse_address(true)); - - tcp::socket client(ioc); - tcp::socket server(ioc); - - auto endpoint = acceptor.local_endpoint(); - client.connect(tcp::endpoint(asio::ip::address_v4::loopback(), endpoint.port())); - server = acceptor.accept(); +namespace asio_bench { +namespace { - // Disable Nagle's algorithm for low latency - client.set_option(tcp::no_delay(true)); - server.set_option(tcp::no_delay(true)); - - return {std::move(client), std::move(server)}; -} - -// Measures Asio's unidirectional socket throughput over loopback. Uses coroutines -// for fair comparison with Corosio. Tests async I/O efficiency across different -// buffer sizes. Compare against Corosio to evaluate which framework achieves -// higher throughput for streaming workloads. -bench::benchmark_result bench_throughput(std::size_t chunk_size, std::size_t total_bytes) +bench::benchmark_result bench_throughput( std::size_t chunk_size, std::size_t total_bytes ) { std::cout << " Buffer size: " << chunk_size << " bytes, "; - std::cout << "Transfer: " << (total_bytes / (1024 * 1024)) << " MB\n"; + std::cout << "Transfer: " << ( total_bytes / ( 1024 * 1024 ) ) << " MB\n"; asio::io_context ioc; - auto [writer, reader] = make_socket_pair(ioc); + auto [writer, reader] = make_socket_pair( ioc ); - std::vector write_buf(chunk_size, 'x'); - std::vector read_buf(chunk_size); + std::vector write_buf( chunk_size, 'x' ); + std::vector read_buf( chunk_size ); std::size_t total_written = 0; std::size_t total_read = 0; - // Writer coroutine auto write_task = [&]() -> asio::awaitable { try { - while (total_written < total_bytes) + while( total_written < total_bytes ) { - std::size_t to_write = (std::min)(chunk_size, total_bytes - total_written); + std::size_t to_write = ( std::min )( chunk_size, total_bytes - total_written ); auto n = co_await writer.async_write_some( - asio::buffer(write_buf.data(), to_write), - asio::use_awaitable); + asio::buffer( write_buf.data(), to_write ), + asio::use_awaitable ); total_written += n; } - writer.shutdown(tcp::socket::shutdown_send); + writer.shutdown( tcp::socket::shutdown_send ); } - catch (std::exception const&) {} + catch( std::exception const& ) {} }; - // Reader coroutine auto read_task = [&]() -> asio::awaitable { try { - while (total_read < total_bytes) + while( total_read < total_bytes ) { auto n = co_await reader.async_read_some( - asio::buffer(read_buf.data(), read_buf.size()), - asio::use_awaitable); - if (n == 0) + asio::buffer( read_buf.data(), read_buf.size() ), + asio::use_awaitable ); + if( n == 0 ) break; total_read += n; } } - catch (std::exception const&) {} + catch( std::exception const& ) {} }; bench::stopwatch sw; - asio::co_spawn(ioc, write_task(), asio::detached); - asio::co_spawn(ioc, read_task(), asio::detached); + asio::co_spawn( ioc, write_task(), asio::detached ); + asio::co_spawn( ioc, read_task(), asio::detached ); ioc.run(); double elapsed = sw.elapsed_seconds(); - double throughput = static_cast(total_read) / elapsed; + double throughput = static_cast( total_read ) / elapsed; std::cout << " Written: " << total_written << " bytes\n"; std::cout << " Read: " << total_read << " bytes\n"; - std::cout << " Elapsed: " << std::fixed << std::setprecision(3) + std::cout << " Elapsed: " << std::fixed << std::setprecision( 3 ) << elapsed << " s\n"; - std::cout << " Throughput: " << bench::format_throughput(throughput) << "\n\n"; + std::cout << " Throughput: " << bench::format_throughput( throughput ) << "\n\n"; writer.close(); reader.close(); - return bench::benchmark_result("throughput_" + std::to_string(chunk_size)) - .add("chunk_size", static_cast(chunk_size)) - .add("total_bytes", static_cast(total_bytes)) - .add("bytes_written", static_cast(total_written)) - .add("bytes_read", static_cast(total_read)) - .add("elapsed_s", elapsed) - .add("throughput_bytes_per_sec", throughput); + return bench::benchmark_result( "throughput_" + std::to_string( chunk_size ) ) + .add( "chunk_size", static_cast( chunk_size ) ) + .add( "total_bytes", static_cast( total_bytes ) ) + .add( "bytes_written", static_cast( total_written ) ) + .add( "bytes_read", static_cast( total_read ) ) + .add( "elapsed_s", elapsed ) + .add( "throughput_bytes_per_sec", throughput ); } -// Measures Asio's full-duplex throughput with simultaneous send/receive. Four -// concurrent coroutines stress the scheduler's I/O multiplexing. Compare against -// Corosio for protocols requiring bidirectional data flow like WebSocket or gRPC. -bench::benchmark_result bench_bidirectional_throughput(std::size_t chunk_size, std::size_t total_bytes) +bench::benchmark_result bench_bidirectional_throughput( std::size_t chunk_size, std::size_t total_bytes ) { std::cout << " Buffer size: " << chunk_size << " bytes, "; - std::cout << "Transfer: " << (total_bytes / (1024 * 1024)) << " MB each direction\n"; + std::cout << "Transfer: " << ( total_bytes / ( 1024 * 1024 ) ) << " MB each direction\n"; asio::io_context ioc; - auto [sock1, sock2] = make_socket_pair(ioc); + auto [sock1, sock2] = make_socket_pair( ioc ); - std::vector buf1(chunk_size, 'a'); - std::vector buf2(chunk_size, 'b'); + std::vector buf1( chunk_size, 'a' ); + std::vector buf2( chunk_size, 'b' ); std::size_t written1 = 0, read1 = 0; std::size_t written2 = 0, read2 = 0; - // Socket 1 writes to socket 2 auto write1_task = [&]() -> asio::awaitable { try { - while (written1 < total_bytes) + while( written1 < total_bytes ) { - std::size_t to_write = (std::min)(chunk_size, total_bytes - written1); + std::size_t to_write = ( std::min )( chunk_size, total_bytes - written1 ); auto n = co_await sock1.async_write_some( - asio::buffer(buf1.data(), to_write), - asio::use_awaitable); + asio::buffer( buf1.data(), to_write ), + asio::use_awaitable ); written1 += n; } - sock1.shutdown(tcp::socket::shutdown_send); + sock1.shutdown( tcp::socket::shutdown_send ); } - catch (std::exception const&) {} + catch( std::exception const& ) {} }; - // Socket 2 reads from socket 1 auto read1_task = [&]() -> asio::awaitable { try { - std::vector rbuf(chunk_size); - while (read1 < total_bytes) + std::vector rbuf( chunk_size ); + while( read1 < total_bytes ) { auto n = co_await sock2.async_read_some( - asio::buffer(rbuf.data(), rbuf.size()), - asio::use_awaitable); - if (n == 0) break; + asio::buffer( rbuf.data(), rbuf.size() ), + asio::use_awaitable ); + if( n == 0 ) break; read1 += n; } } - catch (std::exception const&) {} + catch( std::exception const& ) {} }; - // Socket 2 writes to socket 1 auto write2_task = [&]() -> asio::awaitable { try { - while (written2 < total_bytes) + while( written2 < total_bytes ) { - std::size_t to_write = (std::min)(chunk_size, total_bytes - written2); + std::size_t to_write = ( std::min )( chunk_size, total_bytes - written2 ); auto n = co_await sock2.async_write_some( - asio::buffer(buf2.data(), to_write), - asio::use_awaitable); + asio::buffer( buf2.data(), to_write ), + asio::use_awaitable ); written2 += n; } - sock2.shutdown(tcp::socket::shutdown_send); + sock2.shutdown( tcp::socket::shutdown_send ); } - catch (std::exception const&) {} + catch( std::exception const& ) {} }; - // Socket 1 reads from socket 2 auto read2_task = [&]() -> asio::awaitable { try { - std::vector rbuf(chunk_size); - while (read2 < total_bytes) + std::vector rbuf( chunk_size ); + while( read2 < total_bytes ) { auto n = co_await sock1.async_read_some( - asio::buffer(rbuf.data(), rbuf.size()), - asio::use_awaitable); - if (n == 0) break; + asio::buffer( rbuf.data(), rbuf.size() ), + asio::use_awaitable ); + if( n == 0 ) break; read2 += n; } } - catch (std::exception const&) {} + catch( std::exception const& ) {} }; bench::stopwatch sw; - asio::co_spawn(ioc, write1_task(), asio::detached); - asio::co_spawn(ioc, read1_task(), asio::detached); - asio::co_spawn(ioc, write2_task(), asio::detached); - asio::co_spawn(ioc, read2_task(), asio::detached); + asio::co_spawn( ioc, write1_task(), asio::detached ); + asio::co_spawn( ioc, read1_task(), asio::detached ); + asio::co_spawn( ioc, write2_task(), asio::detached ); + asio::co_spawn( ioc, read2_task(), asio::detached ); ioc.run(); double elapsed = sw.elapsed_seconds(); std::size_t total_transferred = read1 + read2; - double throughput = static_cast(total_transferred) / elapsed; + double throughput = static_cast( total_transferred ) / elapsed; std::cout << " Direction 1: " << read1 << " bytes\n"; std::cout << " Direction 2: " << read2 << " bytes\n"; std::cout << " Total: " << total_transferred << " bytes\n"; - std::cout << " Elapsed: " << std::fixed << std::setprecision(3) + std::cout << " Elapsed: " << std::fixed << std::setprecision( 3 ) << elapsed << " s\n"; - std::cout << " Throughput: " << bench::format_throughput(throughput) + std::cout << " Throughput: " << bench::format_throughput( throughput ) << " (combined)\n\n"; sock1.close(); sock2.close(); - return bench::benchmark_result("bidirectional_" + std::to_string(chunk_size)) - .add("chunk_size", static_cast(chunk_size)) - .add("total_bytes_per_direction", static_cast(total_bytes)) - .add("bytes_direction1", static_cast(read1)) - .add("bytes_direction2", static_cast(read2)) - .add("total_transferred", static_cast(total_transferred)) - .add("elapsed_s", elapsed) - .add("throughput_bytes_per_sec", throughput); + return bench::benchmark_result( "bidirectional_" + std::to_string( chunk_size ) ) + .add( "chunk_size", static_cast( chunk_size ) ) + .add( "total_bytes_per_direction", static_cast( total_bytes ) ) + .add( "bytes_direction1", static_cast( read1 ) ) + .add( "bytes_direction2", static_cast( read2 ) ) + .add( "total_transferred", static_cast( total_transferred ) ) + .add( "elapsed_s", elapsed ) + .add( "throughput_bytes_per_sec", throughput ); } -// Run benchmarks -void run_benchmarks(const char* output_file, const char* bench_filter) -{ - std::cout << "Boost.Asio Socket Throughput Benchmarks\n"; - std::cout << "=======================================\n"; - - bench::result_collector collector("asio"); +} // anonymous namespace - bool run_all = !bench_filter || std::strcmp(bench_filter, "all") == 0; +void run_socket_throughput_benchmarks( + bench::result_collector& collector, + char const* filter ) +{ + std::cout << "\n>>> Socket Throughput Benchmarks (Asio) <<<\n"; - // Variable buffer sizes - std::vector buffer_sizes = {1024, 4096, 16384, 65536}; - std::size_t transfer_size = 64 * 1024 * 1024; // 64 MB + bool run_all = !filter || std::strcmp( filter, "all" ) == 0; - if (run_all || std::strcmp(bench_filter, "unidirectional") == 0) - { - bench::print_header("Unidirectional Throughput (Asio)"); - for (auto size : buffer_sizes) - collector.add(bench_throughput(size, transfer_size)); - } + std::vector buffer_sizes = { 1024, 4096, 16384, 65536 }; + std::size_t transfer_size = 64 * 1024 * 1024; - if (run_all || std::strcmp(bench_filter, "bidirectional") == 0) + if( run_all || std::strcmp( filter, "unidirectional" ) == 0 ) { - bench::print_header("Bidirectional Throughput (Asio)"); - for (auto size : buffer_sizes) - collector.add(bench_bidirectional_throughput(size, transfer_size / 2)); + bench::print_header( "Unidirectional Throughput (Asio)" ); + for( auto size : buffer_sizes ) + collector.add( bench_throughput( size, transfer_size ) ); } - std::cout << "\nBenchmarks complete.\n"; - - if (output_file) + if( run_all || std::strcmp( filter, "bidirectional" ) == 0 ) { - if (collector.write_json(output_file)) - std::cout << "Results written to: " << output_file << "\n"; - else - std::cerr << "Error: Failed to write results to: " << output_file << "\n"; + bench::print_header( "Bidirectional Throughput (Asio)" ); + for( auto size : buffer_sizes ) + collector.add( bench_bidirectional_throughput( size, transfer_size / 2 ) ); } } -void print_usage(const char* program_name) -{ - std::cout << "Usage: " << program_name << " [OPTIONS]\n\n"; - std::cout << "Options:\n"; - std::cout << " --bench Run only the specified benchmark\n"; - std::cout << " --output Write JSON results to file\n"; - std::cout << " --help Show this help message\n"; - std::cout << "\n"; - std::cout << "Available benchmarks:\n"; - std::cout << " unidirectional Unidirectional throughput (various buffer sizes)\n"; - std::cout << " bidirectional Bidirectional throughput (various buffer sizes)\n"; - std::cout << " all Run all benchmarks (default)\n"; -} - -int main(int argc, char* argv[]) -{ - const char* output_file = nullptr; - const char* bench_filter = nullptr; - - for (int i = 1; i < argc; ++i) - { - if (std::strcmp(argv[i], "--bench") == 0) - { - if (i + 1 < argc) - { - bench_filter = argv[++i]; - } - else - { - std::cerr << "Error: --bench requires an argument\n"; - return 1; - } - } - else if (std::strcmp(argv[i], "--output") == 0) - { - if (i + 1 < argc) - { - output_file = argv[++i]; - } - else - { - std::cerr << "Error: --output requires an argument\n"; - return 1; - } - } - else if (std::strcmp(argv[i], "--help") == 0 || std::strcmp(argv[i], "-h") == 0) - { - print_usage(argv[0]); - return 0; - } - else - { - std::cerr << "Unknown option: " << argv[i] << "\n"; - print_usage(argv[0]); - return 1; - } - } - - run_benchmarks(output_file, bench_filter); - return 0; -} +} // namespace asio_bench diff --git a/bench/asio/socket_utils.hpp b/bench/asio/socket_utils.hpp new file mode 100644 index 00000000..00f112de --- /dev/null +++ b/bench/asio/socket_utils.hpp @@ -0,0 +1,44 @@ +// +// Copyright (c) 2026 Steve Gerbino +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/cppalliance/corosio +// + +#ifndef ASIO_BENCH_SOCKET_UTILS_HPP +#define ASIO_BENCH_SOCKET_UTILS_HPP + +#include +#include + +#include + +namespace asio_bench { + +namespace asio = boost::asio; +using tcp = asio::ip::tcp; + +/** Create a connected pair of TCP sockets for benchmarking. */ +inline std::pair make_socket_pair( asio::io_context& ioc ) +{ + tcp::acceptor acceptor( ioc, tcp::endpoint( tcp::v4(), 0 ) ); + acceptor.set_option( tcp::acceptor::reuse_address( true ) ); + + tcp::socket client( ioc ); + tcp::socket server( ioc ); + + auto endpoint = acceptor.local_endpoint(); + client.connect( tcp::endpoint( asio::ip::address_v4::loopback(), endpoint.port() ) ); + server = acceptor.accept(); + + client.set_option( tcp::no_delay( true ) ); + server.set_option( tcp::no_delay( true ) ); + + return { std::move( client ), std::move( server ) }; +} + +} // namespace asio_bench + +#endif diff --git a/bench/corosio/CMakeLists.txt b/bench/corosio/CMakeLists.txt index f42c0312..8dda6406 100644 --- a/bench/corosio/CMakeLists.txt +++ b/bench/corosio/CMakeLists.txt @@ -8,21 +8,20 @@ # Official repository: https://github.com/cppalliance/corosio # -# Corosio benchmark executables +add_executable(corosio_bench + main.cpp + io_context_bench.cpp + socket_throughput_bench.cpp + socket_latency_bench.cpp + http_server_bench.cpp) -function(corosio_add_benchmark name source) - add_executable(${name} ${source}) - target_link_libraries(${name} - PRIVATE - Boost::corosio - Threads::Threads) - set_property(TARGET ${name} PROPERTY FOLDER "benchmarks/corosio") - if (COROSIO_BENCH_LTO_SUPPORTED) - set_property(TARGET ${name} PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) - endif () -endfunction() +target_link_libraries(corosio_bench + PRIVATE + Boost::corosio + Threads::Threads) -corosio_add_benchmark(corosio_bench_io_context io_context_bench.cpp) -corosio_add_benchmark(corosio_bench_socket_throughput socket_throughput_bench.cpp) -corosio_add_benchmark(corosio_bench_socket_latency socket_latency_bench.cpp) -corosio_add_benchmark(corosio_bench_http_server http_server_bench.cpp) +set_property(TARGET corosio_bench PROPERTY FOLDER "benchmarks/corosio") + +if (COROSIO_BENCH_LTO_SUPPORTED) + set_property(TARGET corosio_bench PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) +endif () diff --git a/bench/corosio/benchmarks.hpp b/bench/corosio/benchmarks.hpp new file mode 100644 index 00000000..e3567618 --- /dev/null +++ b/bench/corosio/benchmarks.hpp @@ -0,0 +1,63 @@ +// +// Copyright (c) 2026 Steve Gerbino +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/cppalliance/corosio +// + +#ifndef COROSIO_BENCH_BENCHMARKS_HPP +#define COROSIO_BENCH_BENCHMARKS_HPP + +#include "../common/benchmark.hpp" + +namespace corosio_bench { + +/** Run io_context benchmarks for the given context type. + + @param collector Results collector. + @param filter Optional filter: nullptr or "all" runs all, or a specific + benchmark name (single_threaded, multithreaded, interleaved, concurrent). +*/ +template +void run_io_context_benchmarks( + bench::result_collector& collector, + char const* filter ); + +/** Run socket throughput benchmarks for the given context type. + + @param collector Results collector. + @param filter Optional filter: nullptr or "all" runs all, or a specific + benchmark name (unidirectional, bidirectional). +*/ +template +void run_socket_throughput_benchmarks( + bench::result_collector& collector, + char const* filter ); + +/** Run socket latency benchmarks for the given context type. + + @param collector Results collector. + @param filter Optional filter: nullptr or "all" runs all, or a specific + benchmark name (pingpong, concurrent). +*/ +template +void run_socket_latency_benchmarks( + bench::result_collector& collector, + char const* filter ); + +/** Run HTTP server benchmarks for the given context type. + + @param collector Results collector. + @param filter Optional filter: nullptr or "all" runs all, or a specific + benchmark name (single_conn, concurrent, multithread). +*/ +template +void run_http_server_benchmarks( + bench::result_collector& collector, + char const* filter ); + +} // namespace corosio_bench + +#endif diff --git a/bench/corosio/http_server_bench.cpp b/bench/corosio/http_server_bench.cpp index 94a087c5..8f719c7e 100644 --- a/bench/corosio/http_server_bench.cpp +++ b/bench/corosio/http_server_bench.cpp @@ -7,7 +7,10 @@ // Official repository: https://github.com/cppalliance/corosio // +#include "benchmarks.hpp" + #include +#include #include #include #include @@ -25,145 +28,137 @@ #include #include -#include "../common/backend_selection.hpp" #include "../common/benchmark.hpp" #include "../common/http_protocol.hpp" namespace corosio = boost::corosio; namespace capy = boost::capy; -// Server coroutine: reads requests and sends responses +namespace corosio_bench { +namespace { + capy::task<> server_task( corosio::tcp_socket& sock, int num_requests, - int& completed_requests) + int& completed_requests ) { std::string buf; - while (completed_requests < num_requests) + while( completed_requests < num_requests ) { - // Read until end of HTTP headers auto [ec, n] = co_await capy::read_until( - sock, capy::dynamic_buffer(buf), "\r\n\r\n"); - if (ec) + sock, capy::dynamic_buffer( buf ), "\r\n\r\n" ); + if( ec ) co_return; - // Send response auto [wec, wn] = co_await capy::write( - sock, capy::const_buffer(bench::http::small_response, bench::http::small_response_size)); - if (wec) + sock, capy::const_buffer( bench::http::small_response, bench::http::small_response_size ) ); + if( wec ) co_return; ++completed_requests; - buf.erase(0, n); + buf.erase( 0, n ); } } -// Client coroutine: sends requests and reads responses capy::task<> client_task( corosio::tcp_socket& sock, int num_requests, - bench::statistics& latency_stats) + bench::statistics& latency_stats ) { std::string buf; - for (int i = 0; i < num_requests; ++i) + for( int i = 0; i < num_requests; ++i ) { bench::stopwatch sw; - // Send request auto [wec, wn] = co_await capy::write( - sock, capy::const_buffer(bench::http::small_request, bench::http::small_request_size)); - if (wec) + sock, capy::const_buffer( bench::http::small_request, bench::http::small_request_size ) ); + if( wec ) co_return; - // Read response headers auto [ec, header_end] = co_await capy::read_until( - sock, capy::dynamic_buffer(buf), "\r\n\r\n"); - if (ec) + sock, capy::dynamic_buffer( buf ), "\r\n\r\n" ); + if( ec ) co_return; - // Parse Content-Length from headers and read body if needed - std::string_view headers(buf.data(), header_end); + std::string_view headers( buf.data(), header_end ); std::size_t content_length = 0; - auto pos = headers.find("Content-Length: "); - if (pos != std::string_view::npos) + auto pos = headers.find( "Content-Length: " ); + if( pos != std::string_view::npos ) { pos += 16; - while (pos < headers.size() && headers[pos] >= '0' && headers[pos] <= '9') + while( pos < headers.size() && headers[pos] >= '0' && headers[pos] <= '9' ) { - content_length = content_length * 10 + (headers[pos] - '0'); + content_length = content_length * 10 + ( headers[pos] - '0' ); ++pos; } } - // Read body if not already in buffer std::size_t total_size = header_end + content_length; - if (buf.size() < total_size) + if( buf.size() < total_size ) { std::size_t need = total_size - buf.size(); std::size_t old_size = buf.size(); - buf.resize(total_size); + buf.resize( total_size ); auto [rec, rn] = co_await capy::read( - sock, capy::mutable_buffer(buf.data() + old_size, need)); - if (rec) + sock, capy::mutable_buffer( buf.data() + old_size, need ) ); + if( rec ) co_return; } double latency_us = sw.elapsed_us(); - latency_stats.add(latency_us); + latency_stats.add( latency_us ); - buf.erase(0, total_size); + buf.erase( 0, total_size ); } } -// Single connection benchmark template -bench::benchmark_result bench_single_connection(int num_requests) +bench::benchmark_result bench_single_connection( int num_requests ) { std::cout << " Requests: " << num_requests << "\n"; Context ioc; - auto [client, server] = corosio::test::make_socket_pair(ioc); + auto [client, server] = corosio::test::make_socket_pair( ioc ); - client.set_no_delay(true); - server.set_no_delay(true); + client.set_no_delay( true ); + server.set_no_delay( true ); int completed_requests = 0; bench::statistics latency_stats; bench::stopwatch total_sw; - capy::run_async(ioc.get_executor())( - server_task(server, num_requests, completed_requests)); - capy::run_async(ioc.get_executor())( - client_task(client, num_requests, latency_stats)); + capy::run_async( ioc.get_executor() )( + server_task( server, num_requests, completed_requests ) ); + capy::run_async( ioc.get_executor() )( + client_task( client, num_requests, latency_stats ) ); ioc.run(); double elapsed = total_sw.elapsed_seconds(); - double requests_per_sec = static_cast(num_requests) / elapsed; + double requests_per_sec = static_cast( num_requests ) / elapsed; std::cout << " Completed: " << num_requests << " requests\n"; - std::cout << " Elapsed: " << std::fixed << std::setprecision(3) + std::cout << " Elapsed: " << std::fixed << std::setprecision( 3 ) << elapsed << " s\n"; - std::cout << " Throughput: " << bench::format_rate(requests_per_sec) << "\n"; - bench::print_latency_stats(latency_stats, "Request latency"); + std::cout << " Throughput: " << bench::format_rate( requests_per_sec ) << "\n"; + bench::print_latency_stats( latency_stats, "Request latency" ); std::cout << "\n"; client.close(); server.close(); - return bench::benchmark_result("single_conn") - .add("num_requests", num_requests) - .add("num_connections", 1) - .add("requests_per_sec", requests_per_sec) - .add_latency_stats("request_latency", latency_stats); + return bench::benchmark_result( "single_conn" ) + .add( "num_requests", num_requests ) + .add( "num_connections", 1 ) + .add( "requests_per_sec", requests_per_sec ) + .add_latency_stats( "request_latency", latency_stats ); } -// Concurrent connections benchmark template -bench::benchmark_result bench_concurrent_connections(int num_connections, int requests_per_conn) +bench::benchmark_result bench_concurrent_connections( int num_connections, int requests_per_conn ) { int total_requests = num_connections * requests_per_conn; std::cout << " Connections: " << num_connections @@ -174,71 +169,69 @@ bench::benchmark_result bench_concurrent_connections(int num_connections, int re std::vector clients; std::vector servers; - std::vector completed(num_connections, 0); - std::vector stats(num_connections); + std::vector completed( num_connections, 0 ); + std::vector stats( num_connections ); - clients.reserve(num_connections); - servers.reserve(num_connections); + clients.reserve( num_connections ); + servers.reserve( num_connections ); - for (int i = 0; i < num_connections; ++i) + for( int i = 0; i < num_connections; ++i ) { - auto [c, s] = corosio::test::make_socket_pair(ioc); - c.set_no_delay(true); - s.set_no_delay(true); - clients.push_back(std::move(c)); - servers.push_back(std::move(s)); + auto [c, s] = corosio::test::make_socket_pair( ioc ); + c.set_no_delay( true ); + s.set_no_delay( true ); + clients.push_back( std::move( c ) ); + servers.push_back( std::move( s ) ); } bench::stopwatch total_sw; - for (int i = 0; i < num_connections; ++i) + for( int i = 0; i < num_connections; ++i ) { - capy::run_async(ioc.get_executor())( - server_task(servers[i], requests_per_conn, completed[i])); - capy::run_async(ioc.get_executor())( - client_task(clients[i], requests_per_conn, stats[i])); + capy::run_async( ioc.get_executor() )( + server_task( servers[i], requests_per_conn, completed[i] ) ); + capy::run_async( ioc.get_executor() )( + client_task( clients[i], requests_per_conn, stats[i] ) ); } ioc.run(); double elapsed = total_sw.elapsed_seconds(); - double requests_per_sec = static_cast(total_requests) / elapsed; + double requests_per_sec = static_cast( total_requests ) / elapsed; - // Aggregate latency stats double total_mean = 0; double total_p99 = 0; - for (auto& s : stats) + for( auto& s : stats ) { total_mean += s.mean(); total_p99 += s.p99(); } std::cout << " Completed: " << total_requests << " requests\n"; - std::cout << " Elapsed: " << std::fixed << std::setprecision(3) + std::cout << " Elapsed: " << std::fixed << std::setprecision( 3 ) << elapsed << " s\n"; - std::cout << " Throughput: " << bench::format_rate(requests_per_sec) << "\n"; + std::cout << " Throughput: " << bench::format_rate( requests_per_sec ) << "\n"; std::cout << " Avg mean latency: " - << bench::format_latency(total_mean / num_connections) << "\n"; + << bench::format_latency( total_mean / num_connections ) << "\n"; std::cout << " Avg p99 latency: " - << bench::format_latency(total_p99 / num_connections) << "\n\n"; + << bench::format_latency( total_p99 / num_connections ) << "\n\n"; - for (auto& c : clients) + for( auto& c : clients ) c.close(); - for (auto& s : servers) + for( auto& s : servers ) s.close(); - return bench::benchmark_result("concurrent_" + std::to_string(num_connections)) - .add("num_connections", num_connections) - .add("requests_per_conn", requests_per_conn) - .add("total_requests", total_requests) - .add("requests_per_sec", requests_per_sec) - .add("avg_mean_latency_us", total_mean / num_connections) - .add("avg_p99_latency_us", total_p99 / num_connections); + return bench::benchmark_result( "concurrent_" + std::to_string( num_connections ) ) + .add( "num_connections", num_connections ) + .add( "requests_per_conn", requests_per_conn ) + .add( "total_requests", total_requests ) + .add( "requests_per_sec", requests_per_sec ) + .add( "avg_mean_latency_us", total_mean / num_connections ) + .add( "avg_p99_latency_us", total_p99 / num_connections ); } -// Multi-threaded benchmark: multiple threads calling run() template -bench::benchmark_result bench_multithread(int num_threads, int num_connections, int requests_per_conn) +bench::benchmark_result bench_multithread( int num_threads, int num_connections, int requests_per_conn ) { int total_requests = num_connections * requests_per_conn; std::cout << " Threads: " << num_threads @@ -250,221 +243,128 @@ bench::benchmark_result bench_multithread(int num_threads, int num_connections, std::vector clients; std::vector servers; - std::vector completed(num_connections, 0); - std::vector stats(num_connections); + std::vector completed( num_connections, 0 ); + std::vector stats( num_connections ); - clients.reserve(num_connections); - servers.reserve(num_connections); + clients.reserve( num_connections ); + servers.reserve( num_connections ); - for (int i = 0; i < num_connections; ++i) + for( int i = 0; i < num_connections; ++i ) { - auto [c, s] = corosio::test::make_socket_pair(ioc); - c.set_no_delay(true); - s.set_no_delay(true); - clients.push_back(std::move(c)); - servers.push_back(std::move(s)); + auto [c, s] = corosio::test::make_socket_pair( ioc ); + c.set_no_delay( true ); + s.set_no_delay( true ); + clients.push_back( std::move( c ) ); + servers.push_back( std::move( s ) ); } - // Spawn all coroutines before starting threads - for (int i = 0; i < num_connections; ++i) + for( int i = 0; i < num_connections; ++i ) { - capy::run_async(ioc.get_executor())( - server_task(servers[i], requests_per_conn, completed[i])); - capy::run_async(ioc.get_executor())( - client_task(clients[i], requests_per_conn, stats[i])); + capy::run_async( ioc.get_executor() )( + server_task( servers[i], requests_per_conn, completed[i] ) ); + capy::run_async( ioc.get_executor() )( + client_task( clients[i], requests_per_conn, stats[i] ) ); } bench::stopwatch total_sw; - // Launch worker threads std::vector threads; - threads.reserve(num_threads - 1); - for (int i = 1; i < num_threads; ++i) - threads.emplace_back([&ioc] { ioc.run(); }); + threads.reserve( num_threads - 1 ); + for( int i = 1; i < num_threads; ++i ) + threads.emplace_back( [&ioc] { ioc.run(); } ); - // Main thread also runs ioc.run(); - // Wait for all threads - for (auto& t : threads) + for( auto& t : threads ) t.join(); double elapsed = total_sw.elapsed_seconds(); - double requests_per_sec = static_cast(total_requests) / elapsed; + double requests_per_sec = static_cast( total_requests ) / elapsed; - // Aggregate latency stats double total_mean = 0; double total_p99 = 0; - for (auto& s : stats) + for( auto& s : stats ) { total_mean += s.mean(); total_p99 += s.p99(); } std::cout << " Completed: " << total_requests << " requests\n"; - std::cout << " Elapsed: " << std::fixed << std::setprecision(3) + std::cout << " Elapsed: " << std::fixed << std::setprecision( 3 ) << elapsed << " s\n"; - std::cout << " Throughput: " << bench::format_rate(requests_per_sec) << "\n"; + std::cout << " Throughput: " << bench::format_rate( requests_per_sec ) << "\n"; std::cout << " Avg mean latency: " - << bench::format_latency(total_mean / num_connections) << "\n"; + << bench::format_latency( total_mean / num_connections ) << "\n"; std::cout << " Avg p99 latency: " - << bench::format_latency(total_p99 / num_connections) << "\n\n"; + << bench::format_latency( total_p99 / num_connections ) << "\n\n"; - for (auto& c : clients) + for( auto& c : clients ) c.close(); - for (auto& s : servers) + for( auto& s : servers ) s.close(); - return bench::benchmark_result("multithread_" + std::to_string(num_threads) + "t") - .add("num_threads", num_threads) - .add("num_connections", num_connections) - .add("requests_per_conn", requests_per_conn) - .add("total_requests", total_requests) - .add("requests_per_sec", requests_per_sec) - .add("avg_mean_latency_us", total_mean / num_connections) - .add("avg_p99_latency_us", total_p99 / num_connections); + return bench::benchmark_result( "multithread_" + std::to_string( num_threads ) + "t" ) + .add( "num_threads", num_threads ) + .add( "num_connections", num_connections ) + .add( "requests_per_conn", requests_per_conn ) + .add( "total_requests", total_requests ) + .add( "requests_per_sec", requests_per_sec ) + .add( "avg_mean_latency_us", total_mean / num_connections ) + .add( "avg_p99_latency_us", total_p99 / num_connections ); } -// Run benchmarks for a specific context type +} // anonymous namespace + template -void run_benchmarks(char const* backend_name, char const* output_file, char const* bench_filter) +void run_http_server_benchmarks( + bench::result_collector& collector, + char const* filter ) { - std::cout << "Boost.Corosio HTTP Server Benchmarks\n"; - std::cout << "====================================\n"; - std::cout << "Backend: " << backend_name << "\n\n"; + std::cout << "\n>>> HTTP Server Benchmarks <<<\n"; - bench::result_collector collector(backend_name); - - bool run_all = !bench_filter || std::strcmp(bench_filter, "all") == 0; - - if (run_all || std::strcmp(bench_filter, "single_conn") == 0) - { - bench::print_header("Single Connection (Sequential Requests)"); - collector.add(bench_single_connection(10000)); - } + bool run_all = !filter || std::strcmp( filter, "all" ) == 0; - if (run_all || std::strcmp(bench_filter, "concurrent") == 0) + if( run_all || std::strcmp( filter, "single_conn" ) == 0 ) { - if (run_all) - std::this_thread::sleep_for(std::chrono::seconds(5)); - bench::print_header("Concurrent Connections"); - collector.add(bench_concurrent_connections(1, 10000)); - collector.add(bench_concurrent_connections(4, 2500)); - collector.add(bench_concurrent_connections(16, 625)); - collector.add(bench_concurrent_connections(32, 312)); + bench::print_header( "Single Connection (Sequential Requests)" ); + collector.add( bench_single_connection( 10000 ) ); } - if (run_all || std::strcmp(bench_filter, "multithread") == 0) + if( run_all || std::strcmp( filter, "concurrent" ) == 0 ) { - if (run_all) - std::this_thread::sleep_for(std::chrono::seconds(5)); - bench::print_header("Multi-threaded (32 connections, varying threads)"); - collector.add(bench_multithread(1, 32, 312)); - collector.add(bench_multithread(2, 32, 312)); - collector.add(bench_multithread(4, 32, 312)); - collector.add(bench_multithread(8, 32, 312)); + if( run_all ) + std::this_thread::sleep_for( std::chrono::seconds( 5 ) ); + bench::print_header( "Concurrent Connections" ); + collector.add( bench_concurrent_connections( 1, 10000 ) ); + collector.add( bench_concurrent_connections( 4, 2500 ) ); + collector.add( bench_concurrent_connections( 16, 625 ) ); + collector.add( bench_concurrent_connections( 32, 312 ) ); } - std::cout << "\nBenchmarks complete.\n"; - - if (output_file) + if( run_all || std::strcmp( filter, "multithread" ) == 0 ) { - if (collector.write_json(output_file)) - std::cout << "Results written to: " << output_file << "\n"; - else - std::cerr << "Error: Failed to write results to: " << output_file << "\n"; + if( run_all ) + std::this_thread::sleep_for( std::chrono::seconds( 5 ) ); + bench::print_header( "Multi-threaded (32 connections, varying threads)" ); + collector.add( bench_multithread( 1, 32, 312 ) ); + collector.add( bench_multithread( 2, 32, 312 ) ); + collector.add( bench_multithread( 4, 32, 312 ) ); + collector.add( bench_multithread( 8, 32, 312 ) ); } } -void print_usage(char const* program_name) -{ - std::cout << "Usage: " << program_name << " [OPTIONS]\n\n"; - std::cout << "Options:\n"; - std::cout << " --backend Select I/O backend (default: platform default)\n"; - std::cout << " --bench Run only the specified benchmark\n"; - std::cout << " --output Write JSON results to file\n"; - std::cout << " --list List available backends\n"; - std::cout << " --help Show this help message\n"; - std::cout << "\n"; - std::cout << "Available benchmarks:\n"; - std::cout << " single_conn Single connection, sequential requests\n"; - std::cout << " concurrent Multiple concurrent connections\n"; - std::cout << " multithread Multi-threaded with varying thread counts\n"; - std::cout << " all Run all benchmarks (default)\n"; - std::cout << "\n"; - bench::print_available_backends(); -} - -int main(int argc, char* argv[]) -{ - char const* backend = nullptr; - char const* output_file = nullptr; - char const* bench_filter = nullptr; - - for (int i = 1; i < argc; ++i) - { - if (std::strcmp(argv[i], "--backend") == 0) - { - if (i + 1 < argc) - { - backend = argv[++i]; - } - else - { - std::cerr << "Error: --backend requires an argument\n"; - return 1; - } - } - else if (std::strcmp(argv[i], "--bench") == 0) - { - if (i + 1 < argc) - { - bench_filter = argv[++i]; - } - else - { - std::cerr << "Error: --bench requires an argument\n"; - return 1; - } - } - else if (std::strcmp(argv[i], "--output") == 0) - { - if (i + 1 < argc) - { - output_file = argv[++i]; - } - else - { - std::cerr << "Error: --output requires an argument\n"; - return 1; - } - } - else if (std::strcmp(argv[i], "--list") == 0) - { - bench::print_available_backends(); - return 0; - } - else if (std::strcmp(argv[i], "--help") == 0 || std::strcmp(argv[i], "-h") == 0) - { - print_usage(argv[0]); - return 0; - } - else - { - std::cerr << "Unknown option: " << argv[i] << "\n"; - print_usage(argv[0]); - return 1; - } - } - - // If no backend specified, use platform default - if (!backend) - backend = bench::default_backend_name(); - - // Dispatch to the selected backend using a generic lambda - return bench::dispatch_backend(backend, - [=](const char* name) - { - run_benchmarks(name, output_file, bench_filter); - }); -} +// Explicit instantiations +#if BOOST_COROSIO_HAS_EPOLL +template void run_http_server_benchmarks( + bench::result_collector&, char const* ); +#endif +#if BOOST_COROSIO_HAS_SELECT +template void run_http_server_benchmarks( + bench::result_collector&, char const* ); +#endif +#if BOOST_COROSIO_HAS_IOCP +template void run_http_server_benchmarks( + bench::result_collector&, char const* ); +#endif + +} // namespace corosio_bench diff --git a/bench/corosio/io_context_bench.cpp b/bench/corosio/io_context_bench.cpp index 1eee5065..57d033d8 100644 --- a/bench/corosio/io_context_bench.cpp +++ b/bench/corosio/io_context_bench.cpp @@ -7,6 +7,8 @@ // Official repository: https://github.com/cppalliance/corosio // +#include "benchmarks.hpp" + #include #include #include @@ -18,34 +20,30 @@ #include #include -#include "../common/backend_selection.hpp" #include "../common/benchmark.hpp" namespace corosio = boost::corosio; namespace capy = boost::capy; -// Coroutine that increments a counter -capy::task<> increment_task(int& counter) +namespace corosio_bench { +namespace { + +capy::task<> increment_task( int& counter ) { ++counter; co_return; } -// Coroutine that increments an atomic counter -capy::task<> atomic_increment_task(std::atomic& counter) +capy::task<> atomic_increment_task( std::atomic& counter ) { - counter.fetch_add(1, std::memory_order_relaxed); + counter.fetch_add( 1, std::memory_order_relaxed ); co_return; } -// Measures the raw throughput of posting and executing coroutines from a single -// thread. This establishes a baseline for the scheduler's best-case performance -// without any synchronization overhead. Useful for comparing coroutine dispatch -// efficiency against other async frameworks and identifying per-handler overhead. -template -bench::benchmark_result bench_single_threaded_post(int num_handlers) +template +bench::benchmark_result bench_single_threaded_post( int num_handlers ) { - bench::print_header("Single-threaded Handler Post"); + bench::print_header( "Single-threaded Handler Post" ); Context ioc; auto ex = ioc.get_executor(); @@ -53,84 +51,76 @@ bench::benchmark_result bench_single_threaded_post(int num_handlers) bench::stopwatch sw; - for (int i = 0; i < num_handlers; ++i) - capy::run_async(ex)(increment_task(counter)); + for( int i = 0; i < num_handlers; ++i ) + capy::run_async( ex )( increment_task( counter ) ); ioc.run(); double elapsed = sw.elapsed_seconds(); - double ops_per_sec = static_cast(num_handlers) / elapsed; + double ops_per_sec = static_cast( num_handlers ) / elapsed; std::cout << " Handlers: " << num_handlers << "\n"; - std::cout << " Elapsed: " << std::fixed << std::setprecision(3) + std::cout << " Elapsed: " << std::fixed << std::setprecision( 3 ) << elapsed << " s\n"; - std::cout << " Throughput: " << bench::format_rate(ops_per_sec) << "\n"; + std::cout << " Throughput: " << bench::format_rate( ops_per_sec ) << "\n"; - if (counter != num_handlers) + if( counter != num_handlers ) { std::cerr << " ERROR: counter mismatch! Expected " << num_handlers << ", got " << counter << "\n"; } - return bench::benchmark_result("single_threaded_post") - .add("handlers", num_handlers) - .add("elapsed_s", elapsed) - .add("ops_per_sec", ops_per_sec); + return bench::benchmark_result( "single_threaded_post" ) + .add( "handlers", num_handlers ) + .add( "elapsed_s", elapsed ) + .add( "ops_per_sec", ops_per_sec ); } -// Measures how throughput scales when multiple threads call run() on the same -// io_context. Pre-posts all work, then times execution across 1, 2, 4, 8 threads. -// Reveals lock contention in the scheduler's work queue. Ideal scaling would show -// linear speedup; sub-linear or negative scaling indicates contention issues that -// may need strand-based partitioning in real applications. -template -bench::benchmark_result bench_multithreaded_scaling(int num_handlers, int max_threads) +template +bench::benchmark_result bench_multithreaded_scaling( int num_handlers, int max_threads ) { - bench::print_header("Multi-threaded Scaling"); + bench::print_header( "Multi-threaded Scaling" ); std::cout << " Handlers per test: " << num_handlers << "\n\n"; - bench::benchmark_result result("multithreaded_scaling"); - result.add("handlers", num_handlers); + bench::benchmark_result result( "multithreaded_scaling" ); + result.add( "handlers", num_handlers ); double baseline_ops = 0; - for (int num_threads = 1; num_threads <= max_threads; num_threads *= 2) + for( int num_threads = 1; num_threads <= max_threads; num_threads *= 2 ) { Context ioc; auto ex = ioc.get_executor(); - std::atomic counter{0}; + std::atomic counter{ 0 }; - // Post all handlers first - for (int i = 0; i < num_handlers; ++i) - capy::run_async(ex)(atomic_increment_task(counter)); + for( int i = 0; i < num_handlers; ++i ) + capy::run_async( ex )( atomic_increment_task( counter ) ); bench::stopwatch sw; - // Run with multiple threads std::vector runners; - for (int t = 0; t < num_threads; ++t) - runners.emplace_back([&ioc]() { ioc.run(); }); + for( int t = 0; t < num_threads; ++t ) + runners.emplace_back( [&ioc]() { ioc.run(); } ); - for (auto& t : runners) + for( auto& t : runners ) t.join(); double elapsed = sw.elapsed_seconds(); - double ops_per_sec = static_cast(num_handlers) / elapsed; + double ops_per_sec = static_cast( num_handlers ) / elapsed; std::cout << " " << num_threads << " thread(s): " - << bench::format_rate(ops_per_sec); + << bench::format_rate( ops_per_sec ); - if (num_threads == 1) + if( num_threads == 1 ) baseline_ops = ops_per_sec; - else if (baseline_ops > 0) - std::cout << " (speedup: " << std::fixed << std::setprecision(2) - << (ops_per_sec / baseline_ops) << "x)"; + else if( baseline_ops > 0 ) + std::cout << " (speedup: " << std::fixed << std::setprecision( 2 ) + << ( ops_per_sec / baseline_ops ) << "x)"; std::cout << "\n"; - // Record per-thread results - result.add("threads_" + std::to_string(num_threads) + "_ops_per_sec", ops_per_sec); + result.add( "threads_" + std::to_string( num_threads ) + "_ops_per_sec", ops_per_sec ); - if (counter.load() != num_handlers) + if( counter.load() != num_handlers ) { std::cerr << " ERROR: counter mismatch! Expected " << num_handlers << ", got " << counter.load() << "\n"; @@ -140,15 +130,10 @@ bench::benchmark_result bench_multithreaded_scaling(int num_handlers, int max_th return result; } -// Measures performance when posting and polling are interleaved, simulating a -// game loop or GUI event pump that processes available work each frame. Posts a -// batch of handlers, calls poll() to execute ready work, then repeats. Tests the -// efficiency of poll() with small work batches and frequent context restarts, -// which is common in latency-sensitive applications that can't block on run(). -template -bench::benchmark_result bench_interleaved_post_run(int iterations, int handlers_per_iteration) +template +bench::benchmark_result bench_interleaved_post_run( int iterations, int handlers_per_iteration ) { - bench::print_header("Interleaved Post/Run"); + bench::print_header( "Interleaved Post/Run" ); Context ioc; auto ex = ioc.get_executor(); @@ -157,236 +142,137 @@ bench::benchmark_result bench_interleaved_post_run(int iterations, int handlers_ bench::stopwatch sw; - for (int iter = 0; iter < iterations; ++iter) + for( int iter = 0; iter < iterations; ++iter ) { - for (int i = 0; i < handlers_per_iteration; ++i) - capy::run_async(ex)(increment_task(counter)); + for( int i = 0; i < handlers_per_iteration; ++i ) + capy::run_async( ex )( increment_task( counter ) ); ioc.poll(); ioc.restart(); } - // Run any remaining handlers ioc.run(); double elapsed = sw.elapsed_seconds(); - double ops_per_sec = static_cast(total_handlers) / elapsed; + double ops_per_sec = static_cast( total_handlers ) / elapsed; std::cout << " Iterations: " << iterations << "\n"; std::cout << " Handlers/iter: " << handlers_per_iteration << "\n"; std::cout << " Total handlers: " << total_handlers << "\n"; - std::cout << " Elapsed: " << std::fixed << std::setprecision(3) + std::cout << " Elapsed: " << std::fixed << std::setprecision( 3 ) << elapsed << " s\n"; - std::cout << " Throughput: " << bench::format_rate(ops_per_sec) << "\n"; + std::cout << " Throughput: " << bench::format_rate( ops_per_sec ) << "\n"; - if (counter != total_handlers) + if( counter != total_handlers ) { std::cerr << " ERROR: counter mismatch! Expected " << total_handlers << ", got " << counter << "\n"; } - return bench::benchmark_result("interleaved_post_run") - .add("iterations", iterations) - .add("handlers_per_iteration", handlers_per_iteration) - .add("total_handlers", total_handlers) - .add("elapsed_s", elapsed) - .add("ops_per_sec", ops_per_sec); + return bench::benchmark_result( "interleaved_post_run" ) + .add( "iterations", iterations ) + .add( "handlers_per_iteration", handlers_per_iteration ) + .add( "total_handlers", total_handlers ) + .add( "elapsed_s", elapsed ) + .add( "ops_per_sec", ops_per_sec ); } -// Measures performance under realistic concurrent load where multiple threads -// simultaneously post work AND execute it. This is the most stressful test for -// the scheduler's synchronization, as threads contend for both the submission -// and completion paths. Simulates server workloads where worker threads both -// generate new tasks and process existing ones, revealing producer-consumer -// bottlenecks. -template -bench::benchmark_result bench_concurrent_post_run(int num_threads, int handlers_per_thread) +template +bench::benchmark_result bench_concurrent_post_run( int num_threads, int handlers_per_thread ) { - bench::print_header("Concurrent Post and Run"); + bench::print_header( "Concurrent Post and Run" ); Context ioc; auto ex = ioc.get_executor(); - std::atomic counter{0}; + std::atomic counter{ 0 }; int total_handlers = num_threads * handlers_per_thread; bench::stopwatch sw; - // Launch threads that both post and run std::vector workers; - for (int t = 0; t < num_threads; ++t) + for( int t = 0; t < num_threads; ++t ) { - workers.emplace_back([&ex, &ioc, &counter, handlers_per_thread]() + workers.emplace_back( [&ex, &ioc, &counter, handlers_per_thread]() { - for (int i = 0; i < handlers_per_thread; ++i) - capy::run_async(ex)(atomic_increment_task(counter)); + for( int i = 0; i < handlers_per_thread; ++i ) + capy::run_async( ex )( atomic_increment_task( counter ) ); ioc.run(); - }); + } ); } - for (auto& t : workers) + for( auto& t : workers ) t.join(); double elapsed = sw.elapsed_seconds(); - double ops_per_sec = static_cast(total_handlers) / elapsed; + double ops_per_sec = static_cast( total_handlers ) / elapsed; std::cout << " Threads: " << num_threads << "\n"; std::cout << " Handlers/thread: " << handlers_per_thread << "\n"; std::cout << " Total handlers: " << total_handlers << "\n"; - std::cout << " Elapsed: " << std::fixed << std::setprecision(3) + std::cout << " Elapsed: " << std::fixed << std::setprecision( 3 ) << elapsed << " s\n"; - std::cout << " Throughput: " << bench::format_rate(ops_per_sec) << "\n"; + std::cout << " Throughput: " << bench::format_rate( ops_per_sec ) << "\n"; - if (counter.load() != total_handlers) + if( counter.load() != total_handlers ) { std::cerr << " ERROR: counter mismatch! Expected " << total_handlers << ", got " << counter.load() << "\n"; } - return bench::benchmark_result("concurrent_post_run") - .add("threads", num_threads) - .add("handlers_per_thread", handlers_per_thread) - .add("total_handlers", total_handlers) - .add("elapsed_s", elapsed) - .add("ops_per_sec", ops_per_sec); + return bench::benchmark_result( "concurrent_post_run" ) + .add( "threads", num_threads ) + .add( "handlers_per_thread", handlers_per_thread ) + .add( "total_handlers", total_handlers ) + .add( "elapsed_s", elapsed ) + .add( "ops_per_sec", ops_per_sec ); } -// Run benchmarks for a specific context type -template -void run_benchmarks(const char* backend_name, const char* output_file, const char* bench_filter) -{ - std::cout << "Boost.Corosio io_context Benchmarks\n"; - std::cout << "====================================\n"; - std::cout << "Backend: " << backend_name << "\n\n"; +} // anonymous namespace - bench::result_collector collector(backend_name); +template +void run_io_context_benchmarks( + bench::result_collector& collector, + char const* filter ) +{ + std::cout << "\n>>> io_context Benchmarks <<<\n"; - bool run_all = !bench_filter || std::strcmp(bench_filter, "all") == 0; + bool run_all = !filter || std::strcmp( filter, "all" ) == 0; // Warm up { Context ioc; auto ex = ioc.get_executor(); int counter = 0; - for (int i = 0; i < 1000; ++i) - capy::run_async(ex)(increment_task(counter)); + for( int i = 0; i < 1000; ++i ) + capy::run_async( ex )( increment_task( counter ) ); ioc.run(); } - // Run selected benchmarks - if (run_all || std::strcmp(bench_filter, "single_threaded") == 0) - collector.add(bench_single_threaded_post(1000000)); + if( run_all || std::strcmp( filter, "single_threaded" ) == 0 ) + collector.add( bench_single_threaded_post( 1000000 ) ); - if (run_all || std::strcmp(bench_filter, "multithreaded") == 0) - collector.add(bench_multithreaded_scaling(1000000, 8)); + if( run_all || std::strcmp( filter, "multithreaded" ) == 0 ) + collector.add( bench_multithreaded_scaling( 1000000, 8 ) ); - if (run_all || std::strcmp(bench_filter, "interleaved") == 0) - collector.add(bench_interleaved_post_run(10000, 100)); - - if (run_all || std::strcmp(bench_filter, "concurrent") == 0) - collector.add(bench_concurrent_post_run(4, 250000)); - - std::cout << "\nBenchmarks complete.\n"; - - if (output_file) - { - if (collector.write_json(output_file)) - std::cout << "Results written to: " << output_file << "\n"; - else - std::cerr << "Error: Failed to write results to: " << output_file << "\n"; - } -} + if( run_all || std::strcmp( filter, "interleaved" ) == 0 ) + collector.add( bench_interleaved_post_run( 10000, 100 ) ); -void print_usage(const char* program_name) -{ - std::cout << "Usage: " << program_name << " [OPTIONS]\n\n"; - std::cout << "Options:\n"; - std::cout << " --backend Select I/O backend (default: platform default)\n"; - std::cout << " --bench Run only the specified benchmark\n"; - std::cout << " --output Write JSON results to file\n"; - std::cout << " --list List available backends\n"; - std::cout << " --help Show this help message\n"; - std::cout << "\n"; - std::cout << "Available benchmarks:\n"; - std::cout << " single_threaded Single-threaded handler post throughput\n"; - std::cout << " multithreaded Multi-threaded scaling test\n"; - std::cout << " interleaved Interleaved post/poll pattern\n"; - std::cout << " concurrent Concurrent post and run\n"; - std::cout << " all Run all benchmarks (default)\n"; - std::cout << "\n"; - bench::print_available_backends(); + if( run_all || std::strcmp( filter, "concurrent" ) == 0 ) + collector.add( bench_concurrent_post_run( 4, 250000 ) ); } -int main(int argc, char* argv[]) -{ - const char* backend = nullptr; - const char* output_file = nullptr; - const char* bench_filter = nullptr; - - // Parse command-line arguments - for (int i = 1; i < argc; ++i) - { - if (std::strcmp(argv[i], "--backend") == 0) - { - if (i + 1 < argc) - { - backend = argv[++i]; - } - else - { - std::cerr << "Error: --backend requires an argument\n"; - return 1; - } - } - else if (std::strcmp(argv[i], "--bench") == 0) - { - if (i + 1 < argc) - { - bench_filter = argv[++i]; - } - else - { - std::cerr << "Error: --bench requires an argument\n"; - return 1; - } - } - else if (std::strcmp(argv[i], "--output") == 0) - { - if (i + 1 < argc) - { - output_file = argv[++i]; - } - else - { - std::cerr << "Error: --output requires an argument\n"; - return 1; - } - } - else if (std::strcmp(argv[i], "--list") == 0) - { - bench::print_available_backends(); - return 0; - } - else if (std::strcmp(argv[i], "--help") == 0 || std::strcmp(argv[i], "-h") == 0) - { - print_usage(argv[0]); - return 0; - } - else - { - std::cerr << "Unknown option: " << argv[i] << "\n"; - print_usage(argv[0]); - return 1; - } - } - - // If no backend specified, use platform default - if (!backend) - backend = bench::default_backend_name(); - - // Dispatch to the selected backend using a generic lambda - return bench::dispatch_backend(backend, - [=](const char* name) - { - run_benchmarks(name, output_file, bench_filter); - }); -} +// Explicit instantiations +#if BOOST_COROSIO_HAS_EPOLL +template void run_io_context_benchmarks( + bench::result_collector&, char const* ); +#endif +#if BOOST_COROSIO_HAS_SELECT +template void run_io_context_benchmarks( + bench::result_collector&, char const* ); +#endif +#if BOOST_COROSIO_HAS_IOCP +template void run_io_context_benchmarks( + bench::result_collector&, char const* ); +#endif + +} // namespace corosio_bench diff --git a/bench/corosio/main.cpp b/bench/corosio/main.cpp new file mode 100644 index 00000000..28a38697 --- /dev/null +++ b/bench/corosio/main.cpp @@ -0,0 +1,175 @@ +// +// Copyright (c) 2026 Steve Gerbino +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +// Official repository: https://github.com/cppalliance/corosio +// + +#include "benchmarks.hpp" + +#include +#include + +#include +#include + +#include "../common/backend_selection.hpp" +#include "../common/benchmark.hpp" + +namespace corosio = boost::corosio; + +namespace { + +template +void run_benchmarks( + char const* backend_name, + char const* output_file, + char const* category_filter, + char const* bench_filter ) +{ + std::cout << "Boost.Corosio Benchmarks\n"; + std::cout << "========================\n"; + std::cout << "Backend: " << backend_name << "\n"; + + bench::result_collector collector( backend_name ); + + bool run_all = !category_filter || std::strcmp( category_filter, "all" ) == 0; + + if( run_all || std::strcmp( category_filter, "io_context" ) == 0 ) + corosio_bench::run_io_context_benchmarks( collector, bench_filter ); + + if( run_all || std::strcmp( category_filter, "socket_throughput" ) == 0 ) + corosio_bench::run_socket_throughput_benchmarks( collector, bench_filter ); + + if( run_all || std::strcmp( category_filter, "socket_latency" ) == 0 ) + corosio_bench::run_socket_latency_benchmarks( collector, bench_filter ); + + if( run_all || std::strcmp( category_filter, "http_server" ) == 0 ) + corosio_bench::run_http_server_benchmarks( collector, bench_filter ); + + std::cout << "\nBenchmarks complete.\n"; + + if( output_file ) + { + if( collector.write_json( output_file ) ) + std::cout << "Results written to: " << output_file << "\n"; + else + std::cerr << "Error: Failed to write results to: " << output_file << "\n"; + } +} + +void print_usage( char const* program_name ) +{ + std::cout << "Usage: " << program_name << " [OPTIONS]\n\n"; + std::cout << "Options:\n"; + std::cout << " --backend Select I/O backend (default: platform default)\n"; + std::cout << " --category Run only the specified benchmark category\n"; + std::cout << " --bench Run only the specified benchmark within category\n"; + std::cout << " --output Write JSON results to file\n"; + std::cout << " --list List available backends\n"; + std::cout << " --help Show this help message\n"; + std::cout << "\n"; + std::cout << "Benchmark categories:\n"; + std::cout << " io_context io_context handler throughput tests\n"; + std::cout << " socket_throughput Socket throughput tests\n"; + std::cout << " socket_latency Socket latency tests\n"; + std::cout << " http_server HTTP server benchmarks\n"; + std::cout << " all Run all categories (default)\n"; + std::cout << "\n"; + std::cout << "Individual benchmarks (--bench):\n"; + std::cout << " io_context: single_threaded, multithreaded, interleaved, concurrent\n"; + std::cout << " socket_throughput: unidirectional, bidirectional\n"; + std::cout << " socket_latency: pingpong, concurrent\n"; + std::cout << " http_server: single_conn, concurrent, multithread\n"; + std::cout << "\n"; + bench::print_available_backends(); +} + +} // anonymous namespace + +int main( int argc, char* argv[] ) +{ + char const* backend = nullptr; + char const* output_file = nullptr; + char const* category_filter = nullptr; + char const* bench_filter = nullptr; + + for( int i = 1; i < argc; ++i ) + { + if( std::strcmp( argv[i], "--backend" ) == 0 ) + { + if( i + 1 < argc ) + { + backend = argv[++i]; + } + else + { + std::cerr << "Error: --backend requires an argument\n"; + return 1; + } + } + else if( std::strcmp( argv[i], "--category" ) == 0 ) + { + if( i + 1 < argc ) + { + category_filter = argv[++i]; + } + else + { + std::cerr << "Error: --category requires an argument\n"; + return 1; + } + } + else if( std::strcmp( argv[i], "--bench" ) == 0 ) + { + if( i + 1 < argc ) + { + bench_filter = argv[++i]; + } + else + { + std::cerr << "Error: --bench requires an argument\n"; + return 1; + } + } + else if( std::strcmp( argv[i], "--output" ) == 0 ) + { + if( i + 1 < argc ) + { + output_file = argv[++i]; + } + else + { + std::cerr << "Error: --output requires an argument\n"; + return 1; + } + } + else if( std::strcmp( argv[i], "--list" ) == 0 ) + { + bench::print_available_backends(); + return 0; + } + else if( std::strcmp( argv[i], "--help" ) == 0 || std::strcmp( argv[i], "-h" ) == 0 ) + { + print_usage( argv[0] ); + return 0; + } + else + { + std::cerr << "Unknown option: " << argv[i] << "\n"; + print_usage( argv[0] ); + return 1; + } + } + + if( !backend ) + backend = bench::default_backend_name(); + + return bench::dispatch_backend( backend, + [=]( char const* name ) + { + run_benchmarks( name, output_file, category_filter, bench_filter ); + } ); +} diff --git a/bench/corosio/socket_latency_bench.cpp b/bench/corosio/socket_latency_bench.cpp index 9e15aa30..9ecd0c84 100644 --- a/bench/corosio/socket_latency_bench.cpp +++ b/bench/corosio/socket_latency_bench.cpp @@ -7,7 +7,10 @@ // Official repository: https://github.com/cppalliance/corosio // +#include "benchmarks.hpp" + #include +#include #include #include #include @@ -20,112 +23,97 @@ #include #include -#include "../common/backend_selection.hpp" #include "../common/benchmark.hpp" namespace corosio = boost::corosio; namespace capy = boost::capy; -// Ping-pong coroutine task +namespace corosio_bench { +namespace { + capy::task<> pingpong_task( corosio::tcp_socket& client, corosio::tcp_socket& server, std::size_t message_size, int iterations, - bench::statistics& stats) + bench::statistics& stats ) { - std::vector send_buf(message_size, 'P'); - std::vector recv_buf(message_size); + std::vector send_buf( message_size, 'P' ); + std::vector recv_buf( message_size ); - for (int i = 0; i < iterations; ++i) + for( int i = 0; i < iterations; ++i ) { bench::stopwatch sw; - // Client sends ping auto [ec1, n1] = co_await capy::write( - client, capy::const_buffer(send_buf.data(), send_buf.size())); - if (ec1) + client, capy::const_buffer( send_buf.data(), send_buf.size() ) ); + if( ec1 ) { std::cerr << " Write error: " << ec1.message() << "\n"; co_return; } - // Server receives ping auto [ec2, n2] = co_await capy::read( - server, capy::mutable_buffer(recv_buf.data(), recv_buf.size())); - if (ec2) + server, capy::mutable_buffer( recv_buf.data(), recv_buf.size() ) ); + if( ec2 ) { std::cerr << " Server read error: " << ec2.message() << "\n"; co_return; } - // Server sends pong auto [ec3, n3] = co_await capy::write( - server, capy::const_buffer(recv_buf.data(), n2)); - if (ec3) + server, capy::const_buffer( recv_buf.data(), n2 ) ); + if( ec3 ) { std::cerr << " Server write error: " << ec3.message() << "\n"; co_return; } - // Client receives pong auto [ec4, n4] = co_await capy::read( - client, capy::mutable_buffer(recv_buf.data(), recv_buf.size())); - if (ec4) + client, capy::mutable_buffer( recv_buf.data(), recv_buf.size() ) ); + if( ec4 ) { std::cerr << " Client read error: " << ec4.message() << "\n"; co_return; } double rtt_us = sw.elapsed_us(); - stats.add(rtt_us); + stats.add( rtt_us ); } } -// Measures round-trip latency for a request-response pattern over loopback sockets. -// Client sends a message, server echoes it back, measuring the complete cycle time. -// This is the fundamental latency metric for RPC-style protocols. Reports mean, -// median (p50), and tail latencies (p99, p99.9) which are critical for SLA compliance. -// Different message sizes reveal fixed overhead vs. size-dependent costs. template -bench::benchmark_result bench_pingpong_latency(std::size_t message_size, int iterations) +bench::benchmark_result bench_pingpong_latency( std::size_t message_size, int iterations ) { std::cout << " Message size: " << message_size << " bytes, "; std::cout << "Iterations: " << iterations << "\n"; Context ioc; - auto [client, server] = corosio::test::make_socket_pair(ioc); + auto [client, server] = corosio::test::make_socket_pair( ioc ); - // Disable Nagle's algorithm for low latency - client.set_no_delay(true); - server.set_no_delay(true); + client.set_no_delay( true ); + server.set_no_delay( true ); bench::statistics latency_stats; - capy::run_async(ioc.get_executor())( - pingpong_task(client, server, message_size, iterations, latency_stats)); + capy::run_async( ioc.get_executor() )( + pingpong_task( client, server, message_size, iterations, latency_stats ) ); ioc.run(); - bench::print_latency_stats(latency_stats, "Round-trip latency"); + bench::print_latency_stats( latency_stats, "Round-trip latency" ); std::cout << "\n"; client.close(); server.close(); - return bench::benchmark_result("pingpong_" + std::to_string(message_size)) - .add("message_size", static_cast(message_size)) - .add("iterations", iterations) - .add_latency_stats("rtt", latency_stats); + return bench::benchmark_result( "pingpong_" + std::to_string( message_size ) ) + .add( "message_size", static_cast( message_size ) ) + .add( "iterations", iterations ) + .add_latency_stats( "rtt", latency_stats ); } -// Measures latency degradation under concurrent connection load. Multiple socket -// pairs perform ping-pong simultaneously, revealing how latency increases as the -// scheduler multiplexes more connections. Critical for capacity planning: shows -// how many concurrent connections can be sustained before latency becomes -// unacceptable. A well-designed scheduler should show gradual degradation rather -// than sudden latency spikes. template -bench::benchmark_result bench_concurrent_latency(int num_pairs, std::size_t message_size, int iterations) +bench::benchmark_result bench_concurrent_latency( int num_pairs, std::size_t message_size, int iterations ) { std::cout << " Concurrent pairs: " << num_pairs << ", "; std::cout << "Message size: " << message_size << " bytes, "; @@ -133,200 +121,108 @@ bench::benchmark_result bench_concurrent_latency(int num_pairs, std::size_t mess Context ioc; - // Store sockets and stats separately for safe reference passing std::vector clients; std::vector servers; - std::vector stats(num_pairs); + std::vector stats( num_pairs ); - clients.reserve(num_pairs); - servers.reserve(num_pairs); + clients.reserve( num_pairs ); + servers.reserve( num_pairs ); - for (int i = 0; i < num_pairs; ++i) + for( int i = 0; i < num_pairs; ++i ) { - auto [c, s] = corosio::test::make_socket_pair(ioc); - // Disable Nagle's algorithm for low latency - c.set_no_delay(true); - s.set_no_delay(true); - clients.push_back(std::move(c)); - servers.push_back(std::move(s)); + auto [c, s] = corosio::test::make_socket_pair( ioc ); + c.set_no_delay( true ); + s.set_no_delay( true ); + clients.push_back( std::move( c ) ); + servers.push_back( std::move( s ) ); } - // Launch concurrent ping-pong tasks - for (int p = 0; p < num_pairs; ++p) + for( int p = 0; p < num_pairs; ++p ) { - capy::run_async(ioc.get_executor())( - pingpong_task(clients[p], servers[p], message_size, iterations, stats[p])); + capy::run_async( ioc.get_executor() )( + pingpong_task( clients[p], servers[p], message_size, iterations, stats[p] ) ); } ioc.run(); std::cout << " Per-pair results:\n"; - for (int i = 0; i < num_pairs && i < 3; ++i) + for( int i = 0; i < num_pairs && i < 3; ++i ) { std::cout << " Pair " << i << ": mean=" - << bench::format_latency(stats[i].mean()) - << ", p99=" << bench::format_latency(stats[i].p99()) + << bench::format_latency( stats[i].mean() ) + << ", p99=" << bench::format_latency( stats[i].p99() ) << "\n"; } - if (num_pairs > 3) - std::cout << " ... (" << (num_pairs - 3) << " more pairs)\n"; + if( num_pairs > 3 ) + std::cout << " ... (" << ( num_pairs - 3 ) << " more pairs)\n"; - // Calculate average across all pairs double total_mean = 0; double total_p99 = 0; - for (auto& s : stats) + for( auto& s : stats ) { total_mean += s.mean(); total_p99 += s.p99(); } std::cout << " Average mean latency: " - << bench::format_latency(total_mean / num_pairs) << "\n"; + << bench::format_latency( total_mean / num_pairs ) << "\n"; std::cout << " Average p99 latency: " - << bench::format_latency(total_p99 / num_pairs) << "\n\n"; + << bench::format_latency( total_p99 / num_pairs ) << "\n\n"; - for (auto& c : clients) + for( auto& c : clients ) c.close(); - for (auto& s : servers) + for( auto& s : servers ) s.close(); - return bench::benchmark_result("concurrent_" + std::to_string(num_pairs) + "_pairs") - .add("num_pairs", num_pairs) - .add("message_size", static_cast(message_size)) - .add("iterations", iterations) - .add("avg_mean_latency_us", total_mean / num_pairs) - .add("avg_p99_latency_us", total_p99 / num_pairs); + return bench::benchmark_result( "concurrent_" + std::to_string( num_pairs ) + "_pairs" ) + .add( "num_pairs", num_pairs ) + .add( "message_size", static_cast( message_size ) ) + .add( "iterations", iterations ) + .add( "avg_mean_latency_us", total_mean / num_pairs ) + .add( "avg_p99_latency_us", total_p99 / num_pairs ); } -// Run benchmarks for a specific context type +} // anonymous namespace + template -void run_benchmarks(const char* backend_name, const char* output_file, const char* bench_filter) +void run_socket_latency_benchmarks( + bench::result_collector& collector, + char const* filter ) { - std::cout << "Boost.Corosio Socket Latency Benchmarks\n"; - std::cout << "=======================================\n"; - std::cout << "Backend: " << backend_name << "\n\n"; + std::cout << "\n>>> Socket Latency Benchmarks <<<\n"; - bench::result_collector collector(backend_name); + bool run_all = !filter || std::strcmp( filter, "all" ) == 0; - bool run_all = !bench_filter || std::strcmp(bench_filter, "all") == 0; - - // Variable message sizes - std::vector message_sizes = {1, 64, 1024}; + std::vector message_sizes = { 1, 64, 1024 }; int iterations = 1000; - if (run_all || std::strcmp(bench_filter, "pingpong") == 0) + if( run_all || std::strcmp( filter, "pingpong" ) == 0 ) { - bench::print_header("Ping-Pong Round-Trip Latency"); - for (auto size : message_sizes) - collector.add(bench_pingpong_latency(size, iterations)); + bench::print_header( "Ping-Pong Round-Trip Latency" ); + for( auto size : message_sizes ) + collector.add( bench_pingpong_latency( size, iterations ) ); } - if (run_all || std::strcmp(bench_filter, "concurrent") == 0) + if( run_all || std::strcmp( filter, "concurrent" ) == 0 ) { - bench::print_header("Concurrent Socket Pairs Latency"); - collector.add(bench_concurrent_latency(1, 64, 1000)); - collector.add(bench_concurrent_latency(4, 64, 500)); - collector.add(bench_concurrent_latency(16, 64, 250)); - } - - std::cout << "\nBenchmarks complete.\n"; - - if (output_file) - { - if (collector.write_json(output_file)) - std::cout << "Results written to: " << output_file << "\n"; - else - std::cerr << "Error: Failed to write results to: " << output_file << "\n"; + bench::print_header( "Concurrent Socket Pairs Latency" ); + collector.add( bench_concurrent_latency( 1, 64, 1000 ) ); + collector.add( bench_concurrent_latency( 4, 64, 500 ) ); + collector.add( bench_concurrent_latency( 16, 64, 250 ) ); } } -void print_usage(const char* program_name) -{ - std::cout << "Usage: " << program_name << " [OPTIONS]\n\n"; - std::cout << "Options:\n"; - std::cout << " --backend Select I/O backend (default: platform default)\n"; - std::cout << " --bench Run only the specified benchmark\n"; - std::cout << " --output Write JSON results to file\n"; - std::cout << " --list List available backends\n"; - std::cout << " --help Show this help message\n"; - std::cout << "\n"; - std::cout << "Available benchmarks:\n"; - std::cout << " pingpong Ping-pong round-trip latency (various message sizes)\n"; - std::cout << " concurrent Concurrent socket pairs latency\n"; - std::cout << " all Run all benchmarks (default)\n"; - std::cout << "\n"; - bench::print_available_backends(); -} - -int main(int argc, char* argv[]) -{ - const char* backend = nullptr; - const char* output_file = nullptr; - const char* bench_filter = nullptr; - - for (int i = 1; i < argc; ++i) - { - if (std::strcmp(argv[i], "--backend") == 0) - { - if (i + 1 < argc) - { - backend = argv[++i]; - } - else - { - std::cerr << "Error: --backend requires an argument\n"; - return 1; - } - } - else if (std::strcmp(argv[i], "--bench") == 0) - { - if (i + 1 < argc) - { - bench_filter = argv[++i]; - } - else - { - std::cerr << "Error: --bench requires an argument\n"; - return 1; - } - } - else if (std::strcmp(argv[i], "--output") == 0) - { - if (i + 1 < argc) - { - output_file = argv[++i]; - } - else - { - std::cerr << "Error: --output requires an argument\n"; - return 1; - } - } - else if (std::strcmp(argv[i], "--list") == 0) - { - bench::print_available_backends(); - return 0; - } - else if (std::strcmp(argv[i], "--help") == 0 || std::strcmp(argv[i], "-h") == 0) - { - print_usage(argv[0]); - return 0; - } - else - { - std::cerr << "Unknown option: " << argv[i] << "\n"; - print_usage(argv[0]); - return 1; - } - } - - // If no backend specified, use platform default - if (!backend) - backend = bench::default_backend_name(); - - // Dispatch to the selected backend using a generic lambda - return bench::dispatch_backend(backend, - [=](const char* name) - { - run_benchmarks(name, output_file, bench_filter); - }); -} +// Explicit instantiations +#if BOOST_COROSIO_HAS_EPOLL +template void run_socket_latency_benchmarks( + bench::result_collector&, char const* ); +#endif +#if BOOST_COROSIO_HAS_SELECT +template void run_socket_latency_benchmarks( + bench::result_collector&, char const* ); +#endif +#if BOOST_COROSIO_HAS_IOCP +template void run_socket_latency_benchmarks( + bench::result_collector&, char const* ); +#endif + +} // namespace corosio_bench diff --git a/bench/corosio/socket_throughput_bench.cpp b/bench/corosio/socket_throughput_bench.cpp index 6ecbd030..919a3535 100644 --- a/bench/corosio/socket_throughput_bench.cpp +++ b/bench/corosio/socket_throughput_bench.cpp @@ -7,6 +7,8 @@ // Official repository: https://github.com/cppalliance/corosio // +#include "benchmarks.hpp" + #include #include #include @@ -28,58 +30,52 @@ #include #endif -#include "../common/backend_selection.hpp" #include "../common/benchmark.hpp" namespace corosio = boost::corosio; namespace capy = boost::capy; -// Helper to set TCP_NODELAY on a socket for low latency -inline void set_nodelay(corosio::tcp_socket& s) +namespace corosio_bench { +namespace { + +inline void set_nodelay( corosio::tcp_socket& s ) { int flag = 1; #if BOOST_COROSIO_HAS_IOCP - ::setsockopt(static_cast(s.native_handle()), IPPROTO_TCP, TCP_NODELAY, - reinterpret_cast(&flag), sizeof(flag)); + ::setsockopt( static_cast( s.native_handle() ), IPPROTO_TCP, TCP_NODELAY, + reinterpret_cast( &flag ), sizeof( flag ) ); #else - ::setsockopt(s.native_handle(), IPPROTO_TCP, TCP_NODELAY, &flag, sizeof(flag)); + ::setsockopt( s.native_handle(), IPPROTO_TCP, TCP_NODELAY, &flag, sizeof( flag ) ); #endif } -// Measures maximum unidirectional data transfer rate over a loopback socket pair. -// One coroutine writes while another reads, testing the efficiency of async I/O -// operations. Runs with different buffer sizes to reveal the optimal chunk size -// for this platform. Small buffers stress syscall overhead; large buffers approach -// memory bandwidth limits. Useful for tuning buffer sizes in streaming protocols. template -bench::benchmark_result bench_throughput(std::size_t chunk_size, std::size_t total_bytes) +bench::benchmark_result bench_throughput( std::size_t chunk_size, std::size_t total_bytes ) { std::cout << " Buffer size: " << chunk_size << " bytes, "; - std::cout << "Transfer: " << (total_bytes / (1024 * 1024)) << " MB\n"; + std::cout << "Transfer: " << ( total_bytes / ( 1024 * 1024 ) ) << " MB\n"; Context ioc; - auto [writer, reader] = corosio::test::make_socket_pair(ioc); + auto [writer, reader] = corosio::test::make_socket_pair( ioc ); - // Disable Nagle's algorithm for fair comparison with Asio - set_nodelay(writer); - set_nodelay(reader); + set_nodelay( writer ); + set_nodelay( reader ); - std::vector write_buf(chunk_size, 'x'); - std::vector read_buf(chunk_size); + std::vector write_buf( chunk_size, 'x' ); + std::vector read_buf( chunk_size ); std::size_t total_written = 0; std::size_t total_read = 0; bool writer_done = false; - // Writer coroutine auto write_task = [&]() -> capy::task<> { - while (total_written < total_bytes) + while( total_written < total_bytes ) { - std::size_t to_write = (std::min)(chunk_size, total_bytes - total_written); + std::size_t to_write = ( std::min )( chunk_size, total_bytes - total_written ); auto [ec, n] = co_await writer.write_some( - capy::const_buffer(write_buf.data(), to_write)); - if (ec) + capy::const_buffer( write_buf.data(), to_write ) ); + if( ec ) { std::cerr << " Write error: " << ec.message() << "\n"; break; @@ -87,24 +83,23 @@ bench::benchmark_result bench_throughput(std::size_t chunk_size, std::size_t tot total_written += n; } writer_done = true; - writer.shutdown(corosio::tcp_socket::shutdown_send); + writer.shutdown( corosio::tcp_socket::shutdown_send ); }; - // Reader coroutine auto read_task = [&]() -> capy::task<> { - while (total_read < total_bytes) + while( total_read < total_bytes ) { auto [ec, n] = co_await reader.read_some( - capy::mutable_buffer(read_buf.data(), read_buf.size())); - if (ec) + capy::mutable_buffer( read_buf.data(), read_buf.size() ) ); + if( ec ) { - if (writer_done && total_read >= total_bytes) + if( writer_done && total_read >= total_bytes ) break; std::cerr << " Read error: " << ec.message() << "\n"; break; } - if (n == 0) + if( n == 0 ) break; total_read += n; } @@ -112,271 +107,173 @@ bench::benchmark_result bench_throughput(std::size_t chunk_size, std::size_t tot bench::stopwatch sw; - capy::run_async(ioc.get_executor())(write_task()); - capy::run_async(ioc.get_executor())(read_task()); + capy::run_async( ioc.get_executor() )( write_task() ); + capy::run_async( ioc.get_executor() )( read_task() ); ioc.run(); double elapsed = sw.elapsed_seconds(); - double throughput = static_cast(total_read) / elapsed; + double throughput = static_cast( total_read ) / elapsed; std::cout << " Written: " << total_written << " bytes\n"; std::cout << " Read: " << total_read << " bytes\n"; - std::cout << " Elapsed: " << std::fixed << std::setprecision(3) + std::cout << " Elapsed: " << std::fixed << std::setprecision( 3 ) << elapsed << " s\n"; - std::cout << " Throughput: " << bench::format_throughput(throughput) << "\n\n"; + std::cout << " Throughput: " << bench::format_throughput( throughput ) << "\n\n"; writer.close(); reader.close(); - return bench::benchmark_result("throughput_" + std::to_string(chunk_size)) - .add("chunk_size", static_cast(chunk_size)) - .add("total_bytes", static_cast(total_bytes)) - .add("bytes_written", static_cast(total_written)) - .add("bytes_read", static_cast(total_read)) - .add("elapsed_s", elapsed) - .add("throughput_bytes_per_sec", throughput); + return bench::benchmark_result( "throughput_" + std::to_string( chunk_size ) ) + .add( "chunk_size", static_cast( chunk_size ) ) + .add( "total_bytes", static_cast( total_bytes ) ) + .add( "bytes_written", static_cast( total_written ) ) + .add( "bytes_read", static_cast( total_read ) ) + .add( "elapsed_s", elapsed ) + .add( "throughput_bytes_per_sec", throughput ); } -// Measures full-duplex throughput with both endpoints sending and receiving -// simultaneously. Four concurrent coroutines (two writers, two readers) stress -// the scheduler's ability to multiplex I/O efficiently. This pattern is common -// in protocols like WebSocket or gRPC where data flows in both directions. -// Combined throughput should ideally approach 2x unidirectional throughput. template -bench::benchmark_result bench_bidirectional_throughput(std::size_t chunk_size, std::size_t total_bytes) +bench::benchmark_result bench_bidirectional_throughput( std::size_t chunk_size, std::size_t total_bytes ) { std::cout << " Buffer size: " << chunk_size << " bytes, "; - std::cout << "Transfer: " << (total_bytes / (1024 * 1024)) << " MB each direction\n"; + std::cout << "Transfer: " << ( total_bytes / ( 1024 * 1024 ) ) << " MB each direction\n"; Context ioc; - auto [sock1, sock2] = corosio::test::make_socket_pair(ioc); + auto [sock1, sock2] = corosio::test::make_socket_pair( ioc ); - // Disable Nagle's algorithm for fair comparison with Asio - set_nodelay(sock1); - set_nodelay(sock2); + set_nodelay( sock1 ); + set_nodelay( sock2 ); - std::vector buf1(chunk_size, 'a'); - std::vector buf2(chunk_size, 'b'); + std::vector buf1( chunk_size, 'a' ); + std::vector buf2( chunk_size, 'b' ); std::size_t written1 = 0, read1 = 0; std::size_t written2 = 0, read2 = 0; - // Socket 1 writes to socket 2 auto write1_task = [&]() -> capy::task<> { - while (written1 < total_bytes) + while( written1 < total_bytes ) { - std::size_t to_write = (std::min)(chunk_size, total_bytes - written1); + std::size_t to_write = ( std::min )( chunk_size, total_bytes - written1 ); auto [ec, n] = co_await sock1.write_some( - capy::const_buffer(buf1.data(), to_write)); - if (ec) break; + capy::const_buffer( buf1.data(), to_write ) ); + if( ec ) break; written1 += n; } - sock1.shutdown(corosio::tcp_socket::shutdown_send); + sock1.shutdown( corosio::tcp_socket::shutdown_send ); }; - // Socket 2 reads from socket 1 auto read1_task = [&]() -> capy::task<> { - std::vector rbuf(chunk_size); - while (read1 < total_bytes) + std::vector rbuf( chunk_size ); + while( read1 < total_bytes ) { auto [ec, n] = co_await sock2.read_some( - capy::mutable_buffer(rbuf.data(), rbuf.size())); - if (ec || n == 0) break; + capy::mutable_buffer( rbuf.data(), rbuf.size() ) ); + if( ec || n == 0 ) break; read1 += n; } }; - // Socket 2 writes to socket 1 auto write2_task = [&]() -> capy::task<> { - while (written2 < total_bytes) + while( written2 < total_bytes ) { - std::size_t to_write = (std::min)(chunk_size, total_bytes - written2); + std::size_t to_write = ( std::min )( chunk_size, total_bytes - written2 ); auto [ec, n] = co_await sock2.write_some( - capy::const_buffer(buf2.data(), to_write)); - if (ec) break; + capy::const_buffer( buf2.data(), to_write ) ); + if( ec ) break; written2 += n; } - sock2.shutdown(corosio::tcp_socket::shutdown_send); + sock2.shutdown( corosio::tcp_socket::shutdown_send ); }; - // Socket 1 reads from socket 2 auto read2_task = [&]() -> capy::task<> { - std::vector rbuf(chunk_size); - while (read2 < total_bytes) + std::vector rbuf( chunk_size ); + while( read2 < total_bytes ) { auto [ec, n] = co_await sock1.read_some( - capy::mutable_buffer(rbuf.data(), rbuf.size())); - if (ec || n == 0) break; + capy::mutable_buffer( rbuf.data(), rbuf.size() ) ); + if( ec || n == 0 ) break; read2 += n; } }; bench::stopwatch sw; - capy::run_async(ioc.get_executor())(write1_task()); - capy::run_async(ioc.get_executor())(read1_task()); - capy::run_async(ioc.get_executor())(write2_task()); - capy::run_async(ioc.get_executor())(read2_task()); + capy::run_async( ioc.get_executor() )( write1_task() ); + capy::run_async( ioc.get_executor() )( read1_task() ); + capy::run_async( ioc.get_executor() )( write2_task() ); + capy::run_async( ioc.get_executor() )( read2_task() ); ioc.run(); double elapsed = sw.elapsed_seconds(); std::size_t total_transferred = read1 + read2; - double throughput = static_cast(total_transferred) / elapsed; + double throughput = static_cast( total_transferred ) / elapsed; std::cout << " Direction 1: " << read1 << " bytes\n"; std::cout << " Direction 2: " << read2 << " bytes\n"; std::cout << " Total: " << total_transferred << " bytes\n"; - std::cout << " Elapsed: " << std::fixed << std::setprecision(3) + std::cout << " Elapsed: " << std::fixed << std::setprecision( 3 ) << elapsed << " s\n"; - std::cout << " Throughput: " << bench::format_throughput(throughput) + std::cout << " Throughput: " << bench::format_throughput( throughput ) << " (combined)\n\n"; sock1.close(); sock2.close(); - return bench::benchmark_result("bidirectional_" + std::to_string(chunk_size)) - .add("chunk_size", static_cast(chunk_size)) - .add("total_bytes_per_direction", static_cast(total_bytes)) - .add("bytes_direction1", static_cast(read1)) - .add("bytes_direction2", static_cast(read2)) - .add("total_transferred", static_cast(total_transferred)) - .add("elapsed_s", elapsed) - .add("throughput_bytes_per_sec", throughput); + return bench::benchmark_result( "bidirectional_" + std::to_string( chunk_size ) ) + .add( "chunk_size", static_cast( chunk_size ) ) + .add( "total_bytes_per_direction", static_cast( total_bytes ) ) + .add( "bytes_direction1", static_cast( read1 ) ) + .add( "bytes_direction2", static_cast( read2 ) ) + .add( "total_transferred", static_cast( total_transferred ) ) + .add( "elapsed_s", elapsed ) + .add( "throughput_bytes_per_sec", throughput ); } -// Run benchmarks for a specific context type +} // anonymous namespace + template -void run_benchmarks(const char* backend_name, const char* output_file, const char* bench_filter) +void run_socket_throughput_benchmarks( + bench::result_collector& collector, + char const* filter ) { - std::cout << "Boost.Corosio Socket Throughput Benchmarks\n"; - std::cout << "==========================================\n"; - std::cout << "Backend: " << backend_name << "\n\n"; + std::cout << "\n>>> Socket Throughput Benchmarks <<<\n"; - bench::result_collector collector(backend_name); + bool run_all = !filter || std::strcmp( filter, "all" ) == 0; - bool run_all = !bench_filter || std::strcmp(bench_filter, "all") == 0; - - // Variable buffer sizes - std::vector buffer_sizes = {1024, 4096, 16384, 65536}; - std::size_t transfer_size = 64 * 1024 * 1024; // 64 MB - - if (run_all || std::strcmp(bench_filter, "unidirectional") == 0) - { - bench::print_header("Unidirectional Throughput"); - for (auto size : buffer_sizes) - collector.add(bench_throughput(size, transfer_size)); - } + std::vector buffer_sizes = { 1024, 4096, 16384, 65536 }; + std::size_t transfer_size = 64 * 1024 * 1024; - if (run_all || std::strcmp(bench_filter, "bidirectional") == 0) + if( run_all || std::strcmp( filter, "unidirectional" ) == 0 ) { - bench::print_header("Bidirectional Throughput"); - for (auto size : buffer_sizes) - collector.add(bench_bidirectional_throughput(size, transfer_size / 2)); + bench::print_header( "Unidirectional Throughput" ); + for( auto size : buffer_sizes ) + collector.add( bench_throughput( size, transfer_size ) ); } - std::cout << "\nBenchmarks complete.\n"; - - if (output_file) + if( run_all || std::strcmp( filter, "bidirectional" ) == 0 ) { - if (collector.write_json(output_file)) - std::cout << "Results written to: " << output_file << "\n"; - else - std::cerr << "Error: Failed to write results to: " << output_file << "\n"; + bench::print_header( "Bidirectional Throughput" ); + for( auto size : buffer_sizes ) + collector.add( bench_bidirectional_throughput( size, transfer_size / 2 ) ); } } -void print_usage(const char* program_name) -{ - std::cout << "Usage: " << program_name << " [OPTIONS]\n\n"; - std::cout << "Options:\n"; - std::cout << " --backend Select I/O backend (default: platform default)\n"; - std::cout << " --bench Run only the specified benchmark\n"; - std::cout << " --output Write JSON results to file\n"; - std::cout << " --list List available backends\n"; - std::cout << " --help Show this help message\n"; - std::cout << "\n"; - std::cout << "Available benchmarks:\n"; - std::cout << " unidirectional Unidirectional throughput (various buffer sizes)\n"; - std::cout << " bidirectional Bidirectional throughput (various buffer sizes)\n"; - std::cout << " all Run all benchmarks (default)\n"; - std::cout << "\n"; - bench::print_available_backends(); -} - -int main(int argc, char* argv[]) -{ - const char* backend = nullptr; - const char* output_file = nullptr; - const char* bench_filter = nullptr; - - for (int i = 1; i < argc; ++i) - { - if (std::strcmp(argv[i], "--backend") == 0) - { - if (i + 1 < argc) - { - backend = argv[++i]; - } - else - { - std::cerr << "Error: --backend requires an argument\n"; - return 1; - } - } - else if (std::strcmp(argv[i], "--bench") == 0) - { - if (i + 1 < argc) - { - bench_filter = argv[++i]; - } - else - { - std::cerr << "Error: --bench requires an argument\n"; - return 1; - } - } - else if (std::strcmp(argv[i], "--output") == 0) - { - if (i + 1 < argc) - { - output_file = argv[++i]; - } - else - { - std::cerr << "Error: --output requires an argument\n"; - return 1; - } - } - else if (std::strcmp(argv[i], "--list") == 0) - { - bench::print_available_backends(); - return 0; - } - else if (std::strcmp(argv[i], "--help") == 0 || std::strcmp(argv[i], "-h") == 0) - { - print_usage(argv[0]); - return 0; - } - else - { - std::cerr << "Unknown option: " << argv[i] << "\n"; - print_usage(argv[0]); - return 1; - } - } - - // If no backend specified, use platform default - if (!backend) - backend = bench::default_backend_name(); +// Explicit instantiations +#if BOOST_COROSIO_HAS_EPOLL +template void run_socket_throughput_benchmarks( + bench::result_collector&, char const* ); +#endif +#if BOOST_COROSIO_HAS_SELECT +template void run_socket_throughput_benchmarks( + bench::result_collector&, char const* ); +#endif +#if BOOST_COROSIO_HAS_IOCP +template void run_socket_throughput_benchmarks( + bench::result_collector&, char const* ); +#endif - // Dispatch to the selected backend using a generic lambda - return bench::dispatch_backend(backend, - [=](const char* name) - { - run_benchmarks(name, output_file, bench_filter); - }); -} +} // namespace corosio_bench From b2c75b11fce04d57858f3789c626ab594920a4dd Mon Sep 17 00:00:00 2001 From: Steve Gerbino Date: Wed, 4 Feb 2026 15:03:20 +0100 Subject: [PATCH 2/2] Add warmup and increase benchmark iterations - Add warmup phase to all benchmarks to reduce variance - Remove category headers from output - Increase iterations for more stable results: - io_context: 5M handlers - socket_latency: 1M iterations - socket_throughput: 4GB transfer - http_server: 1M requests --- bench/asio/http_server_bench.cpp | 36 ++++++++++++------ bench/asio/io_context_bench.cpp | 10 ++--- bench/asio/socket_latency_bench.cpp | 24 +++++++++--- bench/asio/socket_throughput_bench.cpp | 15 ++++++-- bench/corosio/http_server_bench.cpp | 45 +++++++++++++++++------ bench/corosio/io_context_bench.cpp | 10 ++--- bench/corosio/socket_latency_bench.cpp | 29 ++++++++++++--- bench/corosio/socket_throughput_bench.cpp | 20 ++++++++-- 8 files changed, 137 insertions(+), 52 deletions(-) diff --git a/bench/asio/http_server_bench.cpp b/bench/asio/http_server_bench.cpp index 17da83f1..34488648 100644 --- a/bench/asio/http_server_bench.cpp +++ b/bench/asio/http_server_bench.cpp @@ -320,14 +320,28 @@ void run_http_server_benchmarks( bench::result_collector& collector, char const* filter ) { - std::cout << "\n>>> HTTP Server Benchmarks (Asio) <<<\n"; - bool run_all = !filter || std::strcmp( filter, "all" ) == 0; + // Warm up + { + asio::io_context ioc; + auto [c, s] = make_socket_pair( ioc ); + char buf[256] = {}; + for( int i = 0; i < 10; ++i ) + { + asio::write( c, asio::buffer( bench::http::small_request, bench::http::small_request_size ) ); + asio::read( s, asio::buffer( buf, bench::http::small_request_size ) ); + asio::write( s, asio::buffer( bench::http::small_response, bench::http::small_response_size ) ); + asio::read( c, asio::buffer( buf, bench::http::small_response_size ) ); + } + c.close(); + s.close(); + } + if( run_all || std::strcmp( filter, "single_conn" ) == 0 ) { bench::print_header( "Single Connection (Sequential Requests)" ); - collector.add( bench_single_connection( 10000 ) ); + collector.add( bench_single_connection( 1000000 ) ); } if( run_all || std::strcmp( filter, "concurrent" ) == 0 ) @@ -335,10 +349,10 @@ void run_http_server_benchmarks( if( run_all ) std::this_thread::sleep_for( std::chrono::seconds( 5 ) ); bench::print_header( "Concurrent Connections" ); - collector.add( bench_concurrent_connections( 1, 10000 ) ); - collector.add( bench_concurrent_connections( 4, 2500 ) ); - collector.add( bench_concurrent_connections( 16, 625 ) ); - collector.add( bench_concurrent_connections( 32, 312 ) ); + collector.add( bench_concurrent_connections( 1, 1000000 ) ); + collector.add( bench_concurrent_connections( 4, 250000 ) ); + collector.add( bench_concurrent_connections( 16, 62500 ) ); + collector.add( bench_concurrent_connections( 32, 31250 ) ); } if( run_all || std::strcmp( filter, "multithread" ) == 0 ) @@ -346,10 +360,10 @@ void run_http_server_benchmarks( if( run_all ) std::this_thread::sleep_for( std::chrono::seconds( 5 ) ); bench::print_header( "Multi-threaded (32 connections, varying threads)" ); - collector.add( bench_multithread( 1, 32, 312 ) ); - collector.add( bench_multithread( 2, 32, 312 ) ); - collector.add( bench_multithread( 4, 32, 312 ) ); - collector.add( bench_multithread( 8, 32, 312 ) ); + collector.add( bench_multithread( 1, 32, 31250 ) ); + collector.add( bench_multithread( 2, 32, 31250 ) ); + collector.add( bench_multithread( 4, 32, 31250 ) ); + collector.add( bench_multithread( 8, 32, 31250 ) ); } } diff --git a/bench/asio/io_context_bench.cpp b/bench/asio/io_context_bench.cpp index 987768bd..0f641708 100644 --- a/bench/asio/io_context_bench.cpp +++ b/bench/asio/io_context_bench.cpp @@ -227,8 +227,6 @@ void run_io_context_benchmarks( bench::result_collector& collector, char const* filter ) { - std::cout << "\n>>> io_context Benchmarks (Asio) <<<\n"; - bool run_all = !filter || std::strcmp( filter, "all" ) == 0; // Warm up @@ -241,16 +239,16 @@ void run_io_context_benchmarks( } if( run_all || std::strcmp( filter, "single_threaded" ) == 0 ) - collector.add( bench_single_threaded_post( 1000000 ) ); + collector.add( bench_single_threaded_post( 5000000 ) ); if( run_all || std::strcmp( filter, "multithreaded" ) == 0 ) - collector.add( bench_multithreaded_scaling( 1000000, 8 ) ); + collector.add( bench_multithreaded_scaling( 5000000, 8 ) ); if( run_all || std::strcmp( filter, "interleaved" ) == 0 ) - collector.add( bench_interleaved_post_run( 10000, 100 ) ); + collector.add( bench_interleaved_post_run( 50000, 100 ) ); if( run_all || std::strcmp( filter, "concurrent" ) == 0 ) - collector.add( bench_concurrent_post_run( 4, 250000 ) ); + collector.add( bench_concurrent_post_run( 4, 1250000 ) ); } } // namespace asio_bench diff --git a/bench/asio/socket_latency_bench.cpp b/bench/asio/socket_latency_bench.cpp index a8a73453..5a686925 100644 --- a/bench/asio/socket_latency_bench.cpp +++ b/bench/asio/socket_latency_bench.cpp @@ -170,12 +170,24 @@ void run_socket_latency_benchmarks( bench::result_collector& collector, char const* filter ) { - std::cout << "\n>>> Socket Latency Benchmarks (Asio) <<<\n"; - bool run_all = !filter || std::strcmp( filter, "all" ) == 0; + // Warm up + { + asio::io_context ioc; + auto [c, s] = make_socket_pair( ioc ); + char buf[64] = {}; + for( int i = 0; i < 100; ++i ) + { + asio::write( c, asio::buffer( buf ) ); + asio::read( s, asio::buffer( buf ) ); + } + c.close(); + s.close(); + } + std::vector message_sizes = { 1, 64, 1024 }; - int iterations = 1000; + int iterations = 1000000; if( run_all || std::strcmp( filter, "pingpong" ) == 0 ) { @@ -187,9 +199,9 @@ void run_socket_latency_benchmarks( if( run_all || std::strcmp( filter, "concurrent" ) == 0 ) { bench::print_header( "Concurrent Socket Pairs Latency (Asio)" ); - collector.add( bench_concurrent_latency( 1, 64, 1000 ) ); - collector.add( bench_concurrent_latency( 4, 64, 500 ) ); - collector.add( bench_concurrent_latency( 16, 64, 250 ) ); + collector.add( bench_concurrent_latency( 1, 64, 1000000 ) ); + collector.add( bench_concurrent_latency( 4, 64, 500000 ) ); + collector.add( bench_concurrent_latency( 16, 64, 250000 ) ); } } diff --git a/bench/asio/socket_throughput_bench.cpp b/bench/asio/socket_throughput_bench.cpp index c2c51df3..f2b6f577 100644 --- a/bench/asio/socket_throughput_bench.cpp +++ b/bench/asio/socket_throughput_bench.cpp @@ -223,12 +223,21 @@ void run_socket_throughput_benchmarks( bench::result_collector& collector, char const* filter ) { - std::cout << "\n>>> Socket Throughput Benchmarks (Asio) <<<\n"; - bool run_all = !filter || std::strcmp( filter, "all" ) == 0; + // Warm up + { + asio::io_context ioc; + auto [w, r] = make_socket_pair( ioc ); + std::vector buf( 4096, 'w' ); + asio::write( w, asio::buffer( buf ) ); + asio::read( r, asio::buffer( buf ) ); + w.close(); + r.close(); + } + std::vector buffer_sizes = { 1024, 4096, 16384, 65536 }; - std::size_t transfer_size = 64 * 1024 * 1024; + std::size_t transfer_size = 4ULL * 1024 * 1024 * 1024; if( run_all || std::strcmp( filter, "unidirectional" ) == 0 ) { diff --git a/bench/corosio/http_server_bench.cpp b/bench/corosio/http_server_bench.cpp index 8f719c7e..4a642786 100644 --- a/bench/corosio/http_server_bench.cpp +++ b/bench/corosio/http_server_bench.cpp @@ -320,14 +320,37 @@ void run_http_server_benchmarks( bench::result_collector& collector, char const* filter ) { - std::cout << "\n>>> HTTP Server Benchmarks <<<\n"; - bool run_all = !filter || std::strcmp( filter, "all" ) == 0; + // Warm up + { + Context ioc; + auto [c, s] = corosio::test::make_socket_pair( ioc ); + char buf[256] = {}; + auto task = [&]() -> capy::task<> + { + for( int i = 0; i < 10; ++i ) + { + (void)co_await capy::write( + c, capy::const_buffer( bench::http::small_request, bench::http::small_request_size ) ); + (void)co_await s.read_some( + capy::mutable_buffer( buf, bench::http::small_request_size ) ); + (void)co_await capy::write( + s, capy::const_buffer( bench::http::small_response, bench::http::small_response_size ) ); + (void)co_await c.read_some( + capy::mutable_buffer( buf, bench::http::small_response_size ) ); + } + }; + capy::run_async( ioc.get_executor() )( task() ); + ioc.run(); + c.close(); + s.close(); + } + if( run_all || std::strcmp( filter, "single_conn" ) == 0 ) { bench::print_header( "Single Connection (Sequential Requests)" ); - collector.add( bench_single_connection( 10000 ) ); + collector.add( bench_single_connection( 1000000 ) ); } if( run_all || std::strcmp( filter, "concurrent" ) == 0 ) @@ -335,10 +358,10 @@ void run_http_server_benchmarks( if( run_all ) std::this_thread::sleep_for( std::chrono::seconds( 5 ) ); bench::print_header( "Concurrent Connections" ); - collector.add( bench_concurrent_connections( 1, 10000 ) ); - collector.add( bench_concurrent_connections( 4, 2500 ) ); - collector.add( bench_concurrent_connections( 16, 625 ) ); - collector.add( bench_concurrent_connections( 32, 312 ) ); + collector.add( bench_concurrent_connections( 1, 1000000 ) ); + collector.add( bench_concurrent_connections( 4, 250000 ) ); + collector.add( bench_concurrent_connections( 16, 62500 ) ); + collector.add( bench_concurrent_connections( 32, 31250 ) ); } if( run_all || std::strcmp( filter, "multithread" ) == 0 ) @@ -346,10 +369,10 @@ void run_http_server_benchmarks( if( run_all ) std::this_thread::sleep_for( std::chrono::seconds( 5 ) ); bench::print_header( "Multi-threaded (32 connections, varying threads)" ); - collector.add( bench_multithread( 1, 32, 312 ) ); - collector.add( bench_multithread( 2, 32, 312 ) ); - collector.add( bench_multithread( 4, 32, 312 ) ); - collector.add( bench_multithread( 8, 32, 312 ) ); + collector.add( bench_multithread( 1, 32, 31250 ) ); + collector.add( bench_multithread( 2, 32, 31250 ) ); + collector.add( bench_multithread( 4, 32, 31250 ) ); + collector.add( bench_multithread( 8, 32, 31250 ) ); } } diff --git a/bench/corosio/io_context_bench.cpp b/bench/corosio/io_context_bench.cpp index 57d033d8..b097761e 100644 --- a/bench/corosio/io_context_bench.cpp +++ b/bench/corosio/io_context_bench.cpp @@ -234,8 +234,6 @@ void run_io_context_benchmarks( bench::result_collector& collector, char const* filter ) { - std::cout << "\n>>> io_context Benchmarks <<<\n"; - bool run_all = !filter || std::strcmp( filter, "all" ) == 0; // Warm up @@ -249,16 +247,16 @@ void run_io_context_benchmarks( } if( run_all || std::strcmp( filter, "single_threaded" ) == 0 ) - collector.add( bench_single_threaded_post( 1000000 ) ); + collector.add( bench_single_threaded_post( 5000000 ) ); if( run_all || std::strcmp( filter, "multithreaded" ) == 0 ) - collector.add( bench_multithreaded_scaling( 1000000, 8 ) ); + collector.add( bench_multithreaded_scaling( 5000000, 8 ) ); if( run_all || std::strcmp( filter, "interleaved" ) == 0 ) - collector.add( bench_interleaved_post_run( 10000, 100 ) ); + collector.add( bench_interleaved_post_run( 50000, 100 ) ); if( run_all || std::strcmp( filter, "concurrent" ) == 0 ) - collector.add( bench_concurrent_post_run( 4, 250000 ) ); + collector.add( bench_concurrent_post_run( 4, 1250000 ) ); } // Explicit instantiations diff --git a/bench/corosio/socket_latency_bench.cpp b/bench/corosio/socket_latency_bench.cpp index 9ecd0c84..c2312aad 100644 --- a/bench/corosio/socket_latency_bench.cpp +++ b/bench/corosio/socket_latency_bench.cpp @@ -188,12 +188,29 @@ void run_socket_latency_benchmarks( bench::result_collector& collector, char const* filter ) { - std::cout << "\n>>> Socket Latency Benchmarks <<<\n"; - bool run_all = !filter || std::strcmp( filter, "all" ) == 0; + // Warm up + { + Context ioc; + auto [c, s] = corosio::test::make_socket_pair( ioc ); + char buf[64] = {}; + auto task = [&]() -> capy::task<> + { + for( int i = 0; i < 100; ++i ) + { + (void)co_await c.write_some( capy::const_buffer( buf, sizeof( buf ) ) ); + (void)co_await s.read_some( capy::mutable_buffer( buf, sizeof( buf ) ) ); + } + }; + capy::run_async( ioc.get_executor() )( task() ); + ioc.run(); + c.close(); + s.close(); + } + std::vector message_sizes = { 1, 64, 1024 }; - int iterations = 1000; + int iterations = 1000000; if( run_all || std::strcmp( filter, "pingpong" ) == 0 ) { @@ -205,9 +222,9 @@ void run_socket_latency_benchmarks( if( run_all || std::strcmp( filter, "concurrent" ) == 0 ) { bench::print_header( "Concurrent Socket Pairs Latency" ); - collector.add( bench_concurrent_latency( 1, 64, 1000 ) ); - collector.add( bench_concurrent_latency( 4, 64, 500 ) ); - collector.add( bench_concurrent_latency( 16, 64, 250 ) ); + collector.add( bench_concurrent_latency( 1, 64, 1000000 ) ); + collector.add( bench_concurrent_latency( 4, 64, 500000 ) ); + collector.add( bench_concurrent_latency( 16, 64, 250000 ) ); } } diff --git a/bench/corosio/socket_throughput_bench.cpp b/bench/corosio/socket_throughput_bench.cpp index 919a3535..748859d4 100644 --- a/bench/corosio/socket_throughput_bench.cpp +++ b/bench/corosio/socket_throughput_bench.cpp @@ -240,12 +240,26 @@ void run_socket_throughput_benchmarks( bench::result_collector& collector, char const* filter ) { - std::cout << "\n>>> Socket Throughput Benchmarks <<<\n"; - bool run_all = !filter || std::strcmp( filter, "all" ) == 0; + // Warm up + { + Context ioc; + auto [w, r] = corosio::test::make_socket_pair( ioc ); + std::vector buf( 4096, 'w' ); + auto task = [&]() -> capy::task<> + { + (void)co_await w.write_some( capy::const_buffer( buf.data(), buf.size() ) ); + (void)co_await r.read_some( capy::mutable_buffer( buf.data(), buf.size() ) ); + }; + capy::run_async( ioc.get_executor() )( task() ); + ioc.run(); + w.close(); + r.close(); + } + std::vector buffer_sizes = { 1024, 4096, 16384, 65536 }; - std::size_t transfer_size = 64 * 1024 * 1024; + std::size_t transfer_size = 4ULL * 1024 * 1024 * 1024; if( run_all || std::strcmp( filter, "unidirectional" ) == 0 ) {