From 960bc1587f56d12583f07f172592aff6ca0a059e Mon Sep 17 00:00:00 2001 From: Jonathan Hao Date: Fri, 13 Feb 2026 13:55:29 +0000 Subject: [PATCH] fix: heap-allocate thread-local random buffer to reduce thread creation overhead MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 1 MiB `thread_local` random buffer in `engine.cpp` was stored inline in the TLS segment (.tbss), causing every `pthread_create` to allocate and zero-initialize 1 MiB per thread. With 31 worker threads this added ~18 ms of overhead to thread pool creation — making UltraHonk verification slower at 32 cores than at 8 cores. Moving the buffer to heap allocation on first use reduces the TLS footprint from ~1 MiB to 16 bytes per thread, dropping thread pool creation from 18 ms to ~1.6 ms for 31 threads. Benchmark results (parity_base circuit, noir-rollup verifier target): Cores | Before | After 8 | 63 ms | 59 ms 32 | 72 ms | 51 ms 64 | 87 ms | 53 ms Resolves AztecProtocol/barretenberg#1624 --- .../cpp/src/barretenberg/numeric/random/engine.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/numeric/random/engine.cpp b/barretenberg/cpp/src/barretenberg/numeric/random/engine.cpp index 6d30a12a75e6..a1eb6a6d259b 100644 --- a/barretenberg/cpp/src/barretenberg/numeric/random/engine.cpp +++ b/barretenberg/cpp/src/barretenberg/numeric/random/engine.cpp @@ -9,6 +9,7 @@ #include #include #include +#include <memory> #include #if defined(__APPLE__) #include @@ -44,8 +45,9 @@ constexpr size_t RANDOM_BUFFER_SIZE = 1UL << 20; #endif struct RandomBufferWrapper { - // Buffer with randomness sampled from a CSPRNG - uint8_t buffer[RANDOM_BUFFER_SIZE]; + // Buffer with randomness sampled from a CSPRNG (heap-allocated on first use to avoid + // bloating TLS — a 1 MiB inline array adds ~0.6 ms per thread creation) + std::unique_ptr<uint8_t[]> buffer; // Offset into the unused part of the buffer ssize_t offset = -1; }; @@ -67,8 
+69,11 @@ template std::array(random_buffer_wrapper.offset) + random_data_buffer_size) > RANDOM_BUFFER_SIZE) { + if (!random_buffer_wrapper.buffer) { + random_buffer_wrapper.buffer = std::make_unique<uint8_t[]>(RANDOM_BUFFER_SIZE); + } size_t bytes_left = RANDOM_BUFFER_SIZE; - uint8_t* current_offset = random_buffer_wrapper.buffer; + uint8_t* current_offset = random_buffer_wrapper.buffer.get(); // Sample until we fill the buffer while (bytes_left != 0) { #if defined(__wasm__) || defined(__APPLE__) @@ -97,7 +102,7 @@ template std::array(random_data_buffer_size); return random_data; }