Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions include/boost/charconv/detail/integer_search_trees.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ BOOST_CHARCONV_CXX14_CONSTEXPR int num_digits(T x) noexcept
}

template <>
BOOST_CHARCONV_CXX14_CONSTEXPR int num_digits(std::uint32_t x) noexcept
BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CXX14_CONSTEXPR int num_digits(std::uint32_t x) noexcept
{
if (x >= UINT32_C(10000))
{
Expand Down Expand Up @@ -76,7 +76,7 @@ BOOST_CHARCONV_CXX14_CONSTEXPR int num_digits(std::uint32_t x) noexcept
}

template <>
BOOST_CHARCONV_CXX14_CONSTEXPR int num_digits(std::uint64_t x) noexcept
BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CXX14_CONSTEXPR int num_digits(std::uint64_t x) noexcept
{
if (x >= UINT64_C(10000000000))
{
Expand Down
3 changes: 3 additions & 0 deletions test/cuda_jamfile
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,6 @@ run test_to_chars_bases_long.cu ;
run test_to_chars_bases_unsigned_long.cu ;
run test_to_chars_bases_long_long.cu ;
run test_to_chars_bases_unsigned_long_long.cu ;

run test_num_digits_uint32.cu ;
run test_num_digits_uint64.cu ;
128 changes: 128 additions & 0 deletions test/test_num_digits_uint32.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
// Copyright Matt Borland 2024 - 2026.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <limits>
#include <boost/charconv/detail/integer_search_trees.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda")
#include <cuda_runtime.h>

using test_type = std::uint32_t;

// Device kernel: each thread handles at most one element, writing
// num_digits(in[idx]) into out[idx].
//
// Indexing uses only the x dimension of the grid and block. Threads whose
// flat index is not below numElements return without touching memory, so the
// launch may contain more threads than there are elements.
__global__ void cuda_test(const test_type *in, int *out, int numElements)
{
    const int idx = threadIdx.x + blockIdx.x * blockDim.x;

    // Tail guard for the last, partially filled block
    if (idx >= numElements)
    {
        return;
    }

    out[idx] = boost::charconv::detail::num_digits(in[idx]);
}

/**
 * Host main routine.
 *
 * Fills a cuda_managed_ptr buffer with pseudo-random std::uint32_t values
 * (generator seeded with 42), overwrites the leading entries with the
 * digit-count boundary values, evaluates num_digits for every element on the
 * device through cuda_test, and compares each device result with the value the
 * same function returns on the host.
 *
 * Returns 0 when every element matches, and EXIT_FAILURE on a kernel launch
 * error, a synchronization/execution error, or the first mismatch.
 */
int main(void)
{
    std::mt19937_64 rng {42};

    // Print the vector length to be used
    int numElements = 50000;
    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

    // Allocate the managed input vector
    cuda_managed_ptr<test_type> input_vector(numElements);

    // Allocate the managed output vector
    cuda_managed_ptr<int> output_vector(numElements);

    // Initialize the input vector with random values in [1, max]
    std::uniform_int_distribution<test_type> dist {1, (std::numeric_limits<test_type>::max)()};
    // int index: matches the type of numElements, avoiding a signed/unsigned comparison
    for (int i = 0; i < numElements; ++i)
    {
        input_vector[i] = dist(rng);
    }

    // Also test boundary values at specific digit counts
    // 1-digit: 1-9, 2-digit: 10-99, ..., 10-digit: 1000000000-4294967295
    // (0 is included as well; it lies outside the random range above)
    test_type boundaries[] = {
        UINT32_C(0),
        UINT32_C(1),
        UINT32_C(9),
        UINT32_C(10),
        UINT32_C(99),
        UINT32_C(100),
        UINT32_C(999),
        UINT32_C(1000),
        UINT32_C(9999),
        UINT32_C(10000),
        UINT32_C(99999),
        UINT32_C(100000),
        UINT32_C(999999),
        UINT32_C(1000000),
        UINT32_C(9999999),
        UINT32_C(10000000),
        UINT32_C(99999999),
        UINT32_C(100000000),
        UINT32_C(999999999),
        UINT32_C(1000000000),
        UINT32_C(4294967295)
    };
    int num_boundaries = static_cast<int>(sizeof(boundaries) / sizeof(boundaries[0]));
    // The boundary values replace the first num_boundaries random inputs
    for (int i = 0; i < num_boundaries && i < numElements; ++i)
    {
        input_vector[i] = boundaries[i];
    }

    // Launch the CUDA Kernel
    int threadsPerBlock = 256;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // ceiling division
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;

    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);

    // A launch does not return a status itself; query it right away so a bad
    // launch configuration is reported as a launch failure.
    const cudaError_t launch_err = cudaGetLastError();

    // Errors raised while the kernel executes surface at the next
    // synchronizing call, so the return value must not be discarded.
    const cudaError_t sync_err = cudaDeviceSynchronize();

    if (launch_err != cudaSuccess)
    {
        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(launch_err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    if (sync_err != cudaSuccess)
    {
        std::cerr << "Failed to synchronize after kernel (error code " << cudaGetErrorString(sync_err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    // Only report a timing once the kernel is known to have completed successfully
    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;

    // Verify that the result vector is correct
    w.reset();
    for (int i = 0; i < numElements; ++i)
    {
        // Host evaluation of the same function is the reference value
        int cpu_result = boost::charconv::detail::num_digits(input_vector[i]);
        if (output_vector[i] != cpu_result)
        {
            std::cerr << "Result verification failed at element " << i
                      << ": input=" << input_vector[i]
                      << " gpu=" << output_vector[i]
                      << " cpu=" << cpu_result << "!" << std::endl;
            return EXIT_FAILURE;
        }
    }
    double t = w.elapsed();

    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return 0;
}
148 changes: 148 additions & 0 deletions test/test_num_digits_uint64.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
// Copyright Matt Borland 2024 - 2026.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <limits>
#include <boost/charconv/detail/integer_search_trees.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda")
#include <cuda_runtime.h>

using test_type = std::uint64_t;

// Device kernel: each thread handles at most one element, writing
// num_digits(in[idx]) into out[idx].
//
// Indexing uses only the x dimension of the grid and block. Threads whose
// flat index is not below numElements return without touching memory, so the
// launch may contain more threads than there are elements.
__global__ void cuda_test(const test_type *in, int *out, int numElements)
{
    const int idx = threadIdx.x + blockIdx.x * blockDim.x;

    // Tail guard for the last, partially filled block
    if (idx >= numElements)
    {
        return;
    }

    out[idx] = boost::charconv::detail::num_digits(in[idx]);
}

/**
 * Host main routine.
 *
 * Fills a cuda_managed_ptr buffer with pseudo-random std::uint64_t values
 * (generator seeded with 42), overwrites the leading entries with the
 * digit-count boundary values, evaluates num_digits for every element on the
 * device through cuda_test, and compares each device result with the value the
 * same function returns on the host.
 *
 * Returns 0 when every element matches, and EXIT_FAILURE on a kernel launch
 * error, a synchronization/execution error, or the first mismatch.
 */
int main(void)
{
    std::mt19937_64 rng {42};

    // Print the vector length to be used
    int numElements = 50000;
    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

    // Allocate the managed input vector
    cuda_managed_ptr<test_type> input_vector(numElements);

    // Allocate the managed output vector
    cuda_managed_ptr<int> output_vector(numElements);

    // Initialize the input vector with random values in [1, max]
    std::uniform_int_distribution<test_type> dist {1, (std::numeric_limits<test_type>::max)()};
    // int index: matches the type of numElements, avoiding a signed/unsigned comparison
    for (int i = 0; i < numElements; ++i)
    {
        input_vector[i] = dist(rng);
    }

    // Also test boundary values at specific digit counts
    // 1-digit: 1-9, 2-digit: 10-99, ..., 20-digit: 10000000000000000000-18446744073709551615
    // (0 is included as well; it lies outside the random range above)
    test_type boundaries[] = {
        UINT64_C(0),
        UINT64_C(1),
        UINT64_C(9),
        UINT64_C(10),
        UINT64_C(99),
        UINT64_C(100),
        UINT64_C(999),
        UINT64_C(1000),
        UINT64_C(9999),
        UINT64_C(10000),
        UINT64_C(99999),
        UINT64_C(100000),
        UINT64_C(999999),
        UINT64_C(1000000),
        UINT64_C(9999999),
        UINT64_C(10000000),
        UINT64_C(99999999),
        UINT64_C(100000000),
        UINT64_C(999999999),
        UINT64_C(1000000000),
        UINT64_C(9999999999),
        UINT64_C(10000000000),
        UINT64_C(99999999999),
        UINT64_C(100000000000),
        UINT64_C(999999999999),
        UINT64_C(1000000000000),
        UINT64_C(9999999999999),
        UINT64_C(10000000000000),
        UINT64_C(99999999999999),
        UINT64_C(100000000000000),
        UINT64_C(999999999999999),
        UINT64_C(1000000000000000),
        UINT64_C(9999999999999999),
        UINT64_C(10000000000000000),
        UINT64_C(99999999999999999),
        UINT64_C(100000000000000000),
        UINT64_C(999999999999999999),
        UINT64_C(1000000000000000000),
        UINT64_C(9999999999999999999),
        UINT64_C(10000000000000000000),
        UINT64_C(18446744073709551615)
    };
    int num_boundaries = static_cast<int>(sizeof(boundaries) / sizeof(boundaries[0]));
    // The boundary values replace the first num_boundaries random inputs
    for (int i = 0; i < num_boundaries && i < numElements; ++i)
    {
        input_vector[i] = boundaries[i];
    }

    // Launch the CUDA Kernel
    int threadsPerBlock = 256;
    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // ceiling division
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    watch w;

    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);

    // A launch does not return a status itself; query it right away so a bad
    // launch configuration is reported as a launch failure.
    const cudaError_t launch_err = cudaGetLastError();

    // Errors raised while the kernel executes surface at the next
    // synchronizing call, so the return value must not be discarded.
    const cudaError_t sync_err = cudaDeviceSynchronize();

    if (launch_err != cudaSuccess)
    {
        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(launch_err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    if (sync_err != cudaSuccess)
    {
        std::cerr << "Failed to synchronize after kernel (error code " << cudaGetErrorString(sync_err) << ")!" << std::endl;
        return EXIT_FAILURE;
    }

    // Only report a timing once the kernel is known to have completed successfully
    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;

    // Verify that the result vector is correct
    w.reset();
    for (int i = 0; i < numElements; ++i)
    {
        // Host evaluation of the same function is the reference value
        int cpu_result = boost::charconv::detail::num_digits(input_vector[i]);
        if (output_vector[i] != cpu_result)
        {
            std::cerr << "Result verification failed at element " << i
                      << ": input=" << input_vector[i]
                      << " gpu=" << output_vector[i]
                      << " cpu=" << cpu_result << "!" << std::endl;
            return EXIT_FAILURE;
        }
    }
    double t = w.elapsed();

    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return 0;
}
Loading