diff --git a/include/boost/charconv/detail/apply_sign.hpp b/include/boost/charconv/detail/apply_sign.hpp index ef6db714..5e5c79e6 100644 --- a/include/boost/charconv/detail/apply_sign.hpp +++ b/include/boost/charconv/detail/apply_sign.hpp @@ -26,13 +26,13 @@ namespace boost { namespace charconv { namespace detail { template , typename std::enable_if::value, bool>::type = true> -constexpr Unsigned_Integer apply_sign(Integer val) noexcept +BOOST_CHARCONV_HOST_DEVICE constexpr Unsigned_Integer apply_sign(Integer val) noexcept { return -(static_cast(val)); } template ::value, bool>::type = true> -constexpr Unsigned_Integer apply_sign(Unsigned_Integer val) noexcept +BOOST_CHARCONV_HOST_DEVICE constexpr Unsigned_Integer apply_sign(Unsigned_Integer val) noexcept { return val; } diff --git a/include/boost/charconv/detail/memcpy.hpp b/include/boost/charconv/detail/memcpy.hpp index 1e68315f..4f30dccd 100644 --- a/include/boost/charconv/detail/memcpy.hpp +++ b/include/boost/charconv/detail/memcpy.hpp @@ -25,6 +25,22 @@ namespace boost { namespace charconv { namespace detail { +#ifdef __NVCC__ + +__host__ __device__ constexpr char* memcpy(char* dest, const char* src, std::size_t count) +{ + for (std::size_t i = 0; i < count; ++i) + { + *(dest + i) = *(src + i); + } + + return dest; +} + +#define BOOST_CHARCONV_CONSTEXPR constexpr + +#else + #if !defined(BOOST_CHARCONV_NO_CONSTEXPR_DETECTION) && defined(BOOST_CXX14_CONSTEXPR) #define BOOST_CHARCONV_CONSTEXPR constexpr @@ -69,6 +85,8 @@ inline void* memcpy(void* dest, const void* src, std::size_t count) #endif +#endif // NVCC + }}} // Namespace boost::charconv::detail #ifdef BOOST_CHARCONV_STRINGOP_OVERFLOW_DISABLED diff --git a/include/boost/charconv/detail/to_chars_integer_impl.hpp b/include/boost/charconv/detail/to_chars_integer_impl.hpp index 5b256c26..e6e7b3ca 100644 --- a/include/boost/charconv/detail/to_chars_integer_impl.hpp +++ b/include/boost/charconv/detail/to_chars_integer_impl.hpp @@ -52,6 +52,8 @@ static constexpr 
char radix_table[] = { '9', '5', '9', '6', '9', '7', '9', '8', '9', '9' }; +#ifndef __NVCC__ + static constexpr char digit_table[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', @@ -301,6 +303,8 @@ BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars_128integer_impl(char* first, c return {first + converted_value_digits, std::errc()}; } +#endif // __NVCC__ + // Conversion warning from shift operators with unsigned char #if defined(__GNUC__) && __GNUC__ >= 5 # pragma GCC diagnostic push @@ -313,8 +317,19 @@ BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars_128integer_impl(char* first, c // All other bases // Use a simple lookup table to put together the Integer in character form template -BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars_integer_impl(char* first, char* last, Integer value, int base) noexcept +BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars_integer_impl(char* first, char* last, Integer value, int base) noexcept { + #ifdef __NVCC__ + + constexpr char digit_table[] = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', + 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', + 'u', 'v', 'w', 'x', 'y', 'z' + }; + + #endif + if (!((first <= last) && (base >= 2 && base <= 36))) { return {last, std::errc::invalid_argument}; @@ -381,6 +396,18 @@ BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars_integer_impl(char* first, char } break; + #ifdef __NVCC__ + + case 10: + while (unsigned_value != static_cast(0)) + { + *end-- = static_cast(zero + (unsigned_value % 10U)); + unsigned_value /= 10U; + } + break; + + #endif + case 16: while (unsigned_value != static_cast(0)) { @@ -430,13 +457,18 @@ BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars_integer_impl(char* first, char #endif template -BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars_int(char* first, char* last, Integer value, int base = 10) noexcept +BOOST_CHARCONV_HOST_DEVICE 
BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars_int(char* first, char* last, Integer value, int base = 10) noexcept { using Unsigned_Integer = typename std::make_unsigned::type; + + // The specialized base 10 path requires lookup tables and memcpy + // On device, we instead use the trivial divide and mod to avoid these + #ifndef __NVCC__ if (base == 10) { return to_chars_integer_impl(first, last, value); } + #endif return to_chars_integer_impl(first, last, value, base); } diff --git a/include/boost/charconv/detail/to_chars_result.hpp b/include/boost/charconv/detail/to_chars_result.hpp index e564fe6c..a0fedbbc 100644 --- a/include/boost/charconv/detail/to_chars_result.hpp +++ b/include/boost/charconv/detail/to_chars_result.hpp @@ -5,6 +5,7 @@ #ifndef BOOST_CHARCONV_DETAIL_TO_CHARS_RESULT_HPP #define BOOST_CHARCONV_DETAIL_TO_CHARS_RESULT_HPP +#include #include // 22.13.2, Primitive numerical output conversion @@ -16,17 +17,17 @@ struct to_chars_result char *ptr; std::errc ec; - constexpr friend bool operator==(const to_chars_result &lhs, const to_chars_result &rhs) noexcept + BOOST_CHARCONV_HOST_DEVICE constexpr friend bool operator==(const to_chars_result &lhs, const to_chars_result &rhs) noexcept { return lhs.ptr == rhs.ptr && lhs.ec == rhs.ec; } - constexpr friend bool operator!=(const to_chars_result &lhs, const to_chars_result &rhs) noexcept + BOOST_CHARCONV_HOST_DEVICE constexpr friend bool operator!=(const to_chars_result &lhs, const to_chars_result &rhs) noexcept { return !(lhs == rhs); } - constexpr explicit operator bool() const noexcept { return ec == std::errc{}; } + BOOST_CHARCONV_HOST_DEVICE constexpr explicit operator bool() const noexcept { return ec == std::errc{}; } }; }} // Namespaces diff --git a/include/boost/charconv/to_chars.hpp b/include/boost/charconv/to_chars.hpp index 7192fda5..35726262 100644 --- a/include/boost/charconv/to_chars.hpp +++ b/include/boost/charconv/to_chars.hpp @@ -17,47 +17,47 @@ namespace charconv { // integer overloads 
BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, bool value, int base) noexcept = delete; -BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, char value, int base = 10) noexcept +BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, char value, int base = 10) noexcept { return detail::to_chars_int(first, last, value, base); } -BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, signed char value, int base = 10) noexcept +BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, signed char value, int base = 10) noexcept { return detail::to_chars_int(first, last, value, base); } -BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, unsigned char value, int base = 10) noexcept +BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, unsigned char value, int base = 10) noexcept { return detail::to_chars_int(first, last, value, base); } -BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, short value, int base = 10) noexcept +BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, short value, int base = 10) noexcept { return detail::to_chars_int(first, last, value, base); } -BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, unsigned short value, int base = 10) noexcept +BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, unsigned short value, int base = 10) noexcept { return detail::to_chars_int(first, last, value, base); } -BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, int value, int base = 10) noexcept +BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, int value, int base = 10) noexcept { return 
detail::to_chars_int(first, last, value, base); } -BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, unsigned int value, int base = 10) noexcept +BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, unsigned int value, int base = 10) noexcept { return detail::to_chars_int(first, last, value, base); } -BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, long value, int base = 10) noexcept +BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, long value, int base = 10) noexcept { return detail::to_chars_int(first, last, value, base); } -BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, unsigned long value, int base = 10) noexcept +BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, unsigned long value, int base = 10) noexcept { return detail::to_chars_int(first, last, value, base); } -BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, long long value, int base = 10) noexcept +BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, long long value, int base = 10) noexcept { return detail::to_chars_int(first, last, value, base); } -BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, unsigned long long value, int base = 10) noexcept +BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, unsigned long long value, int base = 10) noexcept { return detail::to_chars_int(first, last, value, base); } diff --git a/test/cuda_jamfile b/test/cuda_jamfile index be30752e..3ea223f1 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -32,3 +32,27 @@ run test_from_chars_bases_long.cu ; run test_from_chars_bases_unsigned_long.cu ; run test_from_chars_bases_long_long.cu ; run test_from_chars_bases_unsigned_long_long.cu ; + 
+run test_to_chars_char.cu ; +run test_to_chars_signed_char.cu ; +run test_to_chars_unsigned_char.cu ; +run test_to_chars_short.cu ; +run test_to_chars_unsigned_short.cu ; +run test_to_chars_int.cu ; +run test_to_chars_unsigned_int.cu ; +run test_to_chars_long.cu ; +run test_to_chars_unsigned_long.cu ; +run test_to_chars_long_long.cu ; +run test_to_chars_unsigned_long_long.cu ; + +run test_to_chars_bases_char.cu ; +run test_to_chars_bases_signed_char.cu ; +run test_to_chars_bases_unsigned_char.cu ; +run test_to_chars_bases_short.cu ; +run test_to_chars_bases_unsigned_short.cu ; +run test_to_chars_bases_int.cu ; +run test_to_chars_bases_unsigned_int.cu ; +run test_to_chars_bases_long.cu ; +run test_to_chars_bases_unsigned_long.cu ; +run test_to_chars_bases_long_long.cu ; +run test_to_chars_bases_unsigned_long_long.cu ; diff --git a/test/test_to_chars_bases_char.cu b/test/test_to_chars_bases_char.cu new file mode 100644 index 00000000..73a3fcb2 --- /dev/null +++ b/test/test_to_chars_bases_char.cu @@ -0,0 +1,112 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = char; + +constexpr int BUF_SIZE = 128; + +__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i], base); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + + std::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + + for (int base = 2; base <= 36; ++base) + { + // Initialize the input vectors + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = static_cast(dist(rng)); + } + + // Launch the CUDA Kernel + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base); + cudaDeviceSynchronize(); 
+ + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl; + } + + std::cout << "All bases PASSED" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_to_chars_bases_int.cu b/test/test_to_chars_bases_int.cu new file mode 100644 index 00000000..914b654d --- /dev/null +++ b/test/test_to_chars_bases_int.cu @@ -0,0 +1,112 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = int; + +constexpr int BUF_SIZE = 128; + +__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i], base); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + + std::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + + for (int base = 2; base <= 36; ++base) + { + // Initialize the input vectors + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base); + cudaDeviceSynchronize(); + + std::cout 
<< "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl; + } + + std::cout << "All bases PASSED" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_to_chars_bases_long.cu b/test/test_to_chars_bases_long.cu new file mode 100644 index 00000000..635acaf9 --- /dev/null +++ b/test/test_to_chars_bases_long.cu @@ -0,0 +1,112 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = long; + +constexpr int BUF_SIZE = 128; + +__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i], base); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + + std::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + + for (int base = 2; base <= 36; ++base) + { + // Initialize the input vectors + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base); + cudaDeviceSynchronize(); + + 
std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl; + } + + std::cout << "All bases PASSED" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_to_chars_bases_long_long.cu b/test/test_to_chars_bases_long_long.cu new file mode 100644 index 00000000..8c6fea3d --- /dev/null +++ b/test/test_to_chars_bases_long_long.cu @@ -0,0 +1,112 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = long long; + +constexpr int BUF_SIZE = 128; + +__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i], base); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + + std::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + + for (int base = 2; base <= 36; ++base) + { + // Initialize the input vectors + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base); + cudaDeviceSynchronize(); + + 
std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl; + } + + std::cout << "All bases PASSED" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_to_chars_bases_short.cu b/test/test_to_chars_bases_short.cu new file mode 100644 index 00000000..464e36f0 --- /dev/null +++ b/test/test_to_chars_bases_short.cu @@ -0,0 +1,112 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = short; + +constexpr int BUF_SIZE = 128; + +__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i], base); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + + std::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + + for (int base = 2; base <= 36; ++base) + { + // Initialize the input vectors + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base); + cudaDeviceSynchronize(); + + 
std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl; + } + + std::cout << "All bases PASSED" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_to_chars_bases_signed_char.cu b/test/test_to_chars_bases_signed_char.cu new file mode 100644 index 00000000..e8b33f2e --- /dev/null +++ b/test/test_to_chars_bases_signed_char.cu @@ -0,0 +1,112 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = signed char; + +constexpr int BUF_SIZE = 128; + +__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i], base); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + + std::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + + for (int base = 2; base <= 36; ++base) + { + // Initialize the input vectors + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = static_cast(dist(rng)); + } + + // Launch the CUDA Kernel + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base); + 
cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl; + } + + std::cout << "All bases PASSED" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_to_chars_bases_unsigned_char.cu b/test/test_to_chars_bases_unsigned_char.cu new file mode 100644 index 00000000..3c146dec --- /dev/null +++ b/test/test_to_chars_bases_unsigned_char.cu @@ -0,0 +1,112 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = unsigned char; + +constexpr int BUF_SIZE = 128; + +__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i], base); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + + std::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + + for (int base = 2; base <= 36; ++base) + { + // Initialize the input vectors + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = static_cast(dist(rng)); + } + + // Launch the CUDA Kernel + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base); + 
cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl; + } + + std::cout << "All bases PASSED" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_to_chars_bases_unsigned_int.cu b/test/test_to_chars_bases_unsigned_int.cu new file mode 100644 index 00000000..a54b9d79 --- /dev/null +++ b/test/test_to_chars_bases_unsigned_int.cu @@ -0,0 +1,112 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = unsigned int; + +constexpr int BUF_SIZE = 128; + +__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i], base); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + + std::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + + for (int base = 2; base <= 36; ++base) + { + // Initialize the input vectors + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base); + cudaDeviceSynchronize(); + + 
std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl; + } + + std::cout << "All bases PASSED" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_to_chars_bases_unsigned_long.cu b/test/test_to_chars_bases_unsigned_long.cu new file mode 100644 index 00000000..53f0fd28 --- /dev/null +++ b/test/test_to_chars_bases_unsigned_long.cu @@ -0,0 +1,112 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = unsigned long; + +constexpr int BUF_SIZE = 128; + +__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i], base); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + + std::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + + for (int base = 2; base <= 36; ++base) + { + // Initialize the input vectors + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base); + cudaDeviceSynchronize(); + + 
std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl; + } + + std::cout << "All bases PASSED" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_to_chars_bases_unsigned_long_long.cu b/test/test_to_chars_bases_unsigned_long_long.cu new file mode 100644 index 00000000..f1162313 --- /dev/null +++ b/test/test_to_chars_bases_unsigned_long_long.cu @@ -0,0 +1,112 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = unsigned long long; + +constexpr int BUF_SIZE = 128; + +__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i], base); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + + std::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + + for (int base = 2; base <= 36; ++base) + { + // Initialize the input vectors + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base); + 
cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl; + } + + std::cout << "All bases PASSED" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_to_chars_bases_unsigned_short.cu b/test/test_to_chars_bases_unsigned_short.cu new file mode 100644 index 00000000..6f710daf --- /dev/null +++ b/test/test_to_chars_bases_unsigned_short.cu @@ -0,0 +1,112 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = unsigned short; + +constexpr int BUF_SIZE = 128; + +__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i], base); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + int threadsPerBlock = 256; + int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; + + std::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + + for (int base = 2; base <= 36; ++base) + { + // Initialize the input vectors + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base); + cudaDeviceSynchronize(); + 
+ std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl; + } + + std::cout << "All bases PASSED" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_to_chars_char.cu b/test/test_to_chars_char.cu new file mode 100644 index 00000000..2a23108d --- /dev/null +++ b/test/test_to_chars_char.cu @@ -0,0 +1,105 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = char; + +constexpr int BUF_SIZE = 32; + +__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i]); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + // Initialize the input vectors + std::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = static_cast(dist(rng)); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = 
cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_to_chars_int.cu b/test/test_to_chars_int.cu new file mode 100644 index 00000000..5028b20b --- /dev/null +++ b/test/test_to_chars_int.cu @@ -0,0 +1,105 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = int; + +constexpr int BUF_SIZE = 32; + +__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i]); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + // Initialize the input vectors + std::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = 
cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_to_chars_long.cu b/test/test_to_chars_long.cu new file mode 100644 index 00000000..767426ad --- /dev/null +++ b/test/test_to_chars_long.cu @@ -0,0 +1,105 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = long; + +constexpr int BUF_SIZE = 32; + +__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i]); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + // Initialize the input vectors + std::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = 
cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_to_chars_long_long.cu b/test/test_to_chars_long_long.cu new file mode 100644 index 00000000..14c91426 --- /dev/null +++ b/test/test_to_chars_long_long.cu @@ -0,0 +1,105 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = long long; + +constexpr int BUF_SIZE = 32; + +__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i]); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + // Initialize the input vectors + std::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = 
cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_to_chars_short.cu b/test/test_to_chars_short.cu new file mode 100644 index 00000000..1c07d81a --- /dev/null +++ b/test/test_to_chars_short.cu @@ -0,0 +1,105 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda_") +#include + +using test_type = short; + +constexpr int BUF_SIZE = 32; + +__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i]); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Print the vector length to be used, and compute its size + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + // Initialize the input vectors + std::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; + + err = 
cudaGetLastError(); + + if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify that the result vector is correct + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_to_chars_signed_char.cu b/test/test_to_chars_signed_char.cu new file mode 100644 index 00000000..2f747beb --- /dev/null +++ b/test/test_to_chars_signed_char.cu @@ -0,0 +1,105 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda") +#include + +using test_type = signed char; + +constexpr int BUF_SIZE = 32; + +/** + * Device kernel: each thread computes a global index i and, when i < numElements, + * formats in[i] with boost::charconv::to_chars into its own BUF_SIZE-byte slice of + * out_strings and stores the offset of the returned res.ptr from the start of that + * slice in out_lengths[i]. + */ __global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i]); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine: fills the input buffer using dist and a std::mt19937_64 seeded + * with 42, launches cuda_test, then repeats every conversion on the host and compares + * it with the device output. Returns EXIT_FAILURE on a reported CUDA error or on a + * mismatch, and 0 otherwise. + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Set and print the number of elements to be processed + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + // Initialize the input vectors + std::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + /* NOTE(review): i is std::size_t while numElements is int, so this condition compares unsigned with signed. */ for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = static_cast(dist(rng)); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; /* NOTE(review): "kernal" in the message above is a typo for "kernel"; it is runtime output, so it is left unchanged here. */ + + 
err = cudaGetLastError(); + + /* Stop with EXIT_FAILURE if cudaGetLastError() returned anything other than cudaSuccess. */ if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify the device results: redo each conversion on the host and compare the lengths and the first cpu_len bytes + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_to_chars_unsigned_char.cu b/test/test_to_chars_unsigned_char.cu new file mode 100644 index 00000000..6affa2f9 --- /dev/null +++ b/test/test_to_chars_unsigned_char.cu @@ -0,0 +1,105 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda") +#include + +using test_type = unsigned char; + +constexpr int BUF_SIZE = 32; + +/** + * Device kernel: each thread computes a global index i and, when i < numElements, + * formats in[i] with boost::charconv::to_chars into its own BUF_SIZE-byte slice of + * out_strings and stores the offset of the returned res.ptr from the start of that + * slice in out_lengths[i]. + */ __global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i]); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine: fills the input buffer using dist and a std::mt19937_64 seeded + * with 42, launches cuda_test, then repeats every conversion on the host and compares + * it with the device output. Returns EXIT_FAILURE on a reported CUDA error or on a + * mismatch, and 0 otherwise. + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Set and print the number of elements to be processed + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + // Initialize the input vectors + std::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + /* NOTE(review): i is std::size_t while numElements is int, so this condition compares unsigned with signed. */ for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = static_cast(dist(rng)); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; /* NOTE(review): "kernal" in the message above is a typo for "kernel"; it is runtime output, so it is left unchanged here. */ + 
+ err = cudaGetLastError(); + + /* Stop with EXIT_FAILURE if cudaGetLastError() returned anything other than cudaSuccess. */ if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify the device results: redo each conversion on the host and compare the lengths and the first cpu_len bytes + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_to_chars_unsigned_int.cu b/test/test_to_chars_unsigned_int.cu new file mode 100644 index 00000000..07427fc1 --- /dev/null +++ b/test/test_to_chars_unsigned_int.cu @@ -0,0 +1,105 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda") +#include + +using test_type = unsigned int; + +constexpr int BUF_SIZE = 32; + +/** + * Device kernel: each thread computes a global index i and, when i < numElements, + * formats in[i] with boost::charconv::to_chars into its own BUF_SIZE-byte slice of + * out_strings and stores the offset of the returned res.ptr from the start of that + * slice in out_lengths[i]. + */ __global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i]); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine: fills the input buffer using dist and a std::mt19937_64 seeded + * with 42, launches cuda_test, then repeats every conversion on the host and compares + * it with the device output. Returns EXIT_FAILURE on a reported CUDA error or on a + * mismatch, and 0 otherwise. + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Set and print the number of elements to be processed + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + // Initialize the input vectors + std::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + /* NOTE(review): i is std::size_t while numElements is int, so this condition compares unsigned with signed. */ for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; /* NOTE(review): "kernal" in the message above is a typo for "kernel"; it is runtime output, so it is left unchanged here. */ + + err = 
cudaGetLastError(); + + /* Stop with EXIT_FAILURE if cudaGetLastError() returned anything other than cudaSuccess. */ if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify the device results: redo each conversion on the host and compare the lengths and the first cpu_len bytes + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_to_chars_unsigned_long.cu b/test/test_to_chars_unsigned_long.cu new file mode 100644 index 00000000..d72c96a4 --- /dev/null +++ b/test/test_to_chars_unsigned_long.cu @@ -0,0 +1,105 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda") +#include + +using test_type = unsigned long; + +constexpr int BUF_SIZE = 32; + +/** + * Device kernel: each thread computes a global index i and, when i < numElements, + * formats in[i] with boost::charconv::to_chars into its own BUF_SIZE-byte slice of + * out_strings and stores the offset of the returned res.ptr from the start of that + * slice in out_lengths[i]. + */ __global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i]); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine: fills the input buffer using dist and a std::mt19937_64 seeded + * with 42, launches cuda_test, then repeats every conversion on the host and compares + * it with the device output. Returns EXIT_FAILURE on a reported CUDA error or on a + * mismatch, and 0 otherwise. + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Set and print the number of elements to be processed + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + // Initialize the input vectors + std::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + /* NOTE(review): i is std::size_t while numElements is int, so this condition compares unsigned with signed. */ for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; /* NOTE(review): "kernal" in the message above is a typo for "kernel"; it is runtime output, so it is left unchanged here. */ + + err = 
cudaGetLastError(); + + /* Stop with EXIT_FAILURE if cudaGetLastError() returned anything other than cudaSuccess. */ if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify the device results: redo each conversion on the host and compare the lengths and the first cpu_len bytes + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_to_chars_unsigned_long_long.cu b/test/test_to_chars_unsigned_long_long.cu new file mode 100644 index 00000000..c9ddd30c --- /dev/null +++ b/test/test_to_chars_unsigned_long_long.cu @@ -0,0 +1,105 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda") +#include + +using test_type = unsigned long long; + +constexpr int BUF_SIZE = 32; + +/** + * Device kernel: each thread computes a global index i and, when i < numElements, + * formats in[i] with boost::charconv::to_chars into its own BUF_SIZE-byte slice of + * out_strings and stores the offset of the returned res.ptr from the start of that + * slice in out_lengths[i]. + */ __global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i]); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine: fills the input buffer using dist and a std::mt19937_64 seeded + * with 42, launches cuda_test, then repeats every conversion on the host and compares + * it with the device output. Returns EXIT_FAILURE on a reported CUDA error or on a + * mismatch, and 0 otherwise. + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Set and print the number of elements to be processed + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + // Initialize the input vectors + std::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + /* NOTE(review): i is std::size_t while numElements is int, so this condition compares unsigned with signed. */ for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; /* NOTE(review): "kernal" in the message above is a typo for "kernel"; it is runtime output, so it is left unchanged here. */ + + err = 
cudaGetLastError(); + + /* Stop with EXIT_FAILURE if cudaGetLastError() returned anything other than cudaSuccess. */ if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify the device results: redo each conversion on the host and compare the lengths and the first cpu_len bytes + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +} diff --git a/test/test_to_chars_unsigned_short.cu b/test/test_to_chars_unsigned_short.cu new file mode 100644 index 00000000..d4f001d9 --- /dev/null +++ b/test/test_to_chars_unsigned_short.cu @@ -0,0 +1,105 @@ +// Copyright Matt Borland 2024 - 2026. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include "cuda_managed_ptr.hpp" +#include "stopwatch.hpp" + +// For the CUDA runtime routines (prefixed with "cuda") +#include + +using test_type = unsigned short; + +constexpr int BUF_SIZE = 32; + +/** + * Device kernel: each thread computes a global index i and, when i < numElements, + * formats in[i] with boost::charconv::to_chars into its own BUF_SIZE-byte slice of + * out_strings and stores the offset of the returned res.ptr from the start of that + * slice in out_lengths[i]. + */ __global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < numElements) + { + char* buf = out_strings + i * BUF_SIZE; + auto res = boost::charconv::to_chars(buf, buf + BUF_SIZE, in[i]); + out_lengths[i] = static_cast(res.ptr - buf); + } +} + +/** + * Host main routine: fills the input buffer using dist and a std::mt19937_64 seeded + * with 42, launches cuda_test, then repeats every conversion on the host and compares + * it with the device output. Returns EXIT_FAILURE on a reported CUDA error or on a + * mismatch, and 0 otherwise. + */ +int main(void) +{ + std::mt19937_64 rng {42}; + + // Error code to check return values for CUDA calls + cudaError_t err = cudaSuccess; + + // Set and print the number of elements to be processed + int numElements = 50000; + std::cout << "[Vector operation on " << numElements << " elements]" << std::endl; + + // Allocate the managed input vector + cuda_managed_ptr input_vector(numElements); + + // Allocate the managed output vectors + cuda_managed_ptr output_strings(numElements * BUF_SIZE); + cuda_managed_ptr output_lengths(numElements); + + // Initialize the input vectors + std::uniform_int_distribution dist {(std::numeric_limits::min)(), (std::numeric_limits::max)()}; + /* NOTE(review): i is std::size_t while numElements is int, so this condition compares unsigned with signed. */ for (std::size_t i = 0; i < numElements; ++i) + { + input_vector[i] = dist(rng); + } + + // Launch the CUDA Kernel + int threadsPerBlock = 256; + int blocksPerGrid =(numElements + threadsPerBlock - 1) / threadsPerBlock; + std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl; + + watch w; + + cuda_test<<>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements); + cudaDeviceSynchronize(); + + std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl; /* NOTE(review): "kernal" in the message above is a typo for "kernel"; it is runtime output, so it is left unchanged here. */ + + err = 
cudaGetLastError(); + + /* Stop with EXIT_FAILURE if cudaGetLastError() returned anything other than cudaSuccess. */ if (err != cudaSuccess) + { + std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl; + return EXIT_FAILURE; + } + + // Verify the device results: redo each conversion on the host and compare the lengths and the first cpu_len bytes + w.reset(); + for(int i = 0; i < numElements; ++i) + { + char cpu_buf[BUF_SIZE]; + auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]); + int cpu_len = static_cast(cpu_res.ptr - cpu_buf); + int gpu_len = output_lengths[i]; + const char* gpu_buf = &output_strings[i * BUF_SIZE]; + + if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast(cpu_len)) != 0) + { + std::cerr << "Result verification failed at element " << i << "!" << std::endl; + return EXIT_FAILURE; + } + } + double t = w.elapsed(); + + std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl; + std::cout << "Done\n"; + + return 0; +}