diff --git a/include/boost/charconv/detail/apply_sign.hpp b/include/boost/charconv/detail/apply_sign.hpp
index ef6db714..5e5c79e6 100644
--- a/include/boost/charconv/detail/apply_sign.hpp
+++ b/include/boost/charconv/detail/apply_sign.hpp
@@ -26,13 +26,13 @@ namespace boost { namespace charconv { namespace detail {
 
 template <typename Integer, typename Unsigned_Integer = detail::make_unsigned_t<Integer>,
           typename std::enable_if<detail::is_signed<Integer>::value, bool>::type = true>
-constexpr Unsigned_Integer apply_sign(Integer val) noexcept
+BOOST_CHARCONV_HOST_DEVICE constexpr Unsigned_Integer apply_sign(Integer val) noexcept
 {
     return -(static_cast<Unsigned_Integer>(val));
 }
 
 template <typename Unsigned_Integer, typename std::enable_if<!detail::is_signed<Unsigned_Integer>::value, bool>::type = true>
-constexpr Unsigned_Integer apply_sign(Unsigned_Integer val) noexcept
+BOOST_CHARCONV_HOST_DEVICE constexpr Unsigned_Integer apply_sign(Unsigned_Integer val) noexcept
 {
     return val;
 }
diff --git a/include/boost/charconv/detail/memcpy.hpp b/include/boost/charconv/detail/memcpy.hpp
index 1e68315f..4f30dccd 100644
--- a/include/boost/charconv/detail/memcpy.hpp
+++ b/include/boost/charconv/detail/memcpy.hpp
@@ -25,6 +25,22 @@
 
 namespace boost { namespace charconv { namespace detail {
 
+#ifdef __NVCC__
+
+__host__ __device__ constexpr char* memcpy(char* dest, const char* src, std::size_t count)
+{
+    for (std::size_t offset = 0; offset != count; ++offset)
+    {
+        dest[offset] = src[offset]; // plain byte-by-byte copy: no library call, so it is usable in constexpr and device code
+    }
+
+    return dest;
+}
+
+#define BOOST_CHARCONV_CONSTEXPR constexpr
+
+#else
+
 #if !defined(BOOST_CHARCONV_NO_CONSTEXPR_DETECTION) && defined(BOOST_CXX14_CONSTEXPR)
 
 #define BOOST_CHARCONV_CONSTEXPR constexpr
@@ -69,6 +85,8 @@ inline void* memcpy(void* dest, const void* src, std::size_t count)
 
 #endif
 
+#endif // NVCC
+
 }}} // Namespace boost::charconv::detail
 
 #ifdef BOOST_CHARCONV_STRINGOP_OVERFLOW_DISABLED
diff --git a/include/boost/charconv/detail/to_chars_integer_impl.hpp b/include/boost/charconv/detail/to_chars_integer_impl.hpp
index 5b256c26..e6e7b3ca 100644
--- a/include/boost/charconv/detail/to_chars_integer_impl.hpp
+++ b/include/boost/charconv/detail/to_chars_integer_impl.hpp
@@ -52,6 +52,8 @@ static constexpr char radix_table[] = {
         '9', '5', '9', '6', '9', '7', '9', '8', '9', '9'
 };
 
+#ifndef __NVCC__
+
 static constexpr char digit_table[] = {
         '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
         'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
@@ -301,6 +303,8 @@ BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars_128integer_impl(char* first, c
     return {first + converted_value_digits, std::errc()};
 }
 
+#endif // __NVCC__
+
 // Conversion warning from shift operators with unsigned char
 #if defined(__GNUC__) && __GNUC__ >= 5
 # pragma GCC diagnostic push
@@ -313,8 +317,19 @@ BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars_128integer_impl(char* first, c
 // All other bases
 // Use a simple lookup table to put together the Integer in character form
 template <typename Integer, typename Unsigned_Integer>
-BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars_integer_impl(char* first, char* last, Integer value, int base) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars_integer_impl(char* first, char* last, Integer value, int base) noexcept
 {
+    #ifdef __NVCC__
+
+    constexpr char digit_table[] = {
+        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
+        'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
+        'u', 'v', 'w', 'x', 'y', 'z'
+    };
+
+    #endif
+
     if (!((first <= last) && (base >= 2 && base <= 36)))
     {
         return {last, std::errc::invalid_argument};
@@ -381,6 +396,18 @@ BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars_integer_impl(char* first, char
             }
             break;
 
+        #ifdef __NVCC__
+
+        case 10:
+            while (unsigned_value != static_cast<Unsigned_Integer>(0))
+            {
+                *end-- = static_cast<char>(zero + (unsigned_value % 10U));
+                unsigned_value /= 10U;
+            }
+            break;
+
+        #endif
+
         case 16:
             while (unsigned_value != static_cast<Unsigned_Integer>(0))
             {
@@ -430,13 +457,18 @@ BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars_integer_impl(char* first, char
 #endif
 
 template <typename Integer>
-BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars_int(char* first, char* last, Integer value, int base = 10) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars_int(char* first, char* last, Integer value, int base = 10) noexcept
 {
     using Unsigned_Integer = typename std::make_unsigned<Integer>::type;
+
+    // The specialized base 10 path requires lookup tables and memcpy, so it is compiled out under NVCC
+    // (host and device passes alike); base 10 then goes through the generic divide and mod path below
+    #ifndef __NVCC__
     if (base == 10)
     {
         return to_chars_integer_impl(first, last, value);
     }
+    #endif
 
     return to_chars_integer_impl<Integer, Unsigned_Integer>(first, last, value, base);
 }
diff --git a/include/boost/charconv/detail/to_chars_result.hpp b/include/boost/charconv/detail/to_chars_result.hpp
index e564fe6c..a0fedbbc 100644
--- a/include/boost/charconv/detail/to_chars_result.hpp
+++ b/include/boost/charconv/detail/to_chars_result.hpp
@@ -5,6 +5,7 @@
 #ifndef BOOST_CHARCONV_DETAIL_TO_CHARS_RESULT_HPP
 #define BOOST_CHARCONV_DETAIL_TO_CHARS_RESULT_HPP
 
+#include <boost/charconv/detail/config.hpp>
 #include <system_error>
 
 // 22.13.2, Primitive numerical output conversion
@@ -16,17 +17,17 @@ struct to_chars_result
     char *ptr;
     std::errc ec;
 
-    constexpr friend bool operator==(const to_chars_result &lhs, const to_chars_result &rhs) noexcept
+    BOOST_CHARCONV_HOST_DEVICE constexpr friend bool operator==(const to_chars_result &lhs, const to_chars_result &rhs) noexcept
     {
         return lhs.ptr == rhs.ptr && lhs.ec == rhs.ec;
     }
 
-    constexpr friend bool operator!=(const to_chars_result &lhs, const to_chars_result &rhs) noexcept
+    BOOST_CHARCONV_HOST_DEVICE constexpr friend bool operator!=(const to_chars_result &lhs, const to_chars_result &rhs) noexcept
     {
         return !(lhs == rhs);
     }
 
-    constexpr explicit operator bool() const noexcept { return ec == std::errc{}; }
+    BOOST_CHARCONV_HOST_DEVICE constexpr explicit operator bool() const noexcept { return ec == std::errc{}; }
 };
 
 }} // Namespaces
diff --git a/include/boost/charconv/to_chars.hpp b/include/boost/charconv/to_chars.hpp
index 7192fda5..35726262 100644
--- a/include/boost/charconv/to_chars.hpp
+++ b/include/boost/charconv/to_chars.hpp
@@ -17,47 +17,47 @@ namespace charconv {
 
 // integer overloads
 BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, bool value, int base) noexcept = delete;
-BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, char value, int base = 10) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, char value, int base = 10) noexcept
 {
     return detail::to_chars_int(first, last, value, base);
 }
-BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, signed char value, int base = 10) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, signed char value, int base = 10) noexcept
 {
     return detail::to_chars_int(first, last, value, base);
 }
-BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, unsigned char value, int base = 10) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, unsigned char value, int base = 10) noexcept
 {
     return detail::to_chars_int(first, last, value, base);
 }
-BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, short value, int base = 10) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, short value, int base = 10) noexcept
 {
     return detail::to_chars_int(first, last, value, base);
 }
-BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, unsigned short value, int base = 10) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, unsigned short value, int base = 10) noexcept
 {
     return detail::to_chars_int(first, last, value, base);
 }
-BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, int value, int base = 10) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, int value, int base = 10) noexcept
 {
     return detail::to_chars_int(first, last, value, base);
 }
-BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, unsigned int value, int base = 10) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, unsigned int value, int base = 10) noexcept
 {
     return detail::to_chars_int(first, last, value, base);
 }
-BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, long value, int base = 10) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, long value, int base = 10) noexcept
 {
     return detail::to_chars_int(first, last, value, base);
 }
-BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, unsigned long value, int base = 10) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, unsigned long value, int base = 10) noexcept
 {
     return detail::to_chars_int(first, last, value, base);
 }
-BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, long long value, int base = 10) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, long long value, int base = 10) noexcept
 {
     return detail::to_chars_int(first, last, value, base);
 }
-BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, unsigned long long value, int base = 10) noexcept
+BOOST_CHARCONV_HOST_DEVICE BOOST_CHARCONV_CONSTEXPR to_chars_result to_chars(char* first, char* last, unsigned long long value, int base = 10) noexcept
 {
     return detail::to_chars_int(first, last, value, base);
 }
diff --git a/test/cuda_jamfile b/test/cuda_jamfile
index be30752e..3ea223f1 100644
--- a/test/cuda_jamfile
+++ b/test/cuda_jamfile
@@ -32,3 +32,27 @@ run test_from_chars_bases_long.cu ;
 run test_from_chars_bases_unsigned_long.cu ;
 run test_from_chars_bases_long_long.cu ;
 run test_from_chars_bases_unsigned_long_long.cu ;
+
+run test_to_chars_char.cu ;
+run test_to_chars_signed_char.cu ;
+run test_to_chars_unsigned_char.cu ;
+run test_to_chars_short.cu ;
+run test_to_chars_unsigned_short.cu ;
+run test_to_chars_int.cu ;
+run test_to_chars_unsigned_int.cu ;
+run test_to_chars_long.cu ;
+run test_to_chars_unsigned_long.cu ;
+run test_to_chars_long_long.cu ;
+run test_to_chars_unsigned_long_long.cu ;
+
+run test_to_chars_bases_char.cu ;
+run test_to_chars_bases_signed_char.cu ;
+run test_to_chars_bases_unsigned_char.cu ;
+run test_to_chars_bases_short.cu ;
+run test_to_chars_bases_unsigned_short.cu ;
+run test_to_chars_bases_int.cu ;
+run test_to_chars_bases_unsigned_int.cu ;
+run test_to_chars_bases_long.cu ;
+run test_to_chars_bases_unsigned_long.cu ;
+run test_to_chars_bases_long_long.cu ;
+run test_to_chars_bases_unsigned_long_long.cu ;
diff --git a/test/test_to_chars_bases_char.cu b/test/test_to_chars_bases_char.cu
new file mode 100644
index 00000000..73a3fcb2
--- /dev/null
+++ b/test/test_to_chars_bases_char.cu
@@ -0,0 +1,112 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <cstring>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda")
+#include <cuda_runtime.h>
+
+using test_type = char;
+
+constexpr int BUF_SIZE = 128;
+
+__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base)
+{
+    // One thread per element: each thread formats in[idx] into its own BUF_SIZE-byte slot of out_strings
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; } // threads indexed past the end of the input have nothing to do
+
+    char* const slot_begin = out_strings + idx * BUF_SIZE;
+    char* const slot_end = slot_begin + BUF_SIZE;
+    const auto result = boost::charconv::to_chars(slot_begin, slot_end, in[idx], base);
+    out_lengths[idx] = static_cast<int>(result.ptr - slot_begin); // offset of the returned end pointer within the slot
+}
+
+/**
+ * Host main routine: for every base in [2, 36], converts random values on the GPU and checks them against the host result
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values converted per base
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<test_type> input_vector(numElements);
+
+    // Allocate the managed output vectors: one BUF_SIZE-byte slot and one length per element
+    cuda_managed_ptr<char> output_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> output_lengths(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // rounded up so every element gets a thread
+
+    std::uniform_int_distribution<short> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Refill the input vector with random values (int index avoids a signed/unsigned comparison with numElements)
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector[i] = static_cast<test_type>(dist(rng));
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base);
+        cudaDeviceSynchronize();
+
+        std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify each GPU result against the host conversion of the same input
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            char cpu_buf[BUF_SIZE];
+            auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base);
+            int cpu_len = static_cast<int>(cpu_res.ptr - cpu_buf);
+            int gpu_len = output_lengths[i];
+            const char* gpu_buf = &output_strings[i * BUF_SIZE];
+
+            if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast<std::size_t>(cpu_len)) != 0)
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+        double t = w.elapsed();
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_to_chars_bases_int.cu b/test/test_to_chars_bases_int.cu
new file mode 100644
index 00000000..914b654d
--- /dev/null
+++ b/test/test_to_chars_bases_int.cu
@@ -0,0 +1,112 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <cstring>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda")
+#include <cuda_runtime.h>
+
+using test_type = int;
+
+constexpr int BUF_SIZE = 128;
+
+__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base)
+{
+    // One thread per element: each thread formats in[idx] into its own BUF_SIZE-byte slot of out_strings
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; } // threads indexed past the end of the input have nothing to do
+
+    char* const slot_begin = out_strings + idx * BUF_SIZE;
+    char* const slot_end = slot_begin + BUF_SIZE;
+    const auto result = boost::charconv::to_chars(slot_begin, slot_end, in[idx], base);
+    out_lengths[idx] = static_cast<int>(result.ptr - slot_begin); // offset of the returned end pointer within the slot
+}
+
+/**
+ * Host main routine: for every base in [2, 36], converts random values on the GPU and checks them against the host result
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values converted per base
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<test_type> input_vector(numElements);
+
+    // Allocate the managed output vectors: one BUF_SIZE-byte slot and one length per element
+    cuda_managed_ptr<char> output_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> output_lengths(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // rounded up so every element gets a thread
+
+    std::uniform_int_distribution<int> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Refill the input vector with random values (int index avoids a signed/unsigned comparison with numElements)
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector[i] = dist(rng);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base);
+        cudaDeviceSynchronize();
+
+        std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify each GPU result against the host conversion of the same input
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            char cpu_buf[BUF_SIZE];
+            auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base);
+            int cpu_len = static_cast<int>(cpu_res.ptr - cpu_buf);
+            int gpu_len = output_lengths[i];
+            const char* gpu_buf = &output_strings[i * BUF_SIZE];
+
+            if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast<std::size_t>(cpu_len)) != 0)
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+        double t = w.elapsed();
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_to_chars_bases_long.cu b/test/test_to_chars_bases_long.cu
new file mode 100644
index 00000000..635acaf9
--- /dev/null
+++ b/test/test_to_chars_bases_long.cu
@@ -0,0 +1,112 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <cstring>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda")
+#include <cuda_runtime.h>
+
+using test_type = long;
+
+constexpr int BUF_SIZE = 128;
+
+__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base)
+{
+    // One thread per element: each thread formats in[idx] into its own BUF_SIZE-byte slot of out_strings
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; } // threads indexed past the end of the input have nothing to do
+
+    char* const slot_begin = out_strings + idx * BUF_SIZE;
+    char* const slot_end = slot_begin + BUF_SIZE;
+    const auto result = boost::charconv::to_chars(slot_begin, slot_end, in[idx], base);
+    out_lengths[idx] = static_cast<int>(result.ptr - slot_begin); // offset of the returned end pointer within the slot
+}
+
+/**
+ * Host main routine: for every base in [2, 36], converts random values on the GPU and checks them against the host result
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values converted per base
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<test_type> input_vector(numElements);
+
+    // Allocate the managed output vectors: one BUF_SIZE-byte slot and one length per element
+    cuda_managed_ptr<char> output_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> output_lengths(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // rounded up so every element gets a thread
+
+    std::uniform_int_distribution<long> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Refill the input vector with random values (int index avoids a signed/unsigned comparison with numElements)
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector[i] = dist(rng);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base);
+        cudaDeviceSynchronize();
+
+        std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify each GPU result against the host conversion of the same input
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            char cpu_buf[BUF_SIZE];
+            auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base);
+            int cpu_len = static_cast<int>(cpu_res.ptr - cpu_buf);
+            int gpu_len = output_lengths[i];
+            const char* gpu_buf = &output_strings[i * BUF_SIZE];
+
+            if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast<std::size_t>(cpu_len)) != 0)
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+        double t = w.elapsed();
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_to_chars_bases_long_long.cu b/test/test_to_chars_bases_long_long.cu
new file mode 100644
index 00000000..8c6fea3d
--- /dev/null
+++ b/test/test_to_chars_bases_long_long.cu
@@ -0,0 +1,112 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <cstring>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda")
+#include <cuda_runtime.h>
+
+using test_type = long long;
+
+constexpr int BUF_SIZE = 128;
+
+__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base)
+{
+    // One thread per element: each thread formats in[idx] into its own BUF_SIZE-byte slot of out_strings
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; } // threads indexed past the end of the input have nothing to do
+
+    char* const slot_begin = out_strings + idx * BUF_SIZE;
+    char* const slot_end = slot_begin + BUF_SIZE;
+    const auto result = boost::charconv::to_chars(slot_begin, slot_end, in[idx], base);
+    out_lengths[idx] = static_cast<int>(result.ptr - slot_begin); // offset of the returned end pointer within the slot
+}
+
+/**
+ * Host main routine: for every base in [2, 36], converts random values on the GPU and checks them against the host result
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values converted per base
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<test_type> input_vector(numElements);
+
+    // Allocate the managed output vectors: one BUF_SIZE-byte slot and one length per element
+    cuda_managed_ptr<char> output_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> output_lengths(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // rounded up so every element gets a thread
+
+    std::uniform_int_distribution<long long> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Refill the input vector with random values (int index avoids a signed/unsigned comparison with numElements)
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector[i] = dist(rng);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base);
+        cudaDeviceSynchronize();
+
+        std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify each GPU result against the host conversion of the same input
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            char cpu_buf[BUF_SIZE];
+            auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base);
+            int cpu_len = static_cast<int>(cpu_res.ptr - cpu_buf);
+            int gpu_len = output_lengths[i];
+            const char* gpu_buf = &output_strings[i * BUF_SIZE];
+
+            if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast<std::size_t>(cpu_len)) != 0)
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+        double t = w.elapsed();
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_to_chars_bases_short.cu b/test/test_to_chars_bases_short.cu
new file mode 100644
index 00000000..464e36f0
--- /dev/null
+++ b/test/test_to_chars_bases_short.cu
@@ -0,0 +1,112 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <cstring>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda")
+#include <cuda_runtime.h>
+
+using test_type = short;
+
+constexpr int BUF_SIZE = 128;
+
+__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base)
+{
+    // One thread per element: each thread formats in[idx] into its own BUF_SIZE-byte slot of out_strings
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; } // threads indexed past the end of the input have nothing to do
+
+    char* const slot_begin = out_strings + idx * BUF_SIZE;
+    char* const slot_end = slot_begin + BUF_SIZE;
+    const auto result = boost::charconv::to_chars(slot_begin, slot_end, in[idx], base);
+    out_lengths[idx] = static_cast<int>(result.ptr - slot_begin); // offset of the returned end pointer within the slot
+}
+
+/**
+ * Host main routine: for every base in [2, 36], converts random values on the GPU and checks them against the host result
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42};
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values converted per base
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<test_type> input_vector(numElements);
+
+    // Allocate the managed output vectors: one BUF_SIZE-byte slot and one length per element
+    cuda_managed_ptr<char> output_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> output_lengths(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // rounded up so every element gets a thread
+
+    std::uniform_int_distribution<short> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Refill the input vector with random values (int index avoids a signed/unsigned comparison with numElements)
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector[i] = dist(rng);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base);
+        cudaDeviceSynchronize();
+
+        std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Verify each GPU result against the host conversion of the same input
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            char cpu_buf[BUF_SIZE];
+            auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base);
+            int cpu_len = static_cast<int>(cpu_res.ptr - cpu_buf);
+            int gpu_len = output_lengths[i];
+            const char* gpu_buf = &output_strings[i * BUF_SIZE];
+
+            if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast<std::size_t>(cpu_len)) != 0)
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+        double t = w.elapsed();
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_to_chars_bases_signed_char.cu b/test/test_to_chars_bases_signed_char.cu
new file mode 100644
index 00000000..e8b33f2e
--- /dev/null
+++ b/test/test_to_chars_bases_signed_char.cu
@@ -0,0 +1,112 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <cstring>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = signed char;
+
+constexpr int BUF_SIZE = 128;
+
+// One thread per element: writes in[idx] in the given base into the idx-th BUF_SIZE-byte
+// slot of out_strings and stores the offset of the returned end pointer in out_lengths[idx].
+__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base)
+{
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; } // threads past the end of the input have no element
+
+    char* const slot = out_strings + idx * BUF_SIZE;
+    const auto res = boost::charconv::to_chars(slot, slot + BUF_SIZE, in[idx], base);
+    out_lengths[idx] = static_cast<int>(res.ptr - slot);
+}
+
+/** Host driver: for each base in [2, 36], formats random test_type values on the GPU
+ *  and compares every string and length with the host-side to_chars result.
+ *  Returns EXIT_FAILURE on a CUDA error or the first mismatch, 0 otherwise. */
+int main(void)
+{
+    std::mt19937_64 rng {42}; // fixed seed keeps the generated inputs reproducible
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values converted for each base
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<test_type> input_vector(numElements);
+
+    // Allocate the managed outputs: one BUF_SIZE-byte slot and one length per element
+    cuda_managed_ptr<char> output_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> output_lengths(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // round up so every element gets a thread
+    // Character types are not valid IntTypes for uniform_int_distribution, so draw from short and cast
+    std::uniform_int_distribution<short> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Draw fresh inputs for this base (int index: numElements is an int, so no signed/unsigned comparison)
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector[i] = static_cast<test_type>(dist(rng));
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base);
+        cudaDeviceSynchronize(); // block until the kernel has finished so the timing and the outputs are complete
+
+        std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Redo each conversion on the host; length and characters must match the GPU output
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            char cpu_buf[BUF_SIZE];
+            auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base);
+            int cpu_len = static_cast<int>(cpu_res.ptr - cpu_buf);
+            int gpu_len = output_lengths[i];
+            const char* gpu_buf = &output_strings[i * BUF_SIZE];
+
+            if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast<std::size_t>(cpu_len)) != 0)
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+        double t = w.elapsed();
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_to_chars_bases_unsigned_char.cu b/test/test_to_chars_bases_unsigned_char.cu
new file mode 100644
index 00000000..3c146dec
--- /dev/null
+++ b/test/test_to_chars_bases_unsigned_char.cu
@@ -0,0 +1,112 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <cstring>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = unsigned char;
+
+constexpr int BUF_SIZE = 128;
+
+// One thread per element: writes in[idx] in the given base into the idx-th BUF_SIZE-byte
+// slot of out_strings and stores the offset of the returned end pointer in out_lengths[idx].
+__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base)
+{
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; } // threads past the end of the input have no element
+
+    char* const slot = out_strings + idx * BUF_SIZE;
+    const auto res = boost::charconv::to_chars(slot, slot + BUF_SIZE, in[idx], base);
+    out_lengths[idx] = static_cast<int>(res.ptr - slot);
+}
+
+/** Host driver: for each base in [2, 36], formats random test_type values on the GPU
+ *  and compares every string and length with the host-side to_chars result.
+ *  Returns EXIT_FAILURE on a CUDA error or the first mismatch, 0 otherwise. */
+int main(void)
+{
+    std::mt19937_64 rng {42}; // fixed seed keeps the generated inputs reproducible
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values converted for each base
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<test_type> input_vector(numElements);
+
+    // Allocate the managed outputs: one BUF_SIZE-byte slot and one length per element
+    cuda_managed_ptr<char> output_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> output_lengths(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // round up so every element gets a thread
+    // Character types are not valid IntTypes for uniform_int_distribution, so draw from unsigned short and cast
+    std::uniform_int_distribution<unsigned short> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Draw fresh inputs for this base (int index: numElements is an int, so no signed/unsigned comparison)
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector[i] = static_cast<test_type>(dist(rng));
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base);
+        cudaDeviceSynchronize(); // block until the kernel has finished so the timing and the outputs are complete
+
+        std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Redo each conversion on the host; length and characters must match the GPU output
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            char cpu_buf[BUF_SIZE];
+            auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base);
+            int cpu_len = static_cast<int>(cpu_res.ptr - cpu_buf);
+            int gpu_len = output_lengths[i];
+            const char* gpu_buf = &output_strings[i * BUF_SIZE];
+
+            if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast<std::size_t>(cpu_len)) != 0)
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+        double t = w.elapsed();
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_to_chars_bases_unsigned_int.cu b/test/test_to_chars_bases_unsigned_int.cu
new file mode 100644
index 00000000..a54b9d79
--- /dev/null
+++ b/test/test_to_chars_bases_unsigned_int.cu
@@ -0,0 +1,112 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <cstring>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = unsigned int;
+
+constexpr int BUF_SIZE = 128;
+
+// One thread per element: writes in[idx] in the given base into the idx-th BUF_SIZE-byte
+// slot of out_strings and stores the offset of the returned end pointer in out_lengths[idx].
+__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base)
+{
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; } // threads past the end of the input have no element
+
+    char* const slot = out_strings + idx * BUF_SIZE;
+    const auto res = boost::charconv::to_chars(slot, slot + BUF_SIZE, in[idx], base);
+    out_lengths[idx] = static_cast<int>(res.ptr - slot);
+}
+
+/** Host driver: for each base in [2, 36], formats random test_type values on the GPU
+ *  and compares every string and length with the host-side to_chars result.
+ *  Returns EXIT_FAILURE on a CUDA error or the first mismatch, 0 otherwise. */
+int main(void)
+{
+    std::mt19937_64 rng {42}; // fixed seed keeps the generated inputs reproducible
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values converted for each base
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<test_type> input_vector(numElements);
+
+    // Allocate the managed outputs: one BUF_SIZE-byte slot and one length per element
+    cuda_managed_ptr<char> output_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> output_lengths(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // round up so every element gets a thread
+
+    std::uniform_int_distribution<unsigned int> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Draw fresh inputs for this base (int index: numElements is an int, so no signed/unsigned comparison)
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector[i] = dist(rng);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base);
+        cudaDeviceSynchronize(); // block until the kernel has finished so the timing and the outputs are complete
+
+        std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Redo each conversion on the host; length and characters must match the GPU output
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            char cpu_buf[BUF_SIZE];
+            auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base);
+            int cpu_len = static_cast<int>(cpu_res.ptr - cpu_buf);
+            int gpu_len = output_lengths[i];
+            const char* gpu_buf = &output_strings[i * BUF_SIZE];
+
+            if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast<std::size_t>(cpu_len)) != 0)
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+        double t = w.elapsed();
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_to_chars_bases_unsigned_long.cu b/test/test_to_chars_bases_unsigned_long.cu
new file mode 100644
index 00000000..53f0fd28
--- /dev/null
+++ b/test/test_to_chars_bases_unsigned_long.cu
@@ -0,0 +1,112 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <cstring>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = unsigned long;
+
+constexpr int BUF_SIZE = 128;
+
+// One thread per element: writes in[idx] in the given base into the idx-th BUF_SIZE-byte
+// slot of out_strings and stores the offset of the returned end pointer in out_lengths[idx].
+__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base)
+{
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; } // threads past the end of the input have no element
+
+    char* const slot = out_strings + idx * BUF_SIZE;
+    const auto res = boost::charconv::to_chars(slot, slot + BUF_SIZE, in[idx], base);
+    out_lengths[idx] = static_cast<int>(res.ptr - slot);
+}
+
+/** Host driver: for each base in [2, 36], formats random test_type values on the GPU
+ *  and compares every string and length with the host-side to_chars result.
+ *  Returns EXIT_FAILURE on a CUDA error or the first mismatch, 0 otherwise. */
+int main(void)
+{
+    std::mt19937_64 rng {42}; // fixed seed keeps the generated inputs reproducible
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values converted for each base
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<test_type> input_vector(numElements);
+
+    // Allocate the managed outputs: one BUF_SIZE-byte slot and one length per element
+    cuda_managed_ptr<char> output_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> output_lengths(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // round up so every element gets a thread
+
+    std::uniform_int_distribution<unsigned long> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Draw fresh inputs for this base (int index: numElements is an int, so no signed/unsigned comparison)
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector[i] = dist(rng);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base);
+        cudaDeviceSynchronize(); // block until the kernel has finished so the timing and the outputs are complete
+
+        std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Redo each conversion on the host; length and characters must match the GPU output
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            char cpu_buf[BUF_SIZE];
+            auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base);
+            int cpu_len = static_cast<int>(cpu_res.ptr - cpu_buf);
+            int gpu_len = output_lengths[i];
+            const char* gpu_buf = &output_strings[i * BUF_SIZE];
+
+            if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast<std::size_t>(cpu_len)) != 0)
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+        double t = w.elapsed();
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_to_chars_bases_unsigned_long_long.cu b/test/test_to_chars_bases_unsigned_long_long.cu
new file mode 100644
index 00000000..f1162313
--- /dev/null
+++ b/test/test_to_chars_bases_unsigned_long_long.cu
@@ -0,0 +1,112 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <cstring>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = unsigned long long;
+
+constexpr int BUF_SIZE = 128;
+
+// One thread per element: writes in[idx] in the given base into the idx-th BUF_SIZE-byte
+// slot of out_strings and stores the offset of the returned end pointer in out_lengths[idx].
+__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base)
+{
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; } // threads past the end of the input have no element
+
+    char* const slot = out_strings + idx * BUF_SIZE;
+    const auto res = boost::charconv::to_chars(slot, slot + BUF_SIZE, in[idx], base);
+    out_lengths[idx] = static_cast<int>(res.ptr - slot);
+}
+
+/** Host driver: for each base in [2, 36], formats random test_type values on the GPU
+ *  and compares every string and length with the host-side to_chars result.
+ *  Returns EXIT_FAILURE on a CUDA error or the first mismatch, 0 otherwise. */
+int main(void)
+{
+    std::mt19937_64 rng {42}; // fixed seed keeps the generated inputs reproducible
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values converted for each base
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<test_type> input_vector(numElements);
+
+    // Allocate the managed outputs: one BUF_SIZE-byte slot and one length per element
+    cuda_managed_ptr<char> output_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> output_lengths(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // round up so every element gets a thread
+
+    std::uniform_int_distribution<unsigned long long> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Draw fresh inputs for this base (int index: numElements is an int, so no signed/unsigned comparison)
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector[i] = dist(rng);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base);
+        cudaDeviceSynchronize(); // block until the kernel has finished so the timing and the outputs are complete
+
+        std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Redo each conversion on the host; length and characters must match the GPU output
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            char cpu_buf[BUF_SIZE];
+            auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base);
+            int cpu_len = static_cast<int>(cpu_res.ptr - cpu_buf);
+            int gpu_len = output_lengths[i];
+            const char* gpu_buf = &output_strings[i * BUF_SIZE];
+
+            if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast<std::size_t>(cpu_len)) != 0)
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+        double t = w.elapsed();
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_to_chars_bases_unsigned_short.cu b/test/test_to_chars_bases_unsigned_short.cu
new file mode 100644
index 00000000..6f710daf
--- /dev/null
+++ b/test/test_to_chars_bases_unsigned_short.cu
@@ -0,0 +1,112 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <cstring>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = unsigned short;
+
+constexpr int BUF_SIZE = 128;
+
+// One thread per element: writes in[idx] in the given base into the idx-th BUF_SIZE-byte
+// slot of out_strings and stores the offset of the returned end pointer in out_lengths[idx].
+__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements, int base)
+{
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; } // threads past the end of the input have no element
+
+    char* const slot = out_strings + idx * BUF_SIZE;
+    const auto res = boost::charconv::to_chars(slot, slot + BUF_SIZE, in[idx], base);
+    out_lengths[idx] = static_cast<int>(res.ptr - slot);
+}
+
+/** Host driver: for each base in [2, 36], formats random test_type values on the GPU
+ *  and compares every string and length with the host-side to_chars result.
+ *  Returns EXIT_FAILURE on a CUDA error or the first mismatch, 0 otherwise. */
+int main(void)
+{
+    std::mt19937_64 rng {42}; // fixed seed keeps the generated inputs reproducible
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values converted for each base
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<test_type> input_vector(numElements);
+
+    // Allocate the managed outputs: one BUF_SIZE-byte slot and one length per element
+    cuda_managed_ptr<char> output_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> output_lengths(numElements);
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // round up so every element gets a thread
+
+    std::uniform_int_distribution<unsigned short> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+
+    for (int base = 2; base <= 36; ++base)
+    {
+        // Draw fresh inputs for this base (int index: numElements is an int, so no signed/unsigned comparison)
+        for (int i = 0; i < numElements; ++i)
+        {
+            input_vector[i] = dist(rng);
+        }
+
+        // Launch the CUDA Kernel
+        std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads, base " << base << std::endl;
+
+        watch w;
+
+        cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements, base);
+        cudaDeviceSynchronize(); // block until the kernel has finished so the timing and the outputs are complete
+
+        std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+        err = cudaGetLastError();
+
+        if (err != cudaSuccess)
+        {
+            std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+            return EXIT_FAILURE;
+        }
+
+        // Redo each conversion on the host; length and characters must match the GPU output
+        w.reset();
+        for(int i = 0; i < numElements; ++i)
+        {
+            char cpu_buf[BUF_SIZE];
+            auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i], base);
+            int cpu_len = static_cast<int>(cpu_res.ptr - cpu_buf);
+            int gpu_len = output_lengths[i];
+            const char* gpu_buf = &output_strings[i * BUF_SIZE];
+
+            if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast<std::size_t>(cpu_len)) != 0)
+            {
+                std::cerr << "Result verification failed at element " << i << " base " << base << "!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+        double t = w.elapsed();
+
+        std::cout << "Test base " << base << " PASSED, normal calculation time: " << t << "s" << std::endl;
+    }
+
+    std::cout << "All bases PASSED" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_to_chars_char.cu b/test/test_to_chars_char.cu
new file mode 100644
index 00000000..2a23108d
--- /dev/null
+++ b/test/test_to_chars_char.cu
@@ -0,0 +1,105 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <cstring>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = char;
+
+constexpr int BUF_SIZE = 32;
+
+// One thread per element: writes in[idx] via to_chars (no base argument) into the idx-th BUF_SIZE-byte
+// slot of out_strings and stores the offset of the returned end pointer in out_lengths[idx].
+__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements)
+{
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; } // threads past the end of the input have no element
+
+    char* const slot = out_strings + idx * BUF_SIZE;
+    const auto res = boost::charconv::to_chars(slot, slot + BUF_SIZE, in[idx]);
+    out_lengths[idx] = static_cast<int>(res.ptr - slot);
+}
+
+/** Host driver: formats random test_type values on the GPU with to_chars (no base argument)
+ *  and compares every string and length with the host-side to_chars result.
+ *  Returns EXIT_FAILURE on a CUDA error or the first mismatch, 0 otherwise. */
+int main(void)
+{
+    std::mt19937_64 rng {42}; // fixed seed keeps the generated inputs reproducible
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values converted
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<test_type> input_vector(numElements);
+
+    // Allocate the managed outputs: one BUF_SIZE-byte slot and one length per element
+    cuda_managed_ptr<char> output_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> output_lengths(numElements);
+
+    // Initialize the inputs; char is not a valid IntType for uniform_int_distribution, so draw from short and cast
+    std::uniform_int_distribution<short> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    for (int i = 0; i < numElements; ++i) // int index: numElements is an int, so no signed/unsigned comparison
+    {
+        input_vector[i] = static_cast<test_type>(dist(rng));
+    }
+
+    // Launch the CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // round up so every element gets a thread
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements);
+    cudaDeviceSynchronize(); // block until the kernel has finished so the timing and the outputs are complete
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Redo each conversion on the host; length and characters must match the GPU output
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+    {
+        char cpu_buf[BUF_SIZE];
+        auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]);
+        int cpu_len = static_cast<int>(cpu_res.ptr - cpu_buf);
+        int gpu_len = output_lengths[i];
+        const char* gpu_buf = &output_strings[i * BUF_SIZE];
+
+        if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast<std::size_t>(cpu_len)) != 0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+    double t = w.elapsed();
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_to_chars_int.cu b/test/test_to_chars_int.cu
new file mode 100644
index 00000000..5028b20b
--- /dev/null
+++ b/test/test_to_chars_int.cu
@@ -0,0 +1,105 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <cstring>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda_")
+#include <cuda_runtime.h>
+
+using test_type = int;
+
+constexpr int BUF_SIZE = 32;
+
+// One thread per element: writes in[idx] via to_chars (no base argument) into the idx-th BUF_SIZE-byte
+// slot of out_strings and stores the offset of the returned end pointer in out_lengths[idx].
+__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements)
+{
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; } // threads past the end of the input have no element
+
+    char* const slot = out_strings + idx * BUF_SIZE;
+    const auto res = boost::charconv::to_chars(slot, slot + BUF_SIZE, in[idx]);
+    out_lengths[idx] = static_cast<int>(res.ptr - slot);
+}
+
+/** Host driver: formats random test_type values on the GPU with to_chars (no base argument)
+ *  and compares every string and length with the host-side to_chars result.
+ *  Returns EXIT_FAILURE on a CUDA error or the first mismatch, 0 otherwise. */
+int main(void)
+{
+    std::mt19937_64 rng {42}; // fixed seed keeps the generated inputs reproducible
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Number of values converted
+    int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<test_type> input_vector(numElements);
+
+    // Allocate the managed outputs: one BUF_SIZE-byte slot and one length per element
+    cuda_managed_ptr<char> output_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> output_lengths(numElements);
+
+    // Initialize the inputs with values spanning the full range of test_type
+    std::uniform_int_distribution<int> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    for (int i = 0; i < numElements; ++i) // int index: numElements is an int, so no signed/unsigned comparison
+    {
+        input_vector[i] = dist(rng);
+    }
+
+    // Launch the CUDA Kernel
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock; // round up so every element gets a thread
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements);
+    cudaDeviceSynchronize(); // block until the kernel has finished so the timing and the outputs are complete
+
+    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Redo each conversion on the host; length and characters must match the GPU output
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+    {
+        char cpu_buf[BUF_SIZE];
+        auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]);
+        int cpu_len = static_cast<int>(cpu_res.ptr - cpu_buf);
+        int gpu_len = output_lengths[i];
+        const char* gpu_buf = &output_strings[i * BUF_SIZE];
+
+        if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast<std::size_t>(cpu_len)) != 0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+    double t = w.elapsed();
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_to_chars_long.cu b/test/test_to_chars_long.cu
new file mode 100644
index 00000000..767426ad
--- /dev/null
+++ b/test/test_to_chars_long.cu
@@ -0,0 +1,105 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <cstring>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda", e.g. cudaGetLastError)
+#include <cuda_runtime.h>
+
+using test_type = long;
+
+constexpr int BUF_SIZE = 32;
+
+// Each thread formats in[idx] into its own BUF_SIZE-byte slot of out_strings and stores the character count.
+__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements)
+{
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements)
+    {
+        return; // threads past the last element have nothing to convert
+    }
+    char* const slot = out_strings + idx * BUF_SIZE;
+    out_lengths[idx] = static_cast<int>(boost::charconv::to_chars(slot, slot + BUF_SIZE, in[idx]).ptr - slot);
+}
+
+/**
+ * Host driver: formats random test_type values with to_chars on the GPU and checks each against the host result.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42}; // fixed seed, so the generated input sequence is reproducible
+
+    // Number of values to convert; cuda_test converts one value per thread
+    const int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Input values; the kernel receives the raw pointer through get()
+    cuda_managed_ptr<test_type> input_vector(numElements);
+
+    // Per-element outputs: a BUF_SIZE-byte text slot and the count of characters written
+    cuda_managed_ptr<char> output_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> output_lengths(numElements);
+
+    // Fill the input with values drawn from the full range of test_type
+    std::uniform_int_distribution<long> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    for (int i = 0; i < numElements; ++i) // int index matches the bound's type (no signed/unsigned compare)
+    {
+        input_vector[i] = dist(rng);
+    }
+
+    // Launch configuration: round the block count up so every element gets a thread
+    const int threadsPerBlock = 256;
+    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements);
+    // A rejected launch shows up in cudaGetLastError(); a fault while running is returned by cudaDeviceSynchronize()
+    cudaError_t err = cudaGetLastError();
+    if (err == cudaSuccess)
+    {
+        err = cudaDeviceSynchronize();
+    }
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify each GPU result against the same conversion performed on the host
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+    {
+        char cpu_buf[BUF_SIZE];
+        const auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]);
+        const int cpu_len = static_cast<int>(cpu_res.ptr - cpu_buf);
+        const int gpu_len = output_lengths[i];
+        const char* gpu_buf = &output_strings[i * BUF_SIZE];
+
+        if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast<std::size_t>(cpu_len)) != 0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+    const double t = w.elapsed();
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_to_chars_long_long.cu b/test/test_to_chars_long_long.cu
new file mode 100644
index 00000000..14c91426
--- /dev/null
+++ b/test/test_to_chars_long_long.cu
@@ -0,0 +1,105 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <cstring>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda", e.g. cudaGetLastError)
+#include <cuda_runtime.h>
+
+using test_type = long long;
+
+constexpr int BUF_SIZE = 32;
+
+// Each thread formats in[idx] into its own BUF_SIZE-byte slot of out_strings and stores the character count.
+__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements)
+{
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements)
+    {
+        return; // threads past the last element have nothing to convert
+    }
+    char* const slot = out_strings + idx * BUF_SIZE;
+    out_lengths[idx] = static_cast<int>(boost::charconv::to_chars(slot, slot + BUF_SIZE, in[idx]).ptr - slot);
+}
+
+/**
+ * Host driver: formats random test_type values with to_chars on the GPU and checks each against the host result.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42}; // fixed seed, so the generated input sequence is reproducible
+
+    // Number of values to convert; cuda_test converts one value per thread
+    const int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Input values; the kernel receives the raw pointer through get()
+    cuda_managed_ptr<test_type> input_vector(numElements);
+
+    // Per-element outputs: a BUF_SIZE-byte text slot and the count of characters written
+    cuda_managed_ptr<char> output_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> output_lengths(numElements);
+
+    // Fill the input with values drawn from the full range of test_type
+    std::uniform_int_distribution<long long> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    for (int i = 0; i < numElements; ++i) // int index matches the bound's type (no signed/unsigned compare)
+    {
+        input_vector[i] = dist(rng);
+    }
+
+    // Launch configuration: round the block count up so every element gets a thread
+    const int threadsPerBlock = 256;
+    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements);
+    // A rejected launch shows up in cudaGetLastError(); a fault while running is returned by cudaDeviceSynchronize()
+    cudaError_t err = cudaGetLastError();
+    if (err == cudaSuccess)
+    {
+        err = cudaDeviceSynchronize();
+    }
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify each GPU result against the same conversion performed on the host
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+    {
+        char cpu_buf[BUF_SIZE];
+        const auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]);
+        const int cpu_len = static_cast<int>(cpu_res.ptr - cpu_buf);
+        const int gpu_len = output_lengths[i];
+        const char* gpu_buf = &output_strings[i * BUF_SIZE];
+
+        if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast<std::size_t>(cpu_len)) != 0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+    const double t = w.elapsed();
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_to_chars_short.cu b/test/test_to_chars_short.cu
new file mode 100644
index 00000000..1c07d81a
--- /dev/null
+++ b/test/test_to_chars_short.cu
@@ -0,0 +1,105 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <cstring>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda", e.g. cudaGetLastError)
+#include <cuda_runtime.h>
+
+using test_type = short;
+
+constexpr int BUF_SIZE = 32;
+
+// Each thread formats in[idx] into its own BUF_SIZE-byte slot of out_strings and stores the character count.
+__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements)
+{
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements)
+    {
+        return; // threads past the last element have nothing to convert
+    }
+    char* const slot = out_strings + idx * BUF_SIZE;
+    out_lengths[idx] = static_cast<int>(boost::charconv::to_chars(slot, slot + BUF_SIZE, in[idx]).ptr - slot);
+}
+
+/**
+ * Host driver: formats random test_type values with to_chars on the GPU and checks each against the host result.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42}; // fixed seed, so the generated input sequence is reproducible
+
+    // Number of values to convert; cuda_test converts one value per thread
+    const int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Input values; the kernel receives the raw pointer through get()
+    cuda_managed_ptr<test_type> input_vector(numElements);
+
+    // Per-element outputs: a BUF_SIZE-byte text slot and the count of characters written
+    cuda_managed_ptr<char> output_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> output_lengths(numElements);
+
+    // Fill the input with values drawn from the full range of test_type
+    std::uniform_int_distribution<short> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    for (int i = 0; i < numElements; ++i) // int index matches the bound's type (no signed/unsigned compare)
+    {
+        input_vector[i] = dist(rng);
+    }
+
+    // Launch configuration: round the block count up so every element gets a thread
+    const int threadsPerBlock = 256;
+    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements);
+    // A rejected launch shows up in cudaGetLastError(); a fault while running is returned by cudaDeviceSynchronize()
+    cudaError_t err = cudaGetLastError();
+    if (err == cudaSuccess)
+    {
+        err = cudaDeviceSynchronize();
+    }
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify each GPU result against the same conversion performed on the host
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+    {
+        char cpu_buf[BUF_SIZE];
+        const auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]);
+        const int cpu_len = static_cast<int>(cpu_res.ptr - cpu_buf);
+        const int gpu_len = output_lengths[i];
+        const char* gpu_buf = &output_strings[i * BUF_SIZE];
+
+        if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast<std::size_t>(cpu_len)) != 0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+    const double t = w.elapsed();
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_to_chars_signed_char.cu b/test/test_to_chars_signed_char.cu
new file mode 100644
index 00000000..2f747beb
--- /dev/null
+++ b/test/test_to_chars_signed_char.cu
@@ -0,0 +1,105 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <cstring>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda", e.g. cudaGetLastError)
+#include <cuda_runtime.h>
+
+using test_type = signed char;
+
+constexpr int BUF_SIZE = 32;
+
+// Each thread formats in[idx] into its own BUF_SIZE-byte slot of out_strings and stores the character count.
+__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements)
+{
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements)
+    {
+        return; // threads past the last element have nothing to convert
+    }
+    char* const slot = out_strings + idx * BUF_SIZE;
+    out_lengths[idx] = static_cast<int>(boost::charconv::to_chars(slot, slot + BUF_SIZE, in[idx]).ptr - slot);
+}
+
+/**
+ * Host driver: formats random test_type values with to_chars on the GPU and checks each against the host result.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42}; // fixed seed, so the generated input sequence is reproducible
+
+    // Number of values to convert; cuda_test converts one value per thread
+    const int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Input values; the kernel receives the raw pointer through get()
+    cuda_managed_ptr<test_type> input_vector(numElements);
+
+    // Per-element outputs: a BUF_SIZE-byte text slot and the count of characters written
+    cuda_managed_ptr<char> output_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> output_lengths(numElements);
+
+    // Fill across the full test_type range; short is used because uniform_int_distribution is not defined for character types
+    std::uniform_int_distribution<short> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    for (int i = 0; i < numElements; ++i) // int index matches the bound's type (no signed/unsigned compare)
+    {
+        input_vector[i] = static_cast<test_type>(dist(rng));
+    }
+
+    // Launch configuration: round the block count up so every element gets a thread
+    const int threadsPerBlock = 256;
+    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements);
+    // A rejected launch shows up in cudaGetLastError(); a fault while running is returned by cudaDeviceSynchronize()
+    cudaError_t err = cudaGetLastError();
+    if (err == cudaSuccess)
+    {
+        err = cudaDeviceSynchronize();
+    }
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify each GPU result against the same conversion performed on the host
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+    {
+        char cpu_buf[BUF_SIZE];
+        const auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]);
+        const int cpu_len = static_cast<int>(cpu_res.ptr - cpu_buf);
+        const int gpu_len = output_lengths[i];
+        const char* gpu_buf = &output_strings[i * BUF_SIZE];
+
+        if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast<std::size_t>(cpu_len)) != 0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+    const double t = w.elapsed();
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_to_chars_unsigned_char.cu b/test/test_to_chars_unsigned_char.cu
new file mode 100644
index 00000000..6affa2f9
--- /dev/null
+++ b/test/test_to_chars_unsigned_char.cu
@@ -0,0 +1,105 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <cstring>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda", e.g. cudaGetLastError)
+#include <cuda_runtime.h>
+
+using test_type = unsigned char;
+
+constexpr int BUF_SIZE = 32;
+
+// Each thread formats in[idx] into its own BUF_SIZE-byte slot of out_strings and stores the character count.
+__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements)
+{
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements)
+    {
+        return; // threads past the last element have nothing to convert
+    }
+    char* const slot = out_strings + idx * BUF_SIZE;
+    out_lengths[idx] = static_cast<int>(boost::charconv::to_chars(slot, slot + BUF_SIZE, in[idx]).ptr - slot);
+}
+
+/**
+ * Host driver: formats random test_type values with to_chars on the GPU and checks each against the host result.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42}; // fixed seed, so the generated input sequence is reproducible
+
+    // Number of values to convert; cuda_test converts one value per thread
+    const int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Input values; the kernel receives the raw pointer through get()
+    cuda_managed_ptr<test_type> input_vector(numElements);
+
+    // Per-element outputs: a BUF_SIZE-byte text slot and the count of characters written
+    cuda_managed_ptr<char> output_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> output_lengths(numElements);
+
+    // Fill across the full test_type range; unsigned short is used because uniform_int_distribution is not defined for character types
+    std::uniform_int_distribution<unsigned short> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    for (int i = 0; i < numElements; ++i) // int index matches the bound's type (no signed/unsigned compare)
+    {
+        input_vector[i] = static_cast<test_type>(dist(rng));
+    }
+
+    // Launch configuration: round the block count up so every element gets a thread
+    const int threadsPerBlock = 256;
+    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements);
+    // A rejected launch shows up in cudaGetLastError(); a fault while running is returned by cudaDeviceSynchronize()
+    cudaError_t err = cudaGetLastError();
+    if (err == cudaSuccess)
+    {
+        err = cudaDeviceSynchronize();
+    }
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify each GPU result against the same conversion performed on the host
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+    {
+        char cpu_buf[BUF_SIZE];
+        const auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]);
+        const int cpu_len = static_cast<int>(cpu_res.ptr - cpu_buf);
+        const int gpu_len = output_lengths[i];
+        const char* gpu_buf = &output_strings[i * BUF_SIZE];
+
+        if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast<std::size_t>(cpu_len)) != 0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+    const double t = w.elapsed();
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_to_chars_unsigned_int.cu b/test/test_to_chars_unsigned_int.cu
new file mode 100644
index 00000000..07427fc1
--- /dev/null
+++ b/test/test_to_chars_unsigned_int.cu
@@ -0,0 +1,105 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <cstring>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda", e.g. cudaGetLastError)
+#include <cuda_runtime.h>
+
+using test_type = unsigned int;
+
+constexpr int BUF_SIZE = 32;
+
+// Each thread formats in[idx] into its own BUF_SIZE-byte slot of out_strings and stores the character count.
+__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements)
+{
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements)
+    {
+        return; // threads past the last element have nothing to convert
+    }
+    char* const slot = out_strings + idx * BUF_SIZE;
+    out_lengths[idx] = static_cast<int>(boost::charconv::to_chars(slot, slot + BUF_SIZE, in[idx]).ptr - slot);
+}
+
+/**
+ * Host driver: formats random test_type values with to_chars on the GPU and checks each against the host result.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42}; // fixed seed, so the generated input sequence is reproducible
+
+    // Number of values to convert; cuda_test converts one value per thread
+    const int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Input values; the kernel receives the raw pointer through get()
+    cuda_managed_ptr<test_type> input_vector(numElements);
+
+    // Per-element outputs: a BUF_SIZE-byte text slot and the count of characters written
+    cuda_managed_ptr<char> output_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> output_lengths(numElements);
+
+    // Fill the input with values drawn from the full range of test_type
+    std::uniform_int_distribution<unsigned int> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    for (int i = 0; i < numElements; ++i) // int index matches the bound's type (no signed/unsigned compare)
+    {
+        input_vector[i] = dist(rng);
+    }
+
+    // Launch configuration: round the block count up so every element gets a thread
+    const int threadsPerBlock = 256;
+    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements);
+    // A rejected launch shows up in cudaGetLastError(); a fault while running is returned by cudaDeviceSynchronize()
+    cudaError_t err = cudaGetLastError();
+    if (err == cudaSuccess)
+    {
+        err = cudaDeviceSynchronize();
+    }
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify each GPU result against the same conversion performed on the host
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+    {
+        char cpu_buf[BUF_SIZE];
+        const auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]);
+        const int cpu_len = static_cast<int>(cpu_res.ptr - cpu_buf);
+        const int gpu_len = output_lengths[i];
+        const char* gpu_buf = &output_strings[i * BUF_SIZE];
+
+        if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast<std::size_t>(cpu_len)) != 0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+    const double t = w.elapsed();
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_to_chars_unsigned_long.cu b/test/test_to_chars_unsigned_long.cu
new file mode 100644
index 00000000..d72c96a4
--- /dev/null
+++ b/test/test_to_chars_unsigned_long.cu
@@ -0,0 +1,105 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <cstring>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda", e.g. cudaGetLastError)
+#include <cuda_runtime.h>
+
+using test_type = unsigned long;
+
+constexpr int BUF_SIZE = 32;
+
+// Each thread formats in[idx] into its own BUF_SIZE-byte slot of out_strings and stores the character count.
+__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements)
+{
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements)
+    {
+        return; // threads past the last element have nothing to convert
+    }
+    char* const slot = out_strings + idx * BUF_SIZE;
+    out_lengths[idx] = static_cast<int>(boost::charconv::to_chars(slot, slot + BUF_SIZE, in[idx]).ptr - slot);
+}
+
+/**
+ * Host driver: formats random test_type values with to_chars on the GPU and checks each against the host result.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42}; // fixed seed, so the generated input sequence is reproducible
+
+    // Number of values to convert; cuda_test converts one value per thread
+    const int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Input values; the kernel receives the raw pointer through get()
+    cuda_managed_ptr<test_type> input_vector(numElements);
+
+    // Per-element outputs: a BUF_SIZE-byte text slot and the count of characters written
+    cuda_managed_ptr<char> output_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> output_lengths(numElements);
+
+    // Fill the input with values drawn from the full range of test_type
+    std::uniform_int_distribution<unsigned long> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    for (int i = 0; i < numElements; ++i) // int index matches the bound's type (no signed/unsigned compare)
+    {
+        input_vector[i] = dist(rng);
+    }
+
+    // Launch configuration: round the block count up so every element gets a thread
+    const int threadsPerBlock = 256;
+    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements);
+    // A rejected launch shows up in cudaGetLastError(); a fault while running is returned by cudaDeviceSynchronize()
+    cudaError_t err = cudaGetLastError();
+    if (err == cudaSuccess)
+    {
+        err = cudaDeviceSynchronize();
+    }
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify each GPU result against the same conversion performed on the host
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+    {
+        char cpu_buf[BUF_SIZE];
+        const auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]);
+        const int cpu_len = static_cast<int>(cpu_res.ptr - cpu_buf);
+        const int gpu_len = output_lengths[i];
+        const char* gpu_buf = &output_strings[i * BUF_SIZE];
+
+        if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast<std::size_t>(cpu_len)) != 0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+    const double t = w.elapsed();
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_to_chars_unsigned_long_long.cu b/test/test_to_chars_unsigned_long_long.cu
new file mode 100644
index 00000000..c9ddd30c
--- /dev/null
+++ b/test/test_to_chars_unsigned_long_long.cu
@@ -0,0 +1,105 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <cstring>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda", e.g. cudaGetLastError)
+#include <cuda_runtime.h>
+
+using test_type = unsigned long long;
+
+constexpr int BUF_SIZE = 32;
+
+// Each thread formats in[idx] into its own BUF_SIZE-byte slot of out_strings and stores the character count.
+__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements)
+{
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements)
+    {
+        return; // threads past the last element have nothing to convert
+    }
+    char* const slot = out_strings + idx * BUF_SIZE;
+    out_lengths[idx] = static_cast<int>(boost::charconv::to_chars(slot, slot + BUF_SIZE, in[idx]).ptr - slot);
+}
+
+/**
+ * Host driver: formats random test_type values with to_chars on the GPU and checks each against the host result.
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42}; // fixed seed, so the generated input sequence is reproducible
+
+    // Number of values to convert; cuda_test converts one value per thread
+    const int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Input values; the kernel receives the raw pointer through get()
+    cuda_managed_ptr<test_type> input_vector(numElements);
+
+    // Per-element outputs: a BUF_SIZE-byte text slot and the count of characters written
+    cuda_managed_ptr<char> output_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> output_lengths(numElements);
+
+    // Fill the input with values drawn from the full range of test_type
+    std::uniform_int_distribution<unsigned long long> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    for (int i = 0; i < numElements; ++i) // int index matches the bound's type (no signed/unsigned compare)
+    {
+        input_vector[i] = dist(rng);
+    }
+
+    // Launch configuration: round the block count up so every element gets a thread
+    const int threadsPerBlock = 256;
+    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements);
+    // A rejected launch shows up in cudaGetLastError(); a fault while running is returned by cudaDeviceSynchronize()
+    cudaError_t err = cudaGetLastError();
+    if (err == cudaSuccess)
+    {
+        err = cudaDeviceSynchronize();
+    }
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify each GPU result against the same conversion performed on the host
+    w.reset();
+    for (int i = 0; i < numElements; ++i)
+    {
+        char cpu_buf[BUF_SIZE];
+        const auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]);
+        const int cpu_len = static_cast<int>(cpu_res.ptr - cpu_buf);
+        const int gpu_len = output_lengths[i];
+        const char* gpu_buf = &output_strings[i * BUF_SIZE];
+
+        if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast<std::size_t>(cpu_len)) != 0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+    const double t = w.elapsed();
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}
diff --git a/test/test_to_chars_unsigned_short.cu b/test/test_to_chars_unsigned_short.cu
new file mode 100644
index 00000000..d4f001d9
--- /dev/null
+++ b/test/test_to_chars_unsigned_short.cu
@@ -0,0 +1,105 @@
+//  Copyright Matt Borland 2024 - 2026.
+//  Use, modification and distribution are subject to the
+//  Boost Software License, Version 1.0. (See accompanying file
+//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <random>
+#include <limits>
+#include <cstring>
+#include <boost/charconv/to_chars.hpp>
+#include "cuda_managed_ptr.hpp"
+#include "stopwatch.hpp"
+
+// For the CUDA runtime routines (prefixed with "cuda")
+#include <cuda_runtime.h>
+
+using test_type = unsigned short;
+
+constexpr int BUF_SIZE = 32;
+
+// Kernel: each in-range thread converts in[idx] into its own BUF_SIZE-byte slot of out_strings and records the written length in out_lengths[idx]
+__global__ void cuda_test(const test_type *in, char *out_strings, int *out_lengths, int numElements)
+{
+    const int idx = blockDim.x * blockIdx.x + threadIdx.x;
+    if (idx >= numElements) { return; } // threads past the end of the input do nothing
+
+    char* const slot_begin = out_strings + idx * BUF_SIZE;
+    char* const slot_end = slot_begin + BUF_SIZE;
+    const auto result = boost::charconv::to_chars(slot_begin, slot_end, in[idx]);
+    out_lengths[idx] = static_cast<int>(result.ptr - slot_begin);
+}
+
+/**
+ * Host main routine: runs to_chars on the GPU for random inputs and compares every result with the host conversion
+ */
+int main(void)
+{
+    std::mt19937_64 rng {42}; // fixed seed
+
+    // Error code to check return values for CUDA calls
+    cudaError_t err = cudaSuccess;
+
+    // Print the vector length to be used
+    const int numElements = 50000;
+    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
+
+    // Allocate the managed input vector
+    cuda_managed_ptr<test_type> input_vector(numElements);
+
+    // Allocate the managed output vectors: one BUF_SIZE-byte slot and one length per element
+    cuda_managed_ptr<char> output_strings(numElements * BUF_SIZE);
+    cuda_managed_ptr<int> output_lengths(numElements);
+
+    // Initialize the input vector over the full range of test_type; the index is an int like numElements to avoid a signed/unsigned comparison
+    std::uniform_int_distribution<unsigned short> dist {(std::numeric_limits<test_type>::min)(), (std::numeric_limits<test_type>::max)()};
+    for (int i = 0; i < numElements; ++i)
+    {
+        input_vector[i] = dist(rng);
+    }
+
+    // Launch the CUDA Kernel with enough blocks (rounded up) to cover every element
+    const int threadsPerBlock = 256;
+    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
+    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
+
+    watch w;
+
+    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_strings.get(), output_lengths.get(), numElements);
+    cudaDeviceSynchronize(); // wait for the kernel before reading the timer and the outputs on the host
+
+    std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;
+
+    err = cudaGetLastError();
+
+    if (err != cudaSuccess)
+    {
+        std::cerr << "Failed to launch kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    // Verify each GPU result against the host conversion: same length and same characters
+    w.reset();
+    for(int i = 0; i < numElements; ++i)
+    {
+        char cpu_buf[BUF_SIZE];
+        auto cpu_res = boost::charconv::to_chars(cpu_buf, cpu_buf + BUF_SIZE, input_vector[i]);
+        int cpu_len = static_cast<int>(cpu_res.ptr - cpu_buf);
+        int gpu_len = output_lengths[i];
+        const char* gpu_buf = &output_strings[i * BUF_SIZE];
+
+        if (cpu_len != gpu_len || std::memcmp(cpu_buf, gpu_buf, static_cast<std::size_t>(cpu_len)) != 0)
+        {
+            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
+            return EXIT_FAILURE;
+        }
+    }
+    double t = w.elapsed();
+
+    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
+    std::cout << "Done\n";
+
+    return 0;
+}