From 7998366933d786f81659c3652706378e029f87ec Mon Sep 17 00:00:00 2001 From: Dirk Eddelbuettel Date: Sat, 13 Sep 2025 06:45:06 -0500 Subject: [PATCH] simdjson 4.0.0 --- inst/include/simdjson.cpp | 1435 ++- inst/include/simdjson.h | 19706 +++++++++++++++++++++++++++++++++--- 2 files changed, 19882 insertions(+), 1259 deletions(-) diff --git a/inst/include/simdjson.cpp b/inst/include/simdjson.cpp index aaeca3f..a783697 100644 --- a/inst/include/simdjson.cpp +++ b/inst/include/simdjson.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2025-02-14 16:11:36 -0500. Do not edit! */ +/* auto-generated on 2025-09-09 18:44:21 -0400. version 4.0.0 Do not edit! */ /* including simdjson.cpp: */ /* begin file simdjson.cpp */ #define SIMDJSON_SRC_SIMDJSON_CPP @@ -77,30 +77,71 @@ #endif #endif +#ifndef SIMDJSON_CONSTEXPR_LAMBDA +#if SIMDJSON_CPLUSPLUS17 +#define SIMDJSON_CONSTEXPR_LAMBDA constexpr +#else +#define SIMDJSON_CONSTEXPR_LAMBDA +#endif +#endif + + + #ifdef __has_include #if __has_include() #include #endif #endif +// The current specification is unclear on how we detect +// static reflection, both __cpp_lib_reflection and +// __cpp_impl_reflection are proposed in the draft specification. +// For now, we disable static reflect by default. It must be +// specified at compiler time. +#ifndef SIMDJSON_STATIC_REFLECTION +#define SIMDJSON_STATIC_REFLECTION 0 // disabled by default. +#endif + #if defined(__apple_build_version__) #if __apple_build_version__ < 14000000 #define SIMDJSON_CONCEPT_DISABLED 1 // apple-clang/13 doesn't support std::convertible_to #endif #endif +#if defined(__cpp_lib_ranges) && __cpp_lib_ranges >= 201911L +#include +#define SIMDJSON_SUPPORTS_RANGES 1 +#else +#define SIMDJSON_SUPPORTS_RANGES 0 +#endif #if defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED) #if __cpp_concepts >= 201907L #include -#define SIMDJSON_SUPPORTS_DESERIALIZATION 1 +#define SIMDJSON_SUPPORTS_CONCEPTS 1 #else -#define SIMDJSON_SUPPORTS_DESERIALIZATION 0 +#define SIMDJSON_SUPPORTS_CONCEPTS 0 #endif #else // defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED) -#define SIMDJSON_SUPPORTS_DESERIALIZATION 0 +#define SIMDJSON_SUPPORTS_CONCEPTS 0 #endif // defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED) +// copy SIMDJSON_SUPPORTS_CONCEPTS to SIMDJSON_SUPPORTS_DESERIALIZATION. +#if SIMDJSON_SUPPORTS_CONCEPTS +#define SIMDJSON_SUPPORTS_DESERIALIZATION 1 +#else +#define SIMDJSON_SUPPORTS_DESERIALIZATION 0 +#endif + + +#if !defined(SIMDJSON_CONSTEVAL) +#if defined(__cpp_consteval) && __cpp_consteval >= 201811L && defined(__cpp_lib_constexpr_string) && __cpp_lib_constexpr_string >= 201907L +#define SIMDJSON_CONSTEVAL 1 +#else +#define SIMDJSON_CONSTEVAL 0 +#endif // defined(__cpp_consteval) && __cpp_consteval >= 201811L && defined(__cpp_lib_constexpr_string) && __cpp_lib_constexpr_string >= 201907L +#endif // !defined(SIMDJSON_CONSTEVAL) + #endif // SIMDJSON_COMPILER_CHECK_H /* end file simdjson/compiler_check.h */ /* including simdjson/portability.h: #include "simdjson/portability.h" */ @@ -152,6 +193,22 @@ using std::size_t; #define SIMDJSON_IS_ARM64 1 #elif defined(__riscv) && __riscv_xlen == 64 #define SIMDJSON_IS_RISCV64 1 + #if __riscv_v_intrinsic >= 11000 + #define SIMDJSON_HAS_RVV_INTRINSICS 1 + #endif + + #define SIMDJSON_HAS_ZVBB_INTRINSICS \ + 0 // there is currently no way to detect this + + #if SIMDJSON_HAS_RVV_INTRINSICS && __riscv_vector && \ + __riscv_v_min_vlen >= 128 && __riscv_v_elen >= 64 + // RISC-V V extension + #define SIMDJSON_IS_RVV 1 + #if SIMDJSON_HAS_ZVBB_INTRINSICS && __riscv_zvbb >= 1000000 + // RISC-V Vector Basic Bit-manipulation + #define SIMDJSON_IS_ZVBB 1 + #endif + #endif #elif defined(__loongarch_lp64) #define SIMDJSON_IS_LOONGARCH64 1 #elif defined(__PPC64__) || defined(_M_PPC64) @@ -298,6 +355,7 @@ using std::size_t; #if defined(NDEBUG) || defined(__OPTIMIZE__) || (defined(_MSC_VER) && !defined(_DEBUG)) // If NDEBUG is set, or __OPTIMIZE__ is set, or we are under MSVC in release mode, // then do away with asserts and use __assume. +// We still recommend that our users set NDEBUG in release mode. #if SIMDJSON_VISUAL_STUDIO #define SIMDJSON_UNREACHABLE() __assume(0) #define SIMDJSON_ASSUME(COND) __assume(COND) @@ -373,7 +431,7 @@ double from_chars(const char *first, const char* end) noexcept; } #ifndef SIMDJSON_EXCEPTIONS -#if __cpp_exceptions +#if defined(__cpp_exceptions) || defined(_CPPUNWIND) #define SIMDJSON_EXCEPTIONS 1 #else #define SIMDJSON_EXCEPTIONS 0 @@ -549,17 +607,6 @@ double from_chars(const char *first, const char* end) noexcept; // We assume by default static linkage #define SIMDJSON_DLLIMPORTEXPORT #endif - -/** - * Workaround for the vcpkg package manager. Only vcpkg should - * ever touch the next line. The SIMDJSON_USING_LIBRARY macro is otherwise unused. - */ -#if SIMDJSON_USING_LIBRARY -#define SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport) -#endif -/** - * End of workaround for the vcpkg package manager. - */ #else #define SIMDJSON_DLLIMPORTEXPORT #endif @@ -576,12 +623,14 @@ double from_chars(const char *first, const char* end) noexcept; // even if we do not have C++17 support. #ifdef __cpp_lib_string_view #define SIMDJSON_HAS_STRING_VIEW +#include #endif // Some systems have string_view even if we do not have C++17 support, // and even if __cpp_lib_string_view is undefined, it is the case // with Apple clang version 11. // We must handle it. *This is important.* +#ifndef _MSC_VER #ifndef SIMDJSON_HAS_STRING_VIEW #if defined __has_include // do not combine the next #if with the previous one (unsafe) @@ -597,6 +646,7 @@ double from_chars(const char *first, const char* end) noexcept; #endif // __has_include () #endif // defined __has_include #endif // def SIMDJSON_HAS_STRING_VIEW +#endif // def _MSC_VER // end of complicated but important routine to try to detect string_view. // @@ -776,22 +826,22 @@ inline namespace literals { inline namespace string_view_literals { -constexpr std::string_view operator "" _sv( const char* str, size_t len ) noexcept // (1) +constexpr std::string_view operator ""_sv( const char* str, size_t len ) noexcept // (1) { return std::string_view{ str, len }; } -constexpr std::u16string_view operator "" _sv( const char16_t* str, size_t len ) noexcept // (2) +constexpr std::u16string_view operator ""_sv( const char16_t* str, size_t len ) noexcept // (2) { return std::u16string_view{ str, len }; } -constexpr std::u32string_view operator "" _sv( const char32_t* str, size_t len ) noexcept // (3) +constexpr std::u32string_view operator ""_sv( const char32_t* str, size_t len ) noexcept // (3) { return std::u32string_view{ str, len }; } -constexpr std::wstring_view operator "" _sv( const wchar_t* str, size_t len ) noexcept // (4) +constexpr std::wstring_view operator ""_sv( const wchar_t* str, size_t len ) noexcept // (4) { return std::wstring_view{ str, len }; } @@ -1019,9 +1069,9 @@ using std::operator<<; #endif #if nssv_HAVE_NODISCARD -# define nssv_nodiscard [[nodiscard]] +# define nssv_nodiscard simdjson_warn_unused #else -# define nssv_nodiscard /*[[nodiscard]]*/ +# define nssv_nodiscard /*simdjson_warn_unused*/ #endif // Additional includes: @@ -2122,22 +2172,22 @@ nssv_inline_ns namespace string_view_literals { #if nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS -nssv_constexpr nonstd::sv_lite::string_view operator "" sv( const char* str, size_t len ) nssv_noexcept // (1) +nssv_constexpr nonstd::sv_lite::string_view operator ""sv( const char* str, size_t len ) nssv_noexcept // (1) { return nonstd::sv_lite::string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::u16string_view operator "" sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +nssv_constexpr nonstd::sv_lite::u16string_view operator ""sv( const char16_t* str, size_t len ) nssv_noexcept // (2) { return nonstd::sv_lite::u16string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::u32string_view operator "" sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +nssv_constexpr nonstd::sv_lite::u32string_view operator ""sv( const char32_t* str, size_t len ) nssv_noexcept // (3) { return nonstd::sv_lite::u32string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) +nssv_constexpr nonstd::sv_lite::wstring_view operator ""sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) { return nonstd::sv_lite::wstring_view{ str, len }; } @@ -2146,22 +2196,22 @@ nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, #if nssv_CONFIG_USR_SV_OPERATOR -nssv_constexpr nonstd::sv_lite::string_view operator "" _sv( const char* str, size_t len ) nssv_noexcept // (1) +nssv_constexpr nonstd::sv_lite::string_view operator ""_sv( const char* str, size_t len ) nssv_noexcept // (1) { return nonstd::sv_lite::string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::u16string_view operator "" _sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +nssv_constexpr nonstd::sv_lite::u16string_view operator ""_sv( const char16_t* str, size_t len ) nssv_noexcept // (2) { return nonstd::sv_lite::u16string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::u32string_view operator "" _sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +nssv_constexpr nonstd::sv_lite::u32string_view operator ""_sv( const char32_t* str, size_t len ) nssv_noexcept // (3) { return nonstd::sv_lite::u32string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::wstring_view operator "" _sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) +nssv_constexpr nonstd::sv_lite::wstring_view operator ""_sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) { return nonstd::sv_lite::wstring_view{ str, len }; } @@ -2339,16 +2389,25 @@ namespace std { // It could also wrongly set SIMDJSON_DEVELOPMENT_CHECKS (e.g., if the programmer // sets _DEBUG in a release build under Visual Studio, or if some compiler fails to // set the __OPTIMIZE__ macro). +// We make it so that if NDEBUG is defined, then SIMDJSON_DEVELOPMENT_CHECKS +// is not defined, irrespective of the compiler. +// We recommend that users set NDEBUG in release builds, so that +// SIMDJSON_DEVELOPMENT_CHECKS is not defined in release builds by default, +// irrespective of the compiler. #ifndef SIMDJSON_DEVELOPMENT_CHECKS #ifdef _MSC_VER // Visual Studio seems to set _DEBUG for debug builds. -#ifdef _DEBUG +// We set SIMDJSON_DEVELOPMENT_CHECKS to 1 if _DEBUG is defined +// and NDEBUG is not defined. +#if defined(_DEBUG) && !defined(NDEBUG) #define SIMDJSON_DEVELOPMENT_CHECKS 1 #endif // _DEBUG #else // _MSC_VER // All other compilers appear to set __OPTIMIZE__ to a positive integer // when the compiler is optimizing. -#ifndef __OPTIMIZE__ +// We only set SIMDJSON_DEVELOPMENT_CHECKS if both __OPTIMIZE__ +// and NDEBUG are not defined. +#if !defined(__OPTIMIZE__) && !defined(NDEBUG) #define SIMDJSON_DEVELOPMENT_CHECKS 1 #endif // __OPTIMIZE__ #endif // _MSC_VER @@ -2404,6 +2463,18 @@ namespace std { #define SIMDJSON_AVX512_ALLOWED 1 #endif + +#ifndef __has_cpp_attribute +#define simdjson_lifetime_bound +#elif __has_cpp_attribute(msvc::lifetimebound) +#define simdjson_lifetime_bound [[msvc::lifetimebound]] +#elif __has_cpp_attribute(clang::lifetimebound) +#define simdjson_lifetime_bound [[clang::lifetimebound]] +#elif __has_cpp_attribute(lifetimebound) +#define simdjson_lifetime_bound [[lifetimebound]] +#else +#define simdjson_lifetime_bound +#endif #endif // SIMDJSON_COMMON_DEFS_H /* end file simdjson/common_defs.h */ /* skipped duplicate #include "simdjson/compiler_check.h" */ @@ -2431,7 +2502,7 @@ enum error_code { SUCCESS = 0, ///< No error CAPACITY, ///< This parser can't support a document that big MEMALLOC, ///< Error allocating memory, most likely out of memory - TAPE_ERROR, ///< Something went wrong, this is a generic error + TAPE_ERROR, ///< Something went wrong, this is a generic error. Fatal/unrecoverable error. DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation STRING_ERROR, ///< Problem while parsing a string T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't' @@ -2456,13 +2527,22 @@ enum error_code { PARSER_IN_USE, ///< parser is already in use. OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order (checked when SIMDJSON_DEVELOPMENT_CHECKS=1) INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it. - INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. + INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. Fatal/unrecoverable error. SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value. OUT_OF_BOUNDS, ///< Attempted to access location outside of document. TRAILING_CONTENT, ///< Unexpected trailing content in the JSON input - NUM_ERROR_CODES + OUT_OF_CAPACITY, ///< The capacity was exceeded, we cannot allocate enough memory. + NUM_ERROR_CODES ///< Placeholder for end of error code list. }; +/** + * Some errors are fatal and invalidate the document. This function returns true if the + * error is fatal. It returns true for TAPE_ERROR and INCOMPLETE_ARRAY_OR_OBJECT. + * Once a fatal error is encountered, the on-demand document is no longer valid and + * processing should stop. + */ + inline bool is_fatal(error_code error) noexcept; + /** * It is the convention throughout the code that the macro SIMDJSON_DEVELOPMENT_CHECKS determines whether * we check for OUT_OF_ORDER_ITERATION. The logic behind it is that these errors only occurs when the code @@ -2510,6 +2590,10 @@ namespace internal { /** * The result of a simdjson operation that could fail. * + * IMPORTANT: For the ondemand API, we use implementation_simdjson_result_base as a base class + * to avoid some compilation issue. Thus, if you modify this class, please ensure that the ondemand + * implementation_simdjson_result_base is also modified. + * * Gives the option of reading error codes, or throwing an exception by casting to the desired result. * * This is a base class for implementations that want to add functions to the result type for @@ -2570,8 +2654,27 @@ struct simdjson_result_base : protected std::pair { */ simdjson_inline error_code error() const noexcept; + /** + * Whether there is a value. + */ + simdjson_inline bool has_value() const noexcept; #if SIMDJSON_EXCEPTIONS + /** + * Dereference operator to access the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + /** * Get the result value. * @@ -2599,17 +2702,48 @@ struct simdjson_result_base : protected std::pair { * @throw simdjson_error if there was an error. */ simdjson_inline operator T&&() && noexcept(false); + #endif // SIMDJSON_EXCEPTIONS /** * Get the result value. This function is safe if and only * the error() method returns a value that evaluates to false. + * We discourage the use of value_unsafe(). + * + * The recommended pattern is: + * + * T value; // where T is the type + * auto error = result.get(value); + * if (error) { + * // handle error + * } + * + * Or you may call 'value()' which will raise an exception + * in case of error: + * + * T value = result.value(); */ simdjson_inline const T& value_unsafe() const& noexcept; /** * Take the result value (move it). This function is safe if and only * the error() method returns a value that evaluates to false. + * We discourage the use of value_unsafe(). + * + * The recommended pattern is: + * + * T value; // where T is the type + * auto error = result.get(value); + * if (error) { + * // handle error, return, exit, abort + * } else { + * // use value here. + * } + * + * Or you may call 'value()' which will raise an exception + * in case of error: + * + * T value = result.value(); */ simdjson_inline T&& value_unsafe() && noexcept; @@ -2624,6 +2758,7 @@ struct simdjson_result_base : protected std::pair { */ template struct simdjson_result : public internal::simdjson_result_base { + /** * @private Create a new empty result with error = UNINITIALIZED. */ @@ -2656,13 +2791,32 @@ struct simdjson_result : public internal::simdjson_result_base { */ simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; + /** + * Copy the value to a provided std::string, only enabled for std::string_view. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + template + simdjson_warn_unused simdjson_inline error_code get(std::string &value) && noexcept { + static_assert(std::is_same::value, "SFINAE"); + std::string_view v; + error_code error = std::forward>(*this).get(v); + if (!error) { + value.assign(v.data(), v.size()); + } + return error; + } + /** * The error. */ simdjson_inline error_code error() const noexcept; -#if SIMDJSON_EXCEPTIONS + +#if SIMDJSON_EXCEPTIONS + using internal::simdjson_result_base::operator*; + using internal::simdjson_result_base::operator->; /** * Get the result value. * @@ -2733,7 +2887,7 @@ inline const std::string error_message(int error) noexcept; /* begin file simdjson/concepts.h */ #ifndef SIMDJSON_CONCEPTS_H #define SIMDJSON_CONCEPTS_H -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS #include #include @@ -2765,14 +2919,32 @@ SIMDJSON_IMPL_CONCEPT(op_append, operator+=) #undef SIMDJSON_IMPL_CONCEPT } // namespace details +template +concept is_pair = requires { typename T::first_type; typename T::second_type; } && + std::same_as>; +template +concept string_view_like = std::is_convertible_v && + !std::is_convertible_v; + +template +concept constructible_from_string_view = std::is_constructible_v + && !std::is_same_v + && std::is_default_constructible_v; + +template +concept string_view_keyed_map = string_view_like + && requires(std::remove_cvref_t& m, typename M::key_type sv, typename M::mapped_type v) { + { m.emplace(sv, v) } -> std::same_as>; +}; + /// Check if T is a container that we can append to, including: /// std::vector, std::deque, std::list, std::string, ... template concept appendable_containers = - details::supports_emplace_back || details::supports_emplace || + (details::supports_emplace_back || details::supports_emplace || details::supports_push_back || details::supports_push || details::supports_add || details::supports_append || - details::supports_insert; + details::supports_insert) && !string_view_keyed_map; /// Insert into the container however possible template @@ -2832,7 +3004,6 @@ concept optional_type = requires(std::remove_cvref_t obj) { { obj.value() } -> std::same_as::value_type&>; requires requires(typename std::remove_cvref_t::value_type &&val) { obj.emplace(std::move(val)); - obj = std::move(val); { obj.value_or(val) } -> std::convertible_to::value_type>; @@ -2840,11 +3011,69 @@ concept optional_type = requires(std::remove_cvref_t obj) { { static_cast(obj) } -> std::same_as; // convertible to bool }; + + } // namespace concepts } // namespace simdjson -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS #endif // SIMDJSON_CONCEPTS_H /* end file simdjson/concepts.h */ +/* including simdjson/constevalutil.h: #include "simdjson/constevalutil.h" */ +/* begin file simdjson/constevalutil.h */ +#ifndef SIMDJSON_CONSTEVALUTIL_H +#define SIMDJSON_CONSTEVALUTIL_H + +#include +#include +#include + +#if SIMDJSON_CONSTEVAL +namespace simdjson { +namespace constevalutil { + +constexpr static std::array json_quotable_character = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +constexpr static std::array control_chars = { + "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", + "\\u0007", "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", + "\\u000e", "\\u000f", "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", + "\\u0015", "\\u0016", "\\u0017", "\\u0018", "\\u0019", "\\u001a", "\\u001b", + "\\u001c", "\\u001d", "\\u001e", "\\u001f"}; +// unoptimized, meant for compile-time execution +consteval std::string consteval_to_quoted_escaped(std::string_view input) { + std::string out = "\""; + for (char c : input) { + if (json_quotable_character[uint8_t(c)]) { + if (c == '"') { + out.append("\\\""); + } else if (c == '\\') { + out.append("\\\\"); + } else { + std::string_view v = control_chars[uint8_t(c)]; + out.append(v); + } + } else { + out.push_back(c); + } + } + out.push_back('"'); + return out; +} +} // namespace constevalutil +} // namespace simdjson +#endif // SIMDJSON_CONSTEVAL +#endif // SIMDJSON_CONSTEVALUTIL_H +/* end file simdjson/constevalutil.h */ /** * @brief The top level simdjson namespace, containing everything the library provides. @@ -4511,6 +4740,11 @@ extern SIMDJSON_DLLIMPORTEXPORT const uint32_t digit_to_val32[886]; #include namespace simdjson { + +inline bool is_fatal(error_code error) noexcept { + return error == TAPE_ERROR || error == INCOMPLETE_ARRAY_OR_OBJECT; +} + namespace internal { // We store the error code so we can validate the error message is associated with the right code struct error_code_info { @@ -4567,8 +4801,38 @@ simdjson_inline error_code simdjson_result_base::error() const noexcept { return this->second; } + +template +simdjson_inline bool simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -4593,6 +4857,7 @@ simdjson_inline simdjson_result_base::operator T&&() && noexcept(false) { #endif // SIMDJSON_EXCEPTIONS + template simdjson_inline const T& simdjson_result_base::value_unsafe() const& noexcept { return this->first; @@ -4628,7 +4893,8 @@ simdjson_inline void simdjson_result::tie(T &value, error_code &error) && noe } template -simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &value) && noexcept { +simdjson_warn_unused simdjson_inline error_code +simdjson_result::get(T &value) && noexcept { return std::forward>(*this).get(value); } @@ -4696,7 +4962,7 @@ namespace internal { { SUCCESS, "SUCCESS: No error" }, { CAPACITY, "CAPACITY: This parser can't support a document that big" }, { MEMALLOC, "MEMALLOC: Error allocating memory, we're most likely out of memory" }, - { TAPE_ERROR, "TAPE_ERROR: The JSON document has an improper structure: missing or superfluous commas, braces, missing keys, etc." }, + { TAPE_ERROR, "TAPE_ERROR: The JSON document has an improper structure: missing or superfluous commas, braces, missing keys, etc. This is a fatal and unrecoverable error." }, { DEPTH_ERROR, "DEPTH_ERROR: The JSON document was too deep (too many nested objects and arrays)" }, { STRING_ERROR, "STRING_ERROR: Problem while parsing a string" }, { T_ATOM_ERROR, "T_ATOM_ERROR: Problem while parsing an atom starting with the letter 't'" }, @@ -4721,7 +4987,7 @@ namespace internal { { PARSER_IN_USE, "PARSER_IN_USE: Cannot parse a new document while a document is still in use." }, { OUT_OF_ORDER_ITERATION, "OUT_OF_ORDER_ITERATION: Objects and arrays can only be iterated when they are first encountered." }, { INSUFFICIENT_PADDING, "INSUFFICIENT_PADDING: simdjson requires the input JSON string to have at least SIMDJSON_PADDING extra bytes allocated, beyond the string's length. Consider using the simdjson::padded_string class if needed." }, - { INCOMPLETE_ARRAY_OR_OBJECT, "INCOMPLETE_ARRAY_OR_OBJECT: JSON document ended early in the middle of an object or array." }, + { INCOMPLETE_ARRAY_OR_OBJECT, "INCOMPLETE_ARRAY_OR_OBJECT: JSON document ended early in the middle of an object or array. This is a fatal and unrecoverable error." }, { SCALAR_DOCUMENT_AS_VALUE, "SCALAR_DOCUMENT_AS_VALUE: A JSON document made of a scalar (number, Boolean, null or string) is treated as a value. Use get_bool(), get_double(), etc. on the document instead. "}, { OUT_OF_BOUNDS, "OUT_OF_BOUNDS: Attempt to access location outside of document."}, { TRAILING_CONTENT, "TRAILING_CONTENT: Unexpected trailing content in the JSON input."} @@ -6787,7 +7053,7 @@ class document { * The memory allocation is strict: you * can you use this function to increase * or lower the amount of allocated memory. - * Passsing zero clears the memory. + * Passing zero clears the memory. */ error_code allocate(size_t len) noexcept; /** @private Capacity in bytes, in terms @@ -8147,6 +8413,12 @@ namespace { tmp = vpaddq_u8(tmp, tmp); return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); } + // Returns 4-bit out of each byte, alternating between the high 4 bits and low + // bits result it is 64 bit. + simdjson_inline uint64_t to_bitmask64() const { + return vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(*this), 4)), 0); + } simdjson_inline bool any() const { return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; } }; @@ -8223,7 +8495,7 @@ namespace { // Bit-specific operations simdjson_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } - simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } + simdjson_inline bool any_bits_set_anywhere() const { return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; } simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } template simdjson_inline simd8 shr() const { return vshrq_n_u8(*this, N); } @@ -8236,7 +8508,12 @@ namespace { return lookup_table.apply_lookup_16_to(*this); } - + // Returns 4-bit out of each byte, alternating between the high 4 bits and low + // bits result it is 64 bit. + simdjson_inline uint64_t to_bitmask64() const { + return vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(*this), 4)), 0); + } // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes @@ -8559,6 +8836,32 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits) / 4; } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + (is_backslash | is_quote | is_control).to_bitmask64() + }; +} + + + } // unnamed namespace } // namespace arm64 } // namespace simdjson @@ -8985,6 +9288,11 @@ struct implementation_simdjson_result_base { */ simdjson_inline error_code error() const noexcept; + /** + * Whether there is a value. + */ + simdjson_inline bool has_value() const noexcept; + #if SIMDJSON_EXCEPTIONS /** @@ -8992,6 +9300,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -9033,6 +9351,7 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -9185,7 +9504,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -9695,7 +10014,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -10156,6 +10484,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -10394,8 +10728,37 @@ simdjson_inline error_code implementation_simdjson_result_base::error() const return this->second; } + +template +simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -10914,6 +11277,12 @@ namespace { tmp = vpaddq_u8(tmp, tmp); return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); } + // Returns 4-bit out of each byte, alternating between the high 4 bits and low + // bits result it is 64 bit. + simdjson_inline uint64_t to_bitmask64() const { + return vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(*this), 4)), 0); + } simdjson_inline bool any() const { return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; } }; @@ -10990,7 +11359,7 @@ namespace { // Bit-specific operations simdjson_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } - simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } + simdjson_inline bool any_bits_set_anywhere() const { return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; } simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } template simdjson_inline simd8 shr() const { return vshrq_n_u8(*this, N); } @@ -11003,7 +11372,12 @@ namespace { return lookup_table.apply_lookup_16_to(*this); } - + // Returns 4-bit out of each byte, alternating between the high 4 bits and low + // bits result it is 64 bit. + simdjson_inline uint64_t to_bitmask64() const { + return vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(*this), 4)), 0); + } // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes @@ -11326,6 +11700,32 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits) / 4; } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + (is_backslash | is_quote | is_control).to_bitmask64() + }; +} + + + } // unnamed namespace } // namespace arm64 } // namespace simdjson @@ -13431,6 +13831,7 @@ simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V /* end file generic/stage2/json_iterator.h for arm64 */ /* including generic/stage2/stringparsing.h for arm64: #include */ /* begin file generic/stage2/stringparsing.h for arm64 */ +#include #ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -13670,6 +14071,7 @@ simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t } } // namespace stringparsing + } // unnamed namespace } // namespace arm64 } // namespace simdjson @@ -14921,6 +15323,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + uint64_t((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace haswell } // namespace simdjson @@ -15345,6 +15772,11 @@ struct implementation_simdjson_result_base { */ simdjson_inline error_code error() const noexcept; + /** + * Whether there is a value. + */ + simdjson_inline bool has_value() const noexcept; + #if SIMDJSON_EXCEPTIONS /** @@ -15352,6 +15784,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -15393,6 +15835,7 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -15545,7 +15988,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -16055,7 +16498,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -16516,6 +16968,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -16754,8 +17212,37 @@ simdjson_inline error_code implementation_simdjson_result_base::error() const return this->second; } + +template +simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -17559,6 +18046,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + uint64_t((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace haswell } // namespace simdjson @@ -19662,6 +20174,7 @@ simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V /* end file generic/stage2/json_iterator.h for haswell */ /* including generic/stage2/stringparsing.h for haswell: #include */ /* begin file generic/stage2/stringparsing.h for haswell */ +#include #ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -19901,6 +20414,7 @@ simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t } } // namespace stringparsing + } // unnamed namespace } // namespace haswell } // namespace simdjson @@ -20742,7 +21256,6 @@ namespace simd { friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { return _mm512_cmpeq_epi8_mask(lhs, rhs); } - static const int SIZE = sizeof(base::value); template @@ -21085,6 +21598,35 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 64; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(uint64_t(escape_bits)); } + + __mmask64 escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + __mmask64 is_quote = _mm512_cmpeq_epi8_mask(v, _mm512_set1_epi8('"')); + __mmask64 is_backslash = _mm512_cmpeq_epi8_mask(v, _mm512_set1_epi8('\\')); + __mmask64 is_control = _mm512_cmplt_epi8_mask(v, _mm512_set1_epi8(32)); + return { + (is_backslash | is_quote | is_control) + }; +} + + + + } // unnamed namespace } // namespace icelake } // namespace simdjson @@ -21569,6 +22111,11 @@ struct implementation_simdjson_result_base { */ simdjson_inline error_code error() const noexcept; + /** + * Whether there is a value. + */ + simdjson_inline bool has_value() const noexcept; + #if SIMDJSON_EXCEPTIONS /** @@ -21576,6 +22123,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -21617,6 +22174,7 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -21769,7 +22327,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -22279,7 +22837,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -22740,6 +23307,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -22978,8 +23551,37 @@ simdjson_inline error_code implementation_simdjson_result_base::error() const return this->second; } + +template +simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -23376,7 +23978,6 @@ namespace simd { friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { return _mm512_cmpeq_epi8_mask(lhs, rhs); } - static const int SIZE = sizeof(base::value); template @@ -23719,6 +24320,35 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 64; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(uint64_t(escape_bits)); } + + __mmask64 escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + __mmask64 is_quote = _mm512_cmpeq_epi8_mask(v, _mm512_set1_epi8('"')); + __mmask64 is_backslash = _mm512_cmpeq_epi8_mask(v, _mm512_set1_epi8('\\')); + __mmask64 is_control = _mm512_cmplt_epi8_mask(v, _mm512_set1_epi8(32)); + return { + (is_backslash | is_quote | is_control) + }; +} + + + + } // unnamed namespace } // namespace icelake } // namespace simdjson @@ -25882,6 +26512,7 @@ simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V /* end file generic/stage2/json_iterator.h for icelake */ /* including generic/stage2/stringparsing.h for icelake: #include */ /* begin file generic/stage2/stringparsing.h for icelake */ +#include #ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -26121,6 +26752,7 @@ simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t } } // namespace stringparsing + } // unnamed namespace } // namespace icelake } // namespace simdjson @@ -27523,6 +28155,32 @@ backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + // We store it as a 64-bit bitmask even though we only need 16 bits. + uint64_t((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace ppc64 } // namespace simdjson @@ -27949,6 +28607,11 @@ struct implementation_simdjson_result_base { */ simdjson_inline error_code error() const noexcept; + /** + * Whether there is a value. + */ + simdjson_inline bool has_value() const noexcept; + #if SIMDJSON_EXCEPTIONS /** @@ -27956,6 +28619,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -27997,6 +28670,7 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -28149,7 +28823,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -28659,7 +29333,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -29120,6 +29803,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -29358,8 +30047,37 @@ simdjson_inline error_code implementation_simdjson_result_base::error() const return this->second; } + +template +simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -30272,6 +30990,32 @@ backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + // We store it as a 64-bit bitmask even though we only need 16 bits. + uint64_t((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace ppc64 } // namespace simdjson @@ -32377,6 +33121,7 @@ simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V /* end file generic/stage2/json_iterator.h for ppc64 */ /* including generic/stage2/stringparsing.h for ppc64: #include */ /* begin file generic/stage2/stringparsing.h for ppc64 */ +#include #ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -32616,6 +33361,7 @@ simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t } } // namespace stringparsing + } // unnamed namespace } // namespace ppc64 } // namespace simdjson @@ -34267,6 +35013,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + uint64_t((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace westmere } // namespace simdjson @@ -34691,6 +35462,11 @@ struct implementation_simdjson_result_base { */ simdjson_inline error_code error() const noexcept; + /** + * Whether there is a value. + */ + simdjson_inline bool has_value() const noexcept; + #if SIMDJSON_EXCEPTIONS /** @@ -34698,6 +35474,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -34739,6 +35525,7 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -34891,7 +35678,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -35401,7 +36188,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -35862,6 +36658,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -36100,8 +36902,37 @@ simdjson_inline error_code implementation_simdjson_result_base::error() const return this->second; } + +template +simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -37331,6 +38162,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + uint64_t((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace westmere } // namespace simdjson @@ -39434,6 +40290,7 @@ simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V /* end file generic/stage2/json_iterator.h for westmere */ /* including generic/stage2/stringparsing.h for westmere: #include */ /* begin file generic/stage2/stringparsing.h for westmere */ +#include #ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -39673,6 +40530,7 @@ simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t } } // namespace stringparsing + } // unnamed namespace } // namespace westmere } // namespace simdjson @@ -40201,7 +41059,6 @@ simdjson_warn_unused error_code dom_parser_implementation::parse(const uint8_t * if (error) { return error; } return stage2(_doc); } - } // namespace westmere } // namespace simdjson @@ -40831,6 +41688,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + (is_backslash | is_quote | is_control).to_bitmask() + }; +} + } // unnamed namespace } // namespace lsx } // namespace simdjson @@ -41257,6 +42139,11 @@ struct implementation_simdjson_result_base { */ simdjson_inline error_code error() const noexcept; + /** + * Whether there is a value. + */ + simdjson_inline bool has_value() const noexcept; + #if SIMDJSON_EXCEPTIONS /** @@ -41264,6 +42151,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -41305,6 +42202,7 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -41457,7 +42355,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -41967,7 +42865,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -42428,6 +43335,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -42666,8 +43579,37 @@ simdjson_inline error_code implementation_simdjson_result_base::error() const return this->second; } + +template +simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -43365,6 +44307,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + (is_backslash | is_quote | is_control).to_bitmask() + }; +} + } // unnamed namespace } // namespace lsx } // namespace simdjson @@ -45470,6 +46437,7 @@ simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V /* end file generic/stage2/json_iterator.h for lsx */ /* including generic/stage2/stringparsing.h for lsx: #include */ /* begin file generic/stage2/stringparsing.h for lsx */ +#include #ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -45709,6 +46677,7 @@ simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t } } // namespace stringparsing + } // unnamed namespace } // namespace lsx } // namespace simdjson @@ -46842,6 +47811,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + (is_backslash | is_quote | is_control).to_bitmask() + }; +} + } // unnamed namespace } // namespace lasx } // namespace simdjson @@ -47268,6 +48262,11 @@ struct implementation_simdjson_result_base { */ simdjson_inline error_code error() const noexcept; + /** + * Whether there is a value. + */ + simdjson_inline bool has_value() const noexcept; + #if SIMDJSON_EXCEPTIONS /** @@ -47275,6 +48274,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -47316,6 +48325,7 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -47468,7 +48478,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -47978,7 +48988,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -48439,6 +49458,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -48677,8 +49702,37 @@ simdjson_inline error_code implementation_simdjson_result_base::error() const return this->second; } + +template +simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -49392,6 +50446,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + (is_backslash | is_quote | is_control).to_bitmask() + }; +} + } // unnamed namespace } // namespace lasx } // namespace simdjson @@ -51497,6 +52576,7 @@ simdjson_warn_unused simdjson_inline error_code json_iterator::visit_primitive(V /* end file generic/stage2/json_iterator.h for lasx */ /* including generic/stage2/stringparsing.h for lasx: #include */ /* begin file generic/stage2/stringparsing.h for lasx */ +#include #ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -51736,6 +52816,7 @@ simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t } } // namespace stringparsing + } // unnamed namespace } // namespace lasx } // namespace simdjson @@ -52365,6 +53446,24 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin return { src[0] }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 1; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits; } + simdjson_inline int escape_index() { return 0; } + + bool escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + dst[0] = src[0]; + return { (src[0] == '\\') || (src[0] == '"') || (src[0] < 32) }; +} + } // unnamed namespace } // namespace fallback } // namespace simdjson @@ -52878,6 +53977,11 @@ struct implementation_simdjson_result_base { */ simdjson_inline error_code error() const noexcept; + /** + * Whether there is a value. + */ + simdjson_inline bool has_value() const noexcept; + #if SIMDJSON_EXCEPTIONS /** @@ -52885,6 +53989,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -52926,6 +54040,7 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -53078,7 +54193,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -53588,7 +54703,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -54049,6 +55173,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -54287,8 +55417,37 @@ simdjson_inline error_code implementation_simdjson_result_base::error() const return this->second; } + +template +simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -54504,6 +55663,24 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin return { src[0] }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 1; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits; } + simdjson_inline int escape_index() { return 0; } + + bool escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + dst[0] = src[0]; + return { (src[0] == '\\') || (src[0] == '"') || (src[0] < 32) }; +} + } // unnamed namespace } // namespace fallback } // namespace simdjson @@ -54710,6 +55887,7 @@ simdjson_inline uint32_t find_next_document_index(dom_parser_implementation &par /* end file generic/stage1/find_next_document_index.h for fallback */ /* including generic/stage2/stringparsing.h for fallback: #include */ /* begin file generic/stage2/stringparsing.h for fallback */ +#include #ifndef SIMDJSON_SRC_GENERIC_STAGE2_STRINGPARSING_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -54949,6 +56127,7 @@ simdjson_warn_unused simdjson_inline uint8_t *parse_wobbly_string(const uint8_t } } // namespace stringparsing + } // unnamed namespace } // namespace fallback } // namespace simdjson @@ -55915,10 +57094,78 @@ simdjson_inline void validate_utf8_character() { idx += 4; } +static const uint8_t CHAR_TYPE_SPACE = 1 << 0; +static const uint8_t CHAR_TYPE_OPERATOR = 1 << 1; +static const uint8_t CHAR_TYPE_ESC_ASCII = 1 << 2; +static const uint8_t CHAR_TYPE_NON_ASCII = 1 << 3; + +const uint8_t char_table[256] = { + 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, + 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x04, 0x04, + 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, + 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, + 0x01, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x02, 0x04, 0x02, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, + 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08 +}; + +simdjson_inline bool char_is_type(uint8_t c, uint8_t type) { + return (char_table[c] & type); +} + +simdjson_inline bool char_is_space(uint8_t c) { + return char_is_type(c, CHAR_TYPE_SPACE); +} + +simdjson_inline bool char_is_operator(uint8_t c) { + return char_is_type(c, CHAR_TYPE_OPERATOR); +} + +simdjson_inline bool char_is_space_or_operator(uint8_t c) { + return char_is_type(c, CHAR_TYPE_SPACE | CHAR_TYPE_OPERATOR); +} + +simdjson_inline bool char_is_ascii_stop(uint8_t c) { + return char_is_type(c, CHAR_TYPE_ESC_ASCII | CHAR_TYPE_NON_ASCII); +} + // Returns true if the string is unclosed. simdjson_inline bool validate_string() { idx++; // skip first quote - while (idx < len && buf[idx] != '"') { + while (idx < len) { + do { + if (char_is_ascii_stop(buf[idx])) { break; } + idx++; + } while (idx < len); + if (idx >= len) { return true; } + if (buf[idx] == '"') { + return false; + } if (buf[idx] == '\\') { idx += 2; } else if (simdjson_unlikely(buf[idx] & 0x80)) { @@ -55932,43 +57179,31 @@ simdjson_inline bool validate_string() { return false; } -simdjson_inline bool is_whitespace_or_operator(uint8_t c) { - switch (c) { - case '{': case '}': case '[': case ']': case ',': case ':': - case ' ': case '\r': case '\n': case '\t': - return true; - default: - return false; - } -} - // // Parse the entire input in STEP_SIZE-byte chunks. // simdjson_inline error_code scan() { bool unclosed_string = false; for (;idx= len) { break; } + // String + if (buf[idx] == '"') { + add_structural(); + unclosed_string |= validate_string(); + // Operator + } else if (char_is_operator(buf[idx])) { + add_structural(); + // Primitive or invalid character (invalid characters will be checked in stage 2) + } else { + // Anything else, add the structural and go until we find the next one + add_structural(); + while (idx+1) #include #endif #endif +// The current specification is unclear on how we detect +// static reflection, both __cpp_lib_reflection and +// __cpp_impl_reflection are proposed in the draft specification. +// For now, we disable static reflect by default. It must be +// specified at compiler time. +#ifndef SIMDJSON_STATIC_REFLECTION +#define SIMDJSON_STATIC_REFLECTION 0 // disabled by default. +#endif + #if defined(__apple_build_version__) #if __apple_build_version__ < 14000000 #define SIMDJSON_CONCEPT_DISABLED 1 // apple-clang/13 doesn't support std::convertible_to #endif #endif +#if defined(__cpp_lib_ranges) && __cpp_lib_ranges >= 201911L +#include +#define SIMDJSON_SUPPORTS_RANGES 1 +#else +#define SIMDJSON_SUPPORTS_RANGES 0 +#endif #if defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED) #if __cpp_concepts >= 201907L #include -#define SIMDJSON_SUPPORTS_DESERIALIZATION 1 +#define SIMDJSON_SUPPORTS_CONCEPTS 1 #else -#define SIMDJSON_SUPPORTS_DESERIALIZATION 0 +#define SIMDJSON_SUPPORTS_CONCEPTS 0 #endif #else // defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED) -#define SIMDJSON_SUPPORTS_DESERIALIZATION 0 +#define SIMDJSON_SUPPORTS_CONCEPTS 0 #endif // defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED) +// copy SIMDJSON_SUPPORTS_CONCEPTS to SIMDJSON_SUPPORTS_DESERIALIZATION. +#if SIMDJSON_SUPPORTS_CONCEPTS +#define SIMDJSON_SUPPORTS_DESERIALIZATION 1 +#else +#define SIMDJSON_SUPPORTS_DESERIALIZATION 0 +#endif + + +#if !defined(SIMDJSON_CONSTEVAL) +#if defined(__cpp_consteval) && __cpp_consteval >= 201811L && defined(__cpp_lib_constexpr_string) && __cpp_lib_constexpr_string >= 201907L +#define SIMDJSON_CONSTEVAL 1 +#else +#define SIMDJSON_CONSTEVAL 0 +#endif // defined(__cpp_consteval) && __cpp_consteval >= 201811L && defined(__cpp_lib_constexpr_string) && __cpp_lib_constexpr_string >= 201907L +#endif // !defined(SIMDJSON_CONSTEVAL) + #endif // SIMDJSON_COMPILER_CHECK_H /* end file simdjson/compiler_check.h */ /* including simdjson/portability.h: #include "simdjson/portability.h" */ @@ -172,6 +213,22 @@ using std::size_t; #define SIMDJSON_IS_ARM64 1 #elif defined(__riscv) && __riscv_xlen == 64 #define SIMDJSON_IS_RISCV64 1 + #if __riscv_v_intrinsic >= 11000 + #define SIMDJSON_HAS_RVV_INTRINSICS 1 + #endif + + #define SIMDJSON_HAS_ZVBB_INTRINSICS \ + 0 // there is currently no way to detect this + + #if SIMDJSON_HAS_RVV_INTRINSICS && __riscv_vector && \ + __riscv_v_min_vlen >= 128 && __riscv_v_elen >= 64 + // RISC-V V extension + #define SIMDJSON_IS_RVV 1 + #if SIMDJSON_HAS_ZVBB_INTRINSICS && __riscv_zvbb >= 1000000 + // RISC-V Vector Basic Bit-manipulation + #define SIMDJSON_IS_ZVBB 1 + #endif + #endif #elif defined(__loongarch_lp64) #define SIMDJSON_IS_LOONGARCH64 1 #elif defined(__PPC64__) || defined(_M_PPC64) @@ -318,6 +375,7 @@ using std::size_t; #if defined(NDEBUG) || defined(__OPTIMIZE__) || (defined(_MSC_VER) && !defined(_DEBUG)) // If NDEBUG is set, or __OPTIMIZE__ is set, or we are under MSVC in release mode, // then do away with asserts and use __assume. +// We still recommend that our users set NDEBUG in release mode. #if SIMDJSON_VISUAL_STUDIO #define SIMDJSON_UNREACHABLE() __assume(0) #define SIMDJSON_ASSUME(COND) __assume(COND) @@ -393,7 +451,7 @@ double from_chars(const char *first, const char* end) noexcept; } #ifndef SIMDJSON_EXCEPTIONS -#if __cpp_exceptions +#if defined(__cpp_exceptions) || defined(_CPPUNWIND) #define SIMDJSON_EXCEPTIONS 1 #else #define SIMDJSON_EXCEPTIONS 0 @@ -569,17 +627,6 @@ double from_chars(const char *first, const char* end) noexcept; // We assume by default static linkage #define SIMDJSON_DLLIMPORTEXPORT #endif - -/** - * Workaround for the vcpkg package manager. Only vcpkg should - * ever touch the next line. The SIMDJSON_USING_LIBRARY macro is otherwise unused. - */ -#if SIMDJSON_USING_LIBRARY -#define SIMDJSON_DLLIMPORTEXPORT __declspec(dllimport) -#endif -/** - * End of workaround for the vcpkg package manager. - */ #else #define SIMDJSON_DLLIMPORTEXPORT #endif @@ -596,12 +643,14 @@ double from_chars(const char *first, const char* end) noexcept; // even if we do not have C++17 support. #ifdef __cpp_lib_string_view #define SIMDJSON_HAS_STRING_VIEW +#include #endif // Some systems have string_view even if we do not have C++17 support, // and even if __cpp_lib_string_view is undefined, it is the case // with Apple clang version 11. // We must handle it. *This is important.* +#ifndef _MSC_VER #ifndef SIMDJSON_HAS_STRING_VIEW #if defined __has_include // do not combine the next #if with the previous one (unsafe) @@ -617,6 +666,7 @@ double from_chars(const char *first, const char* end) noexcept; #endif // __has_include () #endif // defined __has_include #endif // def SIMDJSON_HAS_STRING_VIEW +#endif // def _MSC_VER // end of complicated but important routine to try to detect string_view. // @@ -796,22 +846,22 @@ inline namespace literals { inline namespace string_view_literals { -constexpr std::string_view operator "" _sv( const char* str, size_t len ) noexcept // (1) +constexpr std::string_view operator ""_sv( const char* str, size_t len ) noexcept // (1) { return std::string_view{ str, len }; } -constexpr std::u16string_view operator "" _sv( const char16_t* str, size_t len ) noexcept // (2) +constexpr std::u16string_view operator ""_sv( const char16_t* str, size_t len ) noexcept // (2) { return std::u16string_view{ str, len }; } -constexpr std::u32string_view operator "" _sv( const char32_t* str, size_t len ) noexcept // (3) +constexpr std::u32string_view operator ""_sv( const char32_t* str, size_t len ) noexcept // (3) { return std::u32string_view{ str, len }; } -constexpr std::wstring_view operator "" _sv( const wchar_t* str, size_t len ) noexcept // (4) +constexpr std::wstring_view operator ""_sv( const wchar_t* str, size_t len ) noexcept // (4) { return std::wstring_view{ str, len }; } @@ -1039,9 +1089,9 @@ using std::operator<<; #endif #if nssv_HAVE_NODISCARD -# define nssv_nodiscard [[nodiscard]] +# define nssv_nodiscard simdjson_warn_unused #else -# define nssv_nodiscard /*[[nodiscard]]*/ +# define nssv_nodiscard /*simdjson_warn_unused*/ #endif // Additional includes: @@ -2142,22 +2192,22 @@ nssv_inline_ns namespace string_view_literals { #if nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS -nssv_constexpr nonstd::sv_lite::string_view operator "" sv( const char* str, size_t len ) nssv_noexcept // (1) +nssv_constexpr nonstd::sv_lite::string_view operator ""sv( const char* str, size_t len ) nssv_noexcept // (1) { return nonstd::sv_lite::string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::u16string_view operator "" sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +nssv_constexpr nonstd::sv_lite::u16string_view operator ""sv( const char16_t* str, size_t len ) nssv_noexcept // (2) { return nonstd::sv_lite::u16string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::u32string_view operator "" sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +nssv_constexpr nonstd::sv_lite::u32string_view operator ""sv( const char32_t* str, size_t len ) nssv_noexcept // (3) { return nonstd::sv_lite::u32string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) +nssv_constexpr nonstd::sv_lite::wstring_view operator ""sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) { return nonstd::sv_lite::wstring_view{ str, len }; } @@ -2166,22 +2216,22 @@ nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, #if nssv_CONFIG_USR_SV_OPERATOR -nssv_constexpr nonstd::sv_lite::string_view operator "" _sv( const char* str, size_t len ) nssv_noexcept // (1) +nssv_constexpr nonstd::sv_lite::string_view operator ""_sv( const char* str, size_t len ) nssv_noexcept // (1) { return nonstd::sv_lite::string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::u16string_view operator "" _sv( const char16_t* str, size_t len ) nssv_noexcept // (2) +nssv_constexpr nonstd::sv_lite::u16string_view operator ""_sv( const char16_t* str, size_t len ) nssv_noexcept // (2) { return nonstd::sv_lite::u16string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::u32string_view operator "" _sv( const char32_t* str, size_t len ) nssv_noexcept // (3) +nssv_constexpr nonstd::sv_lite::u32string_view operator ""_sv( const char32_t* str, size_t len ) nssv_noexcept // (3) { return nonstd::sv_lite::u32string_view{ str, len }; } -nssv_constexpr nonstd::sv_lite::wstring_view operator "" _sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) +nssv_constexpr nonstd::sv_lite::wstring_view operator ""_sv( const wchar_t* str, size_t len ) nssv_noexcept // (4) { return nonstd::sv_lite::wstring_view{ str, len }; } @@ -2359,16 +2409,25 @@ namespace std { // It could also wrongly set SIMDJSON_DEVELOPMENT_CHECKS (e.g., if the programmer // sets _DEBUG in a release build under Visual Studio, or if some compiler fails to // set the __OPTIMIZE__ macro). +// We make it so that if NDEBUG is defined, then SIMDJSON_DEVELOPMENT_CHECKS +// is not defined, irrespective of the compiler. +// We recommend that users set NDEBUG in release builds, so that +// SIMDJSON_DEVELOPMENT_CHECKS is not defined in release builds by default, +// irrespective of the compiler. #ifndef SIMDJSON_DEVELOPMENT_CHECKS #ifdef _MSC_VER // Visual Studio seems to set _DEBUG for debug builds. -#ifdef _DEBUG +// We set SIMDJSON_DEVELOPMENT_CHECKS to 1 if _DEBUG is defined +// and NDEBUG is not defined. +#if defined(_DEBUG) && !defined(NDEBUG) #define SIMDJSON_DEVELOPMENT_CHECKS 1 #endif // _DEBUG #else // _MSC_VER // All other compilers appear to set __OPTIMIZE__ to a positive integer // when the compiler is optimizing. -#ifndef __OPTIMIZE__ +// We only set SIMDJSON_DEVELOPMENT_CHECKS if both __OPTIMIZE__ +// and NDEBUG are not defined. +#if !defined(__OPTIMIZE__) && !defined(NDEBUG) #define SIMDJSON_DEVELOPMENT_CHECKS 1 #endif // __OPTIMIZE__ #endif // _MSC_VER @@ -2424,6 +2483,18 @@ namespace std { #define SIMDJSON_AVX512_ALLOWED 1 #endif + +#ifndef __has_cpp_attribute +#define simdjson_lifetime_bound +#elif __has_cpp_attribute(msvc::lifetimebound) +#define simdjson_lifetime_bound [[msvc::lifetimebound]] +#elif __has_cpp_attribute(clang::lifetimebound) +#define simdjson_lifetime_bound [[clang::lifetimebound]] +#elif __has_cpp_attribute(lifetimebound) +#define simdjson_lifetime_bound [[lifetimebound]] +#else +#define simdjson_lifetime_bound +#endif #endif // SIMDJSON_COMMON_DEFS_H /* end file simdjson/common_defs.h */ @@ -2437,22 +2508,22 @@ namespace std { #define SIMDJSON_SIMDJSON_VERSION_H /** The version of simdjson being used (major.minor.revision) */ -#define SIMDJSON_VERSION "3.12.2" +#define SIMDJSON_VERSION "4.0.0" namespace simdjson { enum { /** * The major version (MAJOR.minor.revision) of simdjson being used. */ - SIMDJSON_VERSION_MAJOR = 3, + SIMDJSON_VERSION_MAJOR = 4, /** * The minor version (major.MINOR.revision) of simdjson being used. */ - SIMDJSON_VERSION_MINOR = 12, + SIMDJSON_VERSION_MINOR = 0, /** * The revision (major.minor.REVISION) of simdjson being used. */ - SIMDJSON_VERSION_REVISION = 2 + SIMDJSON_VERSION_REVISION = 0 }; } // namespace simdjson @@ -2494,7 +2565,7 @@ enum error_code { SUCCESS = 0, ///< No error CAPACITY, ///< This parser can't support a document that big MEMALLOC, ///< Error allocating memory, most likely out of memory - TAPE_ERROR, ///< Something went wrong, this is a generic error + TAPE_ERROR, ///< Something went wrong, this is a generic error. Fatal/unrecoverable error. DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation STRING_ERROR, ///< Problem while parsing a string T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't' @@ -2519,13 +2590,22 @@ enum error_code { PARSER_IN_USE, ///< parser is already in use. OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order (checked when SIMDJSON_DEVELOPMENT_CHECKS=1) INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it. - INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. + INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. Fatal/unrecoverable error. SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value. OUT_OF_BOUNDS, ///< Attempted to access location outside of document. TRAILING_CONTENT, ///< Unexpected trailing content in the JSON input - NUM_ERROR_CODES + OUT_OF_CAPACITY, ///< The capacity was exceeded, we cannot allocate enough memory. + NUM_ERROR_CODES ///< Placeholder for end of error code list. }; +/** + * Some errors are fatal and invalidate the document. This function returns true if the + * error is fatal. It returns true for TAPE_ERROR and INCOMPLETE_ARRAY_OR_OBJECT. + * Once a fatal error is encountered, the on-demand document is no longer valid and + * processing should stop. + */ + inline bool is_fatal(error_code error) noexcept; + /** * It is the convention throughout the code that the macro SIMDJSON_DEVELOPMENT_CHECKS determines whether * we check for OUT_OF_ORDER_ITERATION. The logic behind it is that these errors only occurs when the code @@ -2573,6 +2653,10 @@ namespace internal { /** * The result of a simdjson operation that could fail. * + * IMPORTANT: For the ondemand API, we use implementation_simdjson_result_base as a base class + * to avoid some compilation issue. Thus, if you modify this class, please ensure that the ondemand + * implementation_simdjson_result_base is also modified. + * * Gives the option of reading error codes, or throwing an exception by casting to the desired result. * * This is a base class for implementations that want to add functions to the result type for @@ -2633,8 +2717,27 @@ struct simdjson_result_base : protected std::pair { */ simdjson_inline error_code error() const noexcept; + /** + * Whether there is a value. + */ + simdjson_inline bool has_value() const noexcept; #if SIMDJSON_EXCEPTIONS + /** + * Dereference operator to access the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + /** * Get the result value. * @@ -2662,17 +2765,48 @@ struct simdjson_result_base : protected std::pair { * @throw simdjson_error if there was an error. */ simdjson_inline operator T&&() && noexcept(false); + #endif // SIMDJSON_EXCEPTIONS /** * Get the result value. This function is safe if and only * the error() method returns a value that evaluates to false. + * We discourage the use of value_unsafe(). + * + * The recommended pattern is: + * + * T value; // where T is the type + * auto error = result.get(value); + * if (error) { + * // handle error + * } + * + * Or you may call 'value()' which will raise an exception + * in case of error: + * + * T value = result.value(); */ simdjson_inline const T& value_unsafe() const& noexcept; /** * Take the result value (move it). This function is safe if and only * the error() method returns a value that evaluates to false. + * We discourage the use of value_unsafe(). + * + * The recommended pattern is: + * + * T value; // where T is the type + * auto error = result.get(value); + * if (error) { + * // handle error, return, exit, abort + * } else { + * // use value here. + * } + * + * Or you may call 'value()' which will raise an exception + * in case of error: + * + * T value = result.value(); */ simdjson_inline T&& value_unsafe() && noexcept; @@ -2687,6 +2821,7 @@ struct simdjson_result_base : protected std::pair { */ template struct simdjson_result : public internal::simdjson_result_base { + /** * @private Create a new empty result with error = UNINITIALIZED. */ @@ -2719,13 +2854,32 @@ struct simdjson_result : public internal::simdjson_result_base { */ simdjson_warn_unused simdjson_inline error_code get(T &value) && noexcept; + /** + * Copy the value to a provided std::string, only enabled for std::string_view. + * + * @param value The variable to assign the value to. May not be set if there is an error. + */ + template + simdjson_warn_unused simdjson_inline error_code get(std::string &value) && noexcept { + static_assert(std::is_same::value, "SFINAE"); + std::string_view v; + error_code error = std::forward>(*this).get(v); + if (!error) { + value.assign(v.data(), v.size()); + } + return error; + } + /** * The error. */ simdjson_inline error_code error() const noexcept; -#if SIMDJSON_EXCEPTIONS + +#if SIMDJSON_EXCEPTIONS + using internal::simdjson_result_base::operator*; + using internal::simdjson_result_base::operator->; /** * Get the result value. * @@ -2796,7 +2950,7 @@ inline const std::string error_message(int error) noexcept; /* begin file simdjson/concepts.h */ #ifndef SIMDJSON_CONCEPTS_H #define SIMDJSON_CONCEPTS_H -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS #include #include @@ -2828,14 +2982,32 @@ SIMDJSON_IMPL_CONCEPT(op_append, operator+=) #undef SIMDJSON_IMPL_CONCEPT } // namespace details +template +concept is_pair = requires { typename T::first_type; typename T::second_type; } && + std::same_as>; +template +concept string_view_like = std::is_convertible_v && + !std::is_convertible_v; + +template +concept constructible_from_string_view = std::is_constructible_v + && !std::is_same_v + && std::is_default_constructible_v; + +template +concept string_view_keyed_map = string_view_like + && requires(std::remove_cvref_t& m, typename M::key_type sv, typename M::mapped_type v) { + { m.emplace(sv, v) } -> std::same_as>; +}; + /// Check if T is a container that we can append to, including: /// std::vector, std::deque, std::list, std::string, ... template concept appendable_containers = - details::supports_emplace_back || details::supports_emplace || + (details::supports_emplace_back || details::supports_emplace || details::supports_push_back || details::supports_push || details::supports_add || details::supports_append || - details::supports_insert; + details::supports_insert) && !string_view_keyed_map; /// Insert into the container however possible template @@ -2895,7 +3067,6 @@ concept optional_type = requires(std::remove_cvref_t obj) { { obj.value() } -> std::same_as::value_type&>; requires requires(typename std::remove_cvref_t::value_type &&val) { obj.emplace(std::move(val)); - obj = std::move(val); { obj.value_or(val) } -> std::convertible_to::value_type>; @@ -2903,11 +3074,69 @@ concept optional_type = requires(std::remove_cvref_t obj) { { static_cast(obj) } -> std::same_as; // convertible to bool }; + + } // namespace concepts } // namespace simdjson -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS #endif // SIMDJSON_CONCEPTS_H /* end file simdjson/concepts.h */ +/* including simdjson/constevalutil.h: #include "simdjson/constevalutil.h" */ +/* begin file simdjson/constevalutil.h */ +#ifndef SIMDJSON_CONSTEVALUTIL_H +#define SIMDJSON_CONSTEVALUTIL_H + +#include +#include +#include + +#if SIMDJSON_CONSTEVAL +namespace simdjson { +namespace constevalutil { + +constexpr static std::array json_quotable_character = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +constexpr static std::array control_chars = { + "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", + "\\u0007", "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", + "\\u000e", "\\u000f", "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", + "\\u0015", "\\u0016", "\\u0017", "\\u0018", "\\u0019", "\\u001a", "\\u001b", + "\\u001c", "\\u001d", "\\u001e", "\\u001f"}; +// unoptimized, meant for compile-time execution +consteval std::string consteval_to_quoted_escaped(std::string_view input) { + std::string out = "\""; + for (char c : input) { + if (json_quotable_character[uint8_t(c)]) { + if (c == '"') { + out.append("\\\""); + } else if (c == '\\') { + out.append("\\\\"); + } else { + std::string_view v = control_chars[uint8_t(c)]; + out.append(v); + } + } else { + out.push_back(c); + } + } + out.push_back('"'); + return out; +} +} // namespace constevalutil +} // namespace simdjson +#endif // SIMDJSON_CONSTEVAL +#endif // SIMDJSON_CONSTEVALUTIL_H +/* end file simdjson/constevalutil.h */ /** * @brief The top level simdjson namespace, containing everything the library provides. @@ -2970,6 +3199,11 @@ enum class tape_type; #include namespace simdjson { + +inline bool is_fatal(error_code error) noexcept { + return error == TAPE_ERROR || error == INCOMPLETE_ARRAY_OR_OBJECT; +} + namespace internal { // We store the error code so we can validate the error message is associated with the right code struct error_code_info { @@ -3026,8 +3260,38 @@ simdjson_inline error_code simdjson_result_base::error() const noexcept { return this->second; } + +template +simdjson_inline bool simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS + +template +simdjson_inline T& simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -3052,6 +3316,7 @@ simdjson_inline simdjson_result_base::operator T&&() && noexcept(false) { #endif // SIMDJSON_EXCEPTIONS + template simdjson_inline const T& simdjson_result_base::value_unsafe() const& noexcept { return this->first; @@ -3087,7 +3352,8 @@ simdjson_inline void simdjson_result::tie(T &value, error_code &error) && noe } template -simdjson_warn_unused simdjson_inline error_code simdjson_result::get(T &value) && noexcept { +simdjson_warn_unused simdjson_inline error_code +simdjson_result::get(T &value) && noexcept { return std::forward>(*this).get(value); } @@ -3954,6 +4220,9 @@ class padded_string_view : public std::string_view { /** The number of allocated bytes. */ inline size_t capacity() const noexcept; + /** check that the view has sufficient padding */ + inline bool has_sufficient_padding() const noexcept; + /** * Remove the UTF-8 Byte Order Mark (BOM) if it exists. * @@ -3980,14 +4249,24 @@ inline std::ostream& operator<<(std::ostream& out, simdjson_result= SIMDJSON_PADDING) { + return true; + } + size_t missing_padding = SIMDJSON_PADDING - padding(); + if(length() < missing_padding) { return false; } + + for (size_t i = length() - missing_padding; i < length(); i++) { + char c = data()[i]; + if (c != ' ' && c != '\t' && c != '\n' && c != '\r') { + return false; + } + } + return true; +} + inline size_t padded_string_view::capacity() const noexcept { return _capacity; } inline size_t padded_string_view::padding() const noexcept { return capacity() - length(); } @@ -4052,10 +4347,33 @@ inline std::ostream& operator<<(std::ostream& out, simdjson_result 0; i--) { + char c = s[i - 1]; + if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { + existing_padding++; + } else { + break; + } + } + size_t needed_padding = 0; + if (existing_padding < SIMDJSON_PADDING) { + needed_padding = SIMDJSON_PADDING - existing_padding; + s.append(needed_padding, ' '); + } + + return padded_string_view(s.data(), s.size() - needed_padding, s.size()); } + +inline padded_string_view pad_with_reserve(std::string& s) noexcept { + if (s.capacity() - s.size() < SIMDJSON_PADDING) { + s.reserve(s.size() + SIMDJSON_PADDING ); + } + return padded_string_view(s.data(), s.size(), s.capacity()); +} + + + } // namespace simdjson @@ -4180,9 +4498,9 @@ inline const char *padded_string::data() const noexcept { return data_ptr; } inline char *padded_string::data() noexcept { return data_ptr; } -inline padded_string::operator std::string_view() const { return std::string_view(data(), length()); } +inline padded_string::operator std::string_view() const simdjson_lifetime_bound { return std::string_view(data(), length()); } -inline padded_string::operator padded_string_view() const noexcept { +inline padded_string::operator padded_string_view() const noexcept simdjson_lifetime_bound { return padded_string_view(data(), length(), length() + SIMDJSON_PADDING); } @@ -4322,6 +4640,8 @@ class tape_ref; #ifndef SIMDJSON_DOM_ARRAY_H #define SIMDJSON_DOM_ARRAY_H +#include + /* skipped duplicate #include "simdjson/dom/base.h" */ /* including simdjson/internal/tape_ref.h: #include "simdjson/internal/tape_ref.h" */ /* begin file simdjson/internal/tape_ref.h */ @@ -4429,7 +4749,7 @@ class array { iterator& operator=(const iterator&) noexcept = default; private: simdjson_inline iterator(const internal::tape_ref &tape) noexcept; - internal::tape_ref tape; + internal::tape_ref tape{}; friend class array; }; @@ -4480,6 +4800,17 @@ class array { */ inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; + /** + * Recursive function which processes the json path of each child element + */ + inline void process_json_path_of_child_elements(std::vector::iterator& current, std::vector::iterator& end, const std::string_view& path_suffix, std::vector& accumulator) const noexcept; + + + /** + * Adds support for JSONPath expression with wildcards '*' + */ + inline simdjson_result> at_path_with_wildcard(std::string_view json_path) const noexcept; + /** * Get the value associated with the given JSONPath expression. We only support * JSONPath queries that trivially convertible to JSON Pointer queries: key @@ -4513,6 +4844,15 @@ class array { */ inline simdjson_result at(size_t index) const noexcept; + /** + * Gets the values of items in an array element + * This function has linear-time complexity: the values are checked one by one. + * + * @return The child elements of an array + */ + + inline std::vector& get_values(std::vector& out) const noexcept; + /** * Implicitly convert object to element */ @@ -4520,7 +4860,7 @@ class array { private: simdjson_inline array(const internal::tape_ref &tape) noexcept; - internal::tape_ref tape; + internal::tape_ref tape{}; friend class element; friend struct simdjson_result; template @@ -4539,8 +4879,11 @@ struct simdjson_result : public internal::simdjson_result_base at_pointer(std::string_view json_pointer) const noexcept; + inline void process_json_path_of_child_elements(std::vector::iterator& current, std::vector::iterator& end, const std::string_view& path_suffix, std::vector& accumulator) const noexcept; + inline simdjson_result> at_path_with_wildcard(std::string_view json_path) const noexcept; inline simdjson_result at_path(std::string_view json_path) const noexcept; inline simdjson_result at(size_t index) const noexcept; + inline std::vector& get_values(std::vector& out) const noexcept; #if SIMDJSON_EXCEPTIONS inline dom::array::iterator begin() const noexcept(false); @@ -4553,9 +4896,7 @@ struct simdjson_result : public internal::simdjson_result_base - +#if SIMDJSON_SUPPORTS_RANGES namespace std { namespace ranges { template<> @@ -4566,7 +4907,7 @@ inline constexpr bool enable_view load(const std::string &path) & noexcept; - inline simdjson_result load(const std::string &path) && = delete ; + inline simdjson_result load(std::string_view path) & noexcept; + inline simdjson_result load(std::string_view path) && = delete ; /** * Load a JSON document from a file into a provide document instance and return a temporary reference to it. @@ -4821,8 +5162,8 @@ class parser { * - other json errors if parsing fails. You should not rely on these errors to always the same for the * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). */ - inline simdjson_result load_into_document(document& doc, const std::string &path) & noexcept; - inline simdjson_result load_into_document(document& doc, const std::string &path) && =delete; + inline simdjson_result load_into_document(document& doc, std::string_view path) & noexcept; + inline simdjson_result load_into_document(document& doc, std::string_view path) && =delete; /** * Parse a JSON document and return a temporary reference to it. @@ -5013,7 +5354,7 @@ class parser { * arrays or objects) MUST be separated with whitespace. * * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. - * Setting batch_size to excessively large or excesively small values may impact negatively the + * Setting batch_size to excessively large or excessively small values may impact negatively the * performance. * * ### Error Handling @@ -5059,7 +5400,7 @@ class parser { * - other json errors if parsing fails. You should not rely on these errors to always the same for the * same document: they may vary under runtime dispatch (so they may vary depending on your system and hardware). */ - inline simdjson_result load_many(const std::string &path, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; + inline simdjson_result load_many(std::string_view path, size_t batch_size = dom::DEFAULT_BATCH_SIZE) noexcept; /** * Parse a buffer containing many JSON documents. @@ -5107,7 +5448,7 @@ class parser { * arrays or objects) MUST be separated with whitespace. * * The documents must not exceed batch_size bytes (by default 1MB) or they will fail to parse. - * Setting batch_size to excessively large or excesively small values may impact negatively the + * Setting batch_size to excessively large or excessively small values may impact negatively the * performance. * * ### Error Handling @@ -5330,7 +5671,7 @@ class parser { inline error_code ensure_capacity(document& doc, size_t desired_capacity) noexcept; /** Read the file into loaded_bytes */ - inline simdjson_result read_file(const std::string &path) noexcept; + inline simdjson_result read_file(std::string_view path) noexcept; friend class parser::Iterator; friend class document_stream; @@ -5667,6 +6008,8 @@ struct simdjson_result : public internal::simdjson_result_ #ifndef SIMDJSON_DOM_ELEMENT_H #define SIMDJSON_DOM_ELEMENT_H +#include + /* skipped duplicate #include "simdjson/dom/base.h" */ /* skipped duplicate #include "simdjson/dom/array.h" */ @@ -6065,6 +6408,8 @@ class element { */ inline simdjson_result at_pointer(const std::string_view json_pointer) const noexcept; + inline simdjson_result> at_path_with_wildcard(const std::string_view json_path) const noexcept; + /** * Get the value associated with the given JSONPath expression. We only support * JSONPath queries that trivially convertible to JSON Pointer queries: key @@ -6158,7 +6503,7 @@ class element { private: simdjson_inline element(const internal::tape_ref &tape) noexcept; - internal::tape_ref tape; + internal::tape_ref tape{}; friend class document; friend class object; friend class array; @@ -6210,6 +6555,7 @@ struct simdjson_result : public internal::simdjson_result_base operator[](const char *key) const noexcept; simdjson_result operator[](int) const noexcept = delete; simdjson_inline simdjson_result at_pointer(const std::string_view json_pointer) const noexcept; + simdjson_inline simdjson_result> at_path_with_wildcard(const std::string_view json_path) const noexcept; simdjson_inline simdjson_result at_path(const std::string_view json_path) const noexcept; [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] simdjson_inline simdjson_result at(const std::string_view json_pointer) const noexcept; @@ -6241,6 +6587,8 @@ struct simdjson_result : public internal::simdjson_result_base + /* skipped duplicate #include "simdjson/dom/base.h" */ /* skipped duplicate #include "simdjson/dom/element.h" */ /* skipped duplicate #include "simdjson/internal/tape_ref.h" */ @@ -6329,7 +6677,7 @@ class object { private: simdjson_inline iterator(const internal::tape_ref &tape) noexcept; - internal::tape_ref tape; + internal::tape_ref tape{}; friend class object; }; @@ -6412,6 +6760,16 @@ class object { */ inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; + /** + * Recursive function which processes the json path of each child element + */ + inline void process_json_path_of_child_elements(std::vector::iterator& current, std::vector::iterator& end, const std::string_view& path_suffix, std::vector& accumulator) const noexcept; + + /** + * Adds support for JSONPath expression with wildcards '*' + */ + inline simdjson_result> at_path_with_wildcard(std::string_view json_path) const noexcept; + /** * Get the value associated with the given JSONPath expression. We only support * JSONPath queries that trivially convertible to JSON Pointer queries: key @@ -6443,6 +6801,14 @@ class object { */ inline simdjson_result at_key(std::string_view key) const noexcept; + /** + * Gets the values associated with keys of an object + * This function has linear-time complexity: the keys are checked one by one. + * + * @return the values associated with each key of an object + */ + inline std::vector& get_values(std::vector& out) const noexcept; + /** * Get the value associated with the given key in a case-insensitive manner. * It is only guaranteed to work over ASCII inputs. @@ -6464,7 +6830,7 @@ class object { private: simdjson_inline object(const internal::tape_ref &tape) noexcept; - internal::tape_ref tape; + internal::tape_ref tape{}; friend class element; friend struct simdjson_result; @@ -6501,8 +6867,11 @@ struct simdjson_result : public internal::simdjson_result_base operator[](const char *key) const noexcept; simdjson_result operator[](int) const noexcept = delete; inline simdjson_result at_pointer(std::string_view json_pointer) const noexcept; + inline void process_json_path_of_child_elements(std::vector::iterator& current, std::vector::iterator& end, const std::string_view& path_suffix, std::vector& accumulator) const noexcept; + inline simdjson_result> at_path_with_wildcard(std::string_view json_path_new) const noexcept; inline simdjson_result at_path(std::string_view json_path) const noexcept; inline simdjson_result at_key(std::string_view key) const noexcept; + inline std::vector& get_values(std::vector& out) const noexcept; inline simdjson_result at_key_case_insensitive(std::string_view key) const noexcept; #if SIMDJSON_EXCEPTIONS @@ -6514,9 +6883,7 @@ struct simdjson_result : public internal::simdjson_result_base - +#if SIMDJSON_SUPPORTS_RANGES namespace std { namespace ranges { template<> @@ -6527,7 +6894,7 @@ inline constexpr bool enable_view - namespace simdjson { /** @@ -6552,8 +6917,7 @@ namespace simdjson { */ namespace internal { -template -class base_formatter { +template class base_formatter { public: /** Add a comma **/ simdjson_inline void comma(); @@ -6592,24 +6956,76 @@ class base_formatter { /** Prints one character **/ simdjson_inline void one_char(char c); + /** Prints characters in [begin, end) verbatim. **/ + simdjson_inline void chars(const char *begin, const char *end); + simdjson_inline void call_print_newline() { - static_cast(this)->print_newline(); + static_cast(this)->print_newline(); } simdjson_inline void call_print_indents(size_t depth) { - static_cast(this)->print_indents(depth); + static_cast(this)->print_indents(depth); } simdjson_inline void call_print_space() { - static_cast(this)->print_space(); + static_cast(this)->print_space(); } protected: // implementation details (subject to change) /** Backing buffer **/ - std::vector buffer{}; // not ideal! -}; + struct vector_with_small_buffer { + vector_with_small_buffer() = default; + ~vector_with_small_buffer() { free_buffer(); } + + vector_with_small_buffer(const vector_with_small_buffer &) = delete; + vector_with_small_buffer & + operator=(const vector_with_small_buffer &) = delete; + + void clear() { + size = 0; + capacity = StaticCapacity; + free_buffer(); + buffer = array; + } + simdjson_inline void push_back(char c) { + if (capacity < size + 1) + grow(capacity * 2); + buffer[size++] = c; + } + + simdjson_inline void append(const char *begin, const char *end) { + const size_t new_size = size + (end - begin); + if (capacity < new_size) + // std::max(new_size, capacity * 2); is broken in tests on Windows + grow(new_size < capacity * 2 ? capacity * 2 : new_size); + std::copy(begin, end, buffer + size); + size = new_size; + } + + std::string_view str() const { return std::string_view(buffer, size); } + + private: + void free_buffer() { + if (buffer != array) + delete[] buffer; + } + void grow(size_t new_capacity) { + auto new_buffer = new char[new_capacity]; + std::copy(buffer, buffer + size, new_buffer); + free_buffer(); + buffer = new_buffer; + capacity = new_capacity; + } + + static const size_t StaticCapacity = 64; + char array[StaticCapacity]; + char *buffer = array; + size_t size = 0; + size_t capacity = StaticCapacity; + } buffer{}; +}; /** * @private This is the class that we expect to use with the string_builder @@ -6643,9 +7059,11 @@ class pretty_formatter : public base_formatter { * by a "formatter" which handles the details. Thus * the string_builder template could support both minification * and prettification, and various other tradeoffs. + * + * This is not to be confused with the simdjson::builder::string_builder + * which is a different class. */ -template -class string_builder { +template class string_builder { public: /** Construct an initially empty builder, would print the empty string **/ string_builder() = default; @@ -6667,11 +7085,12 @@ class string_builder { simdjson_inline std::string_view str() const; /** Append a key_value_pair to the builder (to be printed) **/ simdjson_inline void append(simdjson::dom::key_value_pair value); + private: formatter format{}; }; -} // internal +} // namespace internal namespace dom { @@ -6680,33 +7099,43 @@ namespace dom { * * @param out The output stream. * @param value The element. - * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + * @throw if there is an error with the underlying output stream. simdjson + * itself will not throw. */ -inline std::ostream& operator<<(std::ostream& out, simdjson::dom::element value); +inline std::ostream &operator<<(std::ostream &out, + simdjson::dom::element value); #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +inline std::ostream & +operator<<(std::ostream &out, + simdjson::simdjson_result x); #endif /** * Print JSON to an output stream. * * @param out The output stream. * @param value The array. - * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + * @throw if there is an error with the underlying output stream. simdjson + * itself will not throw. */ -inline std::ostream& operator<<(std::ostream& out, simdjson::dom::array value); +inline std::ostream &operator<<(std::ostream &out, simdjson::dom::array value); #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +inline std::ostream & +operator<<(std::ostream &out, + simdjson::simdjson_result x); #endif /** * Print JSON to an output stream. * * @param out The output stream. * @param value The object. - * @throw if there is an error with the underlying output stream. simdjson itself will not throw. + * @throw if there is an error with the underlying output stream. simdjson + * itself will not throw. */ -inline std::ostream& operator<<(std::ostream& out, simdjson::dom::object value); +inline std::ostream &operator<<(std::ostream &out, simdjson::dom::object value); #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x); +inline std::ostream & +operator<<(std::ostream &out, + simdjson::simdjson_result x); #endif } // namespace dom @@ -6718,47 +7147,47 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result -std::string to_string(T x) { - // in C++, to_string is standard: http://www.cplusplus.com/reference/string/to_string/ - // Currently minify and to_string are identical but in the future, they may - // differ. - simdjson::internal::string_builder<> sb; - sb.append(x); - std::string_view answer = sb.str(); - return std::string(answer.data(), answer.size()); +template std::string to_string(T x) { + // in C++, to_string is standard: + // http://www.cplusplus.com/reference/string/to_string/ Currently minify and + // to_string are identical but in the future, they may differ. + simdjson::internal::string_builder<> sb; + sb.append(x); + std::string_view answer = sb.str(); + return std::string(answer.data(), answer.size()); } #if SIMDJSON_EXCEPTIONS -template -std::string to_string(simdjson_result x) { - if (x.error()) { throw simdjson_error(x.error()); } - return to_string(x.value()); +template std::string to_string(simdjson_result x) { + if (x.error()) { + throw simdjson_error(x.error()); + } + return to_string(x.value()); } #endif /** - * Minifies a JSON element or document, printing the smallest possible valid JSON. + * Minifies a JSON element or document, printing the smallest possible valid + * JSON. * * dom::parser parser; * element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded); * cout << minify(doc) << endl; // prints [1,2,3] * */ -template -std::string minify(T x) { - return to_string(x); -} +template std::string minify(T x) { return to_string(x); } #if SIMDJSON_EXCEPTIONS -template -std::string minify(simdjson_result x) { - if (x.error()) { throw simdjson_error(x.error()); } - return to_string(x.value()); +template std::string minify(simdjson_result x) { + if (x.error()) { + throw simdjson_error(x.error()); + } + return to_string(x.value()); } #endif /** - * Prettifies a JSON element or document, printing the valid JSON with indentation. + * Prettifies a JSON element or document, printing the valid JSON with + * indentation. * * dom::parser parser; * element doc = parser.parse(" [ 1 , 2 , 3 ] "_padded); @@ -6774,25 +7203,24 @@ std::string minify(simdjson_result x) { * cout << prettify(doc) << endl; * */ -template -std::string prettify(T x) { - simdjson::internal::string_builder sb; - sb.append(x); - std::string_view answer = sb.str(); - return std::string(answer.data(), answer.size()); +template std::string prettify(T x) { + simdjson::internal::string_builder sb; + sb.append(x); + std::string_view answer = sb.str(); + return std::string(answer.data(), answer.size()); } #if SIMDJSON_EXCEPTIONS -template -std::string prettify(simdjson_result x) { - if (x.error()) { throw simdjson_error(x.error()); } - return to_string(x.value()); +template std::string prettify(simdjson_result x) { + if (x.error()) { + throw simdjson_error(x.error()); + } + return to_string(x.value()); } #endif } // namespace simdjson - #endif /* end file simdjson/dom/serialization.h */ @@ -6814,7 +7242,9 @@ std::string prettify(simdjson_result x) { #define SIMDJSON_JSONPATHUTIL_H #include -#include +/* skipped duplicate #include "simdjson/common_defs.h" */ + +#include namespace simdjson { /** @@ -6824,12 +7254,12 @@ namespace simdjson { */ inline std::string json_path_to_pointer_conversion(std::string_view json_path) { size_t i = 0; - // if JSONPath starts with $, skip it + // json_path.starts_with('$') requires C++20. if (!json_path.empty() && json_path.front() == '$') { i = 1; } - if (json_path.empty() || (json_path[i] != '.' && + if (i >= json_path.size() || (json_path[i] != '.' && json_path[i] != '[')) { return "-1"; // This is just a sentinel value, the caller should check for this and return an error. } @@ -6872,6 +7302,49 @@ inline std::string json_path_to_pointer_conversion(std::string_view json_path) { return result; } + +inline std::pair get_next_key_and_json_path(std::string_view& json_path) { + std::string_view key; + + if (json_path.empty()) { + return {key, json_path}; + } + size_t i = 0; + + // if JSONPath starts with $, skip it + if (json_path.front() == '$') { + i = 1; + } + + + if (i < json_path.length() && json_path[i] == '.') { + i += 1; + size_t key_start = i; + + while (i < json_path.length() && json_path[i] != '[' && json_path[i] != '.') { + ++i; + } + + key = json_path.substr(key_start, i - key_start); + } else if ((i+1 < json_path.size()) && json_path[i] == '[' && (json_path[i+1] == '\'' || json_path[i+1] == '"')) { + i += 2; + size_t key_start = i; + while (i < json_path.length() && json_path[i] != '\'' && json_path[i] != '"') { + ++i; + } + + key = json_path.substr(key_start, i - key_start); + + i += 2; + } else if ((i+2 < json_path.size()) && json_path[i] == '[' && json_path[i+1] == '*' && json_path[i+2] == ']') { // i.e [*].additional_keys or [*]["additional_keys"] + key = "*"; + i += 3; + } + + + return std::make_pair(key, json_path.substr(i)); +} + } // namespace simdjson #endif // SIMDJSON_JSONPATHUTIL_H /* end file simdjson/jsonpathutil.h */ @@ -7069,11 +7542,22 @@ inline simdjson_result simdjson_result::at_pointer(std return at_pointer(json_pointer); } +inline simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) const noexcept { + if (error()) { + return error(); + } + return first.at_path_with_wildcard(json_path); +} + inline simdjson_result simdjson_result::at(size_t index) const noexcept { if (error()) { return error(); } return first.at(index); } +inline std::vector& simdjson_result::get_values(std::vector& out) const noexcept { + return first.get_values(out); +} + namespace dom { // @@ -7144,6 +7628,93 @@ inline simdjson_result array::at_path(std::string_view json_path) const return at_pointer(json_pointer); } +inline void array::process_json_path_of_child_elements(std::vector::iterator& current, std::vector::iterator& end, const std::string_view& path_suffix, std::vector& accumulator) const noexcept { + if (current == end) { + return; + } + + simdjson_result> result; + + + for (auto it = current; it != end; ++it) { + std::vector child_result; + auto error = it->at_path_with_wildcard(path_suffix).get(child_result); + if(error) { + continue; + } + accumulator.reserve(accumulator.size() + child_result.size()); + accumulator.insert(accumulator.end(), + std::make_move_iterator(child_result.begin()), + std::make_move_iterator(child_result.end())); + } +} + +inline simdjson_result> array::at_path_with_wildcard(std::string_view json_path) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + + size_t i = 0; + // json_path.starts_with('$') requires C++20. + if (!json_path.empty() && json_path.front() == '$') { + i = 1; + } + + if (i >= json_path.size() || (json_path[i] != '.' && json_path[i] != '[')) { + return INVALID_JSON_POINTER; + } + + if (json_path.find("*") != std::string::npos) { + std::vector child_values; + + if ( + (json_path.compare(i, 3, "[*]") == 0 && json_path.size() == i + 3) || + (json_path.compare(i, 2,".*") == 0 && json_path.size() == i + 2) + ) { + get_values(child_values); + return child_values; + } + + std::pair key_and_json_path = get_next_key_and_json_path(json_path); + + std::string_view key = key_and_json_path.first; + json_path = key_and_json_path.second; + + if (key.size() > 0) { + if (key == "*") { + get_values(child_values); + } else { + element pointer_result; + std::string json_pointer = std::string("/") + std::string(key); + auto error = at_pointer(json_pointer).get(pointer_result); + + if (!error) { + child_values.emplace_back(pointer_result); + } + } + + std::vector result = {}; + + if (child_values.size() > 0) { + std::vector::iterator child_values_begin = child_values.begin(); + std::vector::iterator child_values_end = child_values.end(); + + process_json_path_of_child_elements(child_values_begin, child_values_end, json_path, result); + } + + return result; + } else { + return INVALID_JSON_POINTER; + } + } else { + element result; + auto error = at_path(json_path).get(result); + if (error) { + return error; + } + + return std::vector{std::move(result)}; + } +} + inline simdjson_result array::at(size_t index) const noexcept { SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 size_t i=0; @@ -7154,6 +7725,15 @@ inline simdjson_result array::at(size_t index) const noexcept { return INDEX_OUT_OF_BOUNDS; } +inline std::vector& array::get_values(std::vector& out) const noexcept { + out.reserve(this->size()); + for (auto element : *this) { + out.emplace_back(element); + } + + return out; +} + inline array::operator element() const noexcept { return element(tape); } @@ -7253,10 +7833,19 @@ inline simdjson_result simdjson_result::at_path(std:: if (json_pointer == "-1") { return INVALID_JSON_POINTER; } return at_pointer(json_pointer); } +inline simdjson_result> simdjson_result::at_path_with_wildcard(std::string_view json_path) const noexcept { + if (error()) { + return error(); + } + return first.at_path_with_wildcard(json_path); +} inline simdjson_result simdjson_result::at_key(std::string_view key) const noexcept { if (error()) { return error(); } return first.at_key(key); } +inline std::vector& simdjson_result::get_values(std::vector& out) const noexcept { + return first.get_values(out); +} inline simdjson_result simdjson_result::at_key_case_insensitive(std::string_view key) const noexcept { if (error()) { return error(); } return first.at_key_case_insensitive(key); @@ -7356,6 +7945,97 @@ inline simdjson_result object::at_path(std::string_view json_path) cons return at_pointer(json_pointer); } +inline void object::process_json_path_of_child_elements(std::vector::iterator& current, std::vector::iterator& end, const std::string_view& path_suffix, std::vector& accumulator) const noexcept { + if (current == end) { + return; + } + + simdjson_result> result; + + for (auto it = current; it != end; ++it) { + std::vector child_result; + auto error = it->at_path_with_wildcard(path_suffix).get(child_result); + if(error) { + continue; + } + accumulator.reserve(accumulator.size() + child_result.size()); + accumulator.insert(accumulator.end(), + std::make_move_iterator(child_result.begin()), + std::make_move_iterator(child_result.end())); + } +} + +inline simdjson_result> object::at_path_with_wildcard(std::string_view json_path) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + + size_t i = 0; + if (json_path.empty()) { + return INVALID_JSON_POINTER; + } + // if JSONPath starts with $, skip it + // json_path.starts_with('$') requires C++20. + if (json_path.front() == '$') { + i = 1; + } + + if (i >= json_path.size() || (json_path[i] != '.' && json_path[i] != '[')) { + // expect json path to always start with $ but this isn't currently + // expected in jsonpathutil.h. + return INVALID_JSON_POINTER; + } + + if (json_path.find("*") != std::string::npos) { + + std::vector child_values; + + if ( + (json_path.compare(i, 3, "[*]") == 0 && json_path.size() == i + 3) || + (json_path.compare(i, 2,".*") == 0 && json_path.size() == i + 2) + ) { + get_values(child_values); + return child_values; + } + + std::pair key_and_json_path = get_next_key_and_json_path(json_path); + + std::string_view key = key_and_json_path.first; + json_path = key_and_json_path.second; + + if (key.size() > 0) { + if (key == "*") { + get_values(child_values); + } else { + element pointer_result; + auto error = at_pointer(std::string("/") + std::string(key)).get(pointer_result); + + if (!error) { + child_values.emplace_back(pointer_result); + } + } + + std::vector result = {}; + if (child_values.size() > 0) { + + std::vector::iterator child_values_begin = child_values.begin(); + std::vector::iterator child_values_end = child_values.end(); + + process_json_path_of_child_elements(child_values_begin, child_values_end, json_path, result); + } + + return result; + } else { + return INVALID_JSON_POINTER; + } + } else { + element result; + auto error = this->at_path(json_path).get(result); + if (error) { + return error; + } + return std::vector{std::move(result)}; + } +} + inline simdjson_result object::at_key(std::string_view key) const noexcept { iterator end_field = end(); for (iterator field = begin(); field != end_field; ++field) { @@ -7365,6 +8045,18 @@ inline simdjson_result object::at_key(std::string_view key) const noexc } return NO_SUCH_FIELD; } + +inline std::vector& object::get_values(std::vector& out) const noexcept { + iterator end_field = end(); + iterator begin_field = begin(); + + out.reserve(std::distance(begin_field, end_field)); + for (iterator field = begin_field; field != end_field; ++field) { + out.emplace_back(field.value()); + } + + return out; +} // In case you wonder why we need this, please see // https://github.com/simdjson/simdjson/issues/323 // People do seek keys in a case-insensitive manner. @@ -7476,14 +8168,14 @@ inline key_value_pair::key_value_pair(std::string_view _key, element _value) noe } // namespace simdjson -#if defined(__cpp_lib_ranges) +#if SIMDJSON_SUPPORTS_RANGES static_assert(std::ranges::view); static_assert(std::ranges::sized_range); #if SIMDJSON_EXCEPTIONS static_assert(std::ranges::view>); static_assert(std::ranges::sized_range>); #endif // SIMDJSON_EXCEPTIONS -#endif // defined(__cpp_lib_ranges) +#endif // SIMDJSON_SUPPORTS_RANGES #endif // SIMDJSON_OBJECT_INL_H /* end file simdjson/dom/object-inl.h */ @@ -7607,6 +8299,12 @@ simdjson_inline simdjson_result simdjson_result::at_ if (json_pointer == "-1") { return INVALID_JSON_POINTER; } return at_pointer(json_pointer); } + +simdjson_inline simdjson_result> simdjson_result::at_path_with_wildcard(const std::string_view json_path) const noexcept { + if (error()) { return error(); } + return first.at_path_with_wildcard(json_path); +} + #ifndef SIMDJSON_DISABLE_DEPRECATED_API [[deprecated("For standard compliance, use at_pointer instead, and prefix your pointers with a slash '/', see RFC6901 ")]] simdjson_inline simdjson_result simdjson_result::at(const std::string_view json_pointer) const noexcept { @@ -7897,6 +8595,20 @@ inline simdjson_result element::at_pointer(std::string_view json_pointe } } } + +inline simdjson_result> element::at_path_with_wildcard(std::string_view json_path) const noexcept { + SIMDJSON_DEVELOPMENT_ASSERT(tape.usable()); // https://github.com/simdjson/simdjson/issues/1914 + + switch (tape.tape_ref_type()) { + case internal::tape_type::START_OBJECT: + return object(tape).at_path_with_wildcard(json_path); + case internal::tape_type::START_ARRAY: + return array(tape).at_path_with_wildcard(json_path); + default: + return std::vector{}; + } +} + inline simdjson_result element::at_path(std::string_view json_path) const noexcept { auto json_pointer = json_path_to_pointer_conversion(json_path); if (json_pointer == "-1") { return INVALID_JSON_POINTER; } @@ -7963,14 +8675,14 @@ inline std::ostream& operator<<(std::ostream& out, element_type type) { #endif // SIMDJSON_ELEMENT_INL_H /* end file simdjson/dom/element-inl.h */ -#if defined(__cpp_lib_ranges) +#if SIMDJSON_SUPPORTS_RANGES static_assert(std::ranges::view); static_assert(std::ranges::sized_range); #if SIMDJSON_EXCEPTIONS static_assert(std::ranges::view>); static_assert(std::ranges::sized_range>); #endif // SIMDJSON_EXCEPTIONS -#endif // defined(__cpp_lib_ranges) +#endif // SIMDJSON_SUPPORTS_RANGES #endif // SIMDJSON_ARRAY_INL_H /* end file simdjson/dom/array-inl.h */ @@ -8021,11 +8733,11 @@ inline bool parser::dump_raw_tape(std::ostream &os) const noexcept { return valid ? doc.dump_raw_tape(os) : false; } -inline simdjson_result parser::read_file(const std::string &path) noexcept { +inline simdjson_result parser::read_file(std::string_view path) noexcept { // Open the file SIMDJSON_PUSH_DISABLE_WARNINGS SIMDJSON_DISABLE_DEPRECATED_WARNING // Disable CRT_SECURE warning on MSVC: manually verified this is safe - std::FILE *fp = std::fopen(path.c_str(), "rb"); + std::FILE *fp = std::fopen(path.data(), "rb"); SIMDJSON_POP_DISABLE_WARNINGS if (fp == nullptr) { @@ -8077,18 +8789,18 @@ inline simdjson_result parser::read_file(const std::string &path) noexce return bytes_read; } -inline simdjson_result parser::load(const std::string &path) & noexcept { +inline simdjson_result parser::load(std::string_view path) & noexcept { return load_into_document(doc, path); } -inline simdjson_result parser::load_into_document(document& provided_doc, const std::string &path) & noexcept { +inline simdjson_result parser::load_into_document(document& provided_doc, std::string_view path) & noexcept { size_t len; auto _error = read_file(path).get(len); if (_error) { return _error; } return parse_into_document(provided_doc, loaded_bytes.get(), len, false); } -inline simdjson_result parser::load_many(const std::string &path, size_t batch_size) noexcept { +inline simdjson_result parser::load_many(std::string_view path, size_t batch_size) noexcept { size_t len; auto _error = read_file(path).get(len); if (_error) { return _error; } @@ -8824,8 +9536,8 @@ inline bool document::dump_raw_tape(std::ostream &os) const noexcept { #define SIMDJSON_SERIALIZATION_INL_H /* skipped duplicate #include "simdjson/dom/base.h" */ -/* skipped duplicate #include "simdjson/dom/serialization.h" */ /* skipped duplicate #include "simdjson/dom/parser.h" */ +/* skipped duplicate #include "simdjson/dom/serialization.h" */ /* skipped duplicate #include "simdjson/internal/tape_type.h" */ /* skipped duplicate #include "simdjson/dom/array-inl.h" */ @@ -8837,7 +9549,9 @@ inline bool document::dump_raw_tape(std::ostream &os) const noexcept { namespace simdjson { namespace dom { inline bool parser::print_json(std::ostream &os) const noexcept { - if (!valid) { return false; } + if (!valid) { + return false; + } simdjson::internal::string_builder<> sb; sb.append(doc.root()); std::string_view answer = sb.str(); @@ -8845,37 +9559,51 @@ inline bool parser::print_json(std::ostream &os) const noexcept { return true; } -inline std::ostream& operator<<(std::ostream& out, simdjson::dom::element value) { - simdjson::internal::string_builder<> sb; - sb.append(value); - return (out << sb.str()); +inline std::ostream &operator<<(std::ostream &out, + simdjson::dom::element value) { + simdjson::internal::string_builder<> sb; + sb.append(value); + return (out << sb.str()); } #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); +inline std::ostream & +operator<<(std::ostream &out, + simdjson::simdjson_result x) { + if (x.error()) { + throw simdjson::simdjson_error(x.error()); + } + return (out << x.value()); } #endif -inline std::ostream& operator<<(std::ostream& out, simdjson::dom::array value) { - simdjson::internal::string_builder<> sb; - sb.append(value); - return (out << sb.str()); +inline std::ostream &operator<<(std::ostream &out, simdjson::dom::array value) { + simdjson::internal::string_builder<> sb; + sb.append(value); + return (out << sb.str()); } #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); +inline std::ostream & +operator<<(std::ostream &out, + simdjson::simdjson_result x) { + if (x.error()) { + throw simdjson::simdjson_error(x.error()); + } + return (out << x.value()); } #endif -inline std::ostream& operator<<(std::ostream& out, simdjson::dom::object value) { - simdjson::internal::string_builder<> sb; - sb.append(value); - return (out << sb.str()); +inline std::ostream &operator<<(std::ostream &out, + simdjson::dom::object value) { + simdjson::internal::string_builder<> sb; + sb.append(value); + return (out << sb.str()); } #if SIMDJSON_EXCEPTIONS -inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result x) { - if (x.error()) { throw simdjson::simdjson_error(x.error()); } - return (out << x.value()); +inline std::ostream & +operator<<(std::ostream &out, + simdjson::simdjson_result x) { + if (x.error()) { + throw simdjson::simdjson_error(x.error()); + } + return (out << x.value()); } #endif @@ -8890,8 +9618,9 @@ namespace { * We expect that most compilers will use 8 bytes for this data structure. **/ struct escape_sequence { - uint8_t length; - const char string[7]; // technically, we only ever need 6 characters, we pad to 8 + uint8_t length; + const char + string[7]; // technically, we only ever need 6 characters, we pad to 8 }; /**@private * This converts a signed integer into a character sequence. @@ -8907,7 +9636,7 @@ static char *fast_itoa(char *output, int64_t value) noexcept { char buffer[20]; uint64_t value_positive; // In general, negating a signed integer is unsafe. - if(value < 0) { + if (value < 0) { *output++ = '-'; // Doing value_positive = -value; while avoiding // undefined behavior warnings. @@ -8926,7 +9655,7 @@ static char *fast_itoa(char *output, int64_t value) noexcept { // A faster approach is possible if we expect large integers: // unroll the loop (work in 100s, 1000s) and use some kind of // memoization. - while(value_positive >= 10) { + while (value_positive >= 10) { *write_pointer-- = char('0' + (value_positive % 10)); value_positive /= 10; } @@ -8952,7 +9681,7 @@ static char *fast_itoa(char *output, uint64_t value) noexcept { // A faster approach is possible if we expect large integers: // unroll the loop (work in 100s, 1000s) and use some kind of // memoization. - while(value >= 10) { + while (value >= 10) { *write_pointer-- = char('0' + (value % 10)); value /= 10; }; @@ -8962,7 +9691,6 @@ static char *fast_itoa(char *output, uint64_t value) noexcept { return output + len; } - } // anonymous namespace namespace internal { @@ -8970,194 +9698,209 @@ namespace internal { * Minifier/formatter code. **/ -template +template simdjson_inline void base_formatter::number(uint64_t x) { char number_buffer[24]; char *newp = fast_itoa(number_buffer, x); - buffer.insert(buffer.end(), number_buffer, newp); + chars(number_buffer, newp); } -template +template simdjson_inline void base_formatter::number(int64_t x) { char number_buffer[24]; char *newp = fast_itoa(number_buffer, x); - buffer.insert(buffer.end(), number_buffer, newp); + chars(number_buffer, newp); } -template +template simdjson_inline void base_formatter::number(double x) { char number_buffer[24]; // Currently, passing the nullptr to the second argument is // safe because our implementation does not check the second // argument. char *newp = internal::to_chars(number_buffer, nullptr, x); - buffer.insert(buffer.end(), number_buffer, newp); + chars(number_buffer, newp); } -template -simdjson_inline void base_formatter::start_array() { one_char('['); } - +template +simdjson_inline void base_formatter::start_array() { + one_char('['); +} -template -simdjson_inline void base_formatter::end_array() { one_char(']'); } +template +simdjson_inline void base_formatter::end_array() { + one_char(']'); +} -template -simdjson_inline void base_formatter::start_object() { one_char('{'); } +template +simdjson_inline void base_formatter::start_object() { + one_char('{'); +} -template -simdjson_inline void base_formatter::end_object() { one_char('}'); } +template +simdjson_inline void base_formatter::end_object() { + one_char('}'); +} -template -simdjson_inline void base_formatter::comma() { one_char(','); } +template +simdjson_inline void base_formatter::comma() { + one_char(','); +} -template +template simdjson_inline void base_formatter::true_atom() { - const char * s = "true"; - buffer.insert(buffer.end(), s, s + 4); + const char *s = "true"; + chars(s, s + 4); } -template +template simdjson_inline void base_formatter::false_atom() { - const char * s = "false"; - buffer.insert(buffer.end(), s, s + 5); + const char *s = "false"; + chars(s, s + 5); } -template +template simdjson_inline void base_formatter::null_atom() { - const char * s = "null"; - buffer.insert(buffer.end(), s, s + 4); + const char *s = "null"; + chars(s, s + 4); +} + +template +simdjson_inline void base_formatter::one_char(char c) { + buffer.push_back(c); } -template -simdjson_inline void base_formatter::one_char(char c) { buffer.push_back(c); } +template +simdjson_inline void base_formatter::chars(const char *begin, + const char *end) { + buffer.append(begin, end); +} -template -simdjson_inline void base_formatter::key(std::string_view unescaped) { +template +simdjson_inline void +base_formatter::key(std::string_view unescaped) { string(unescaped); one_char(':'); } -template -simdjson_inline void base_formatter::string(std::string_view unescaped) { +template +simdjson_inline void +base_formatter::string(std::string_view unescaped) { one_char('\"'); size_t i = 0; - // Fast path for the case where we have no control character, no ", and no backslash. - // This should include most keys. - // - // We would like to use 'bool' but some compilers take offense to bitwise operation - // with bool types. - constexpr static char needs_escaping[] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - for(;i + 8 <= unescaped.length(); i += 8) { + // Fast path for the case where we have no control character, no ", and no + // backslash. This should include most keys. + // + // We would like to use 'bool' but some compilers take offense to bitwise + // operation with bool types. + constexpr static char needs_escaping[] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + for (; i + 8 <= unescaped.length(); i += 8) { // Poor's man vectorization. This could get much faster if we used SIMD. // - // It is not the case that replacing '|' with '||' would be neutral performance-wise. - if(needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i+1])] - | needs_escaping[uint8_t(unescaped[i+2])] | needs_escaping[uint8_t(unescaped[i+3])] - | needs_escaping[uint8_t(unescaped[i+4])] | needs_escaping[uint8_t(unescaped[i+5])] - | needs_escaping[uint8_t(unescaped[i+6])] | needs_escaping[uint8_t(unescaped[i+7])] - ) { break; } - } - for(;i < unescaped.length(); i++) { - if(needs_escaping[uint8_t(unescaped[i])]) { break; } - } - // The following is also possible and omits a 256-byte table, but it is slower: - // for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F) + // It is not the case that replacing '|' with '||' would be neutral + // performance-wise. + if (needs_escaping[uint8_t(unescaped[i])] | + needs_escaping[uint8_t(unescaped[i + 1])] | + needs_escaping[uint8_t(unescaped[i + 2])] | + needs_escaping[uint8_t(unescaped[i + 3])] | + needs_escaping[uint8_t(unescaped[i + 4])] | + needs_escaping[uint8_t(unescaped[i + 5])] | + needs_escaping[uint8_t(unescaped[i + 6])] | + needs_escaping[uint8_t(unescaped[i + 7])]) { + break; + } + } + for (; i < unescaped.length(); i++) { + if (needs_escaping[uint8_t(unescaped[i])]) { + break; + } + } + // The following is also possible and omits a 256-byte table, but it is + // slower: for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F) // && (unescaped[i] != '\"') && (unescaped[i] != '\\'); i++) {} // At least for long strings, the following should be fast. We could // do better by integrating the checks and the insertion. - buffer.insert(buffer.end(), unescaped.data(), unescaped.data() + i); + chars(unescaped.data(), unescaped.data() + i); // We caught a control character if we enter this loop (slow). // Note that we are do not restart from the beginning, but rather we continue // from the point where we encountered something that requires escaping. for (; i < unescaped.length(); i++) { switch (unescaped[i]) { - case '\"': - { - const char * s = "\\\""; - buffer.insert(buffer.end(), s, s + 2); - } - break; - case '\\': - { - const char * s = "\\\\"; - buffer.insert(buffer.end(), s, s + 2); - } - break; + case '\"': { + const char *s = "\\\""; + chars(s, s + 2); + } break; + case '\\': { + const char *s = "\\\\"; + chars(s, s + 2); + } break; default: if (uint8_t(unescaped[i]) <= 0x1F) { // If packed, this uses 8 * 32 bytes. // Note that we expect most compilers to embed this code in the data // section. constexpr static escape_sequence escaped[32] = { - {6, "\\u0000"}, {6, "\\u0001"}, {6, "\\u0002"}, {6, "\\u0003"}, - {6, "\\u0004"}, {6, "\\u0005"}, {6, "\\u0006"}, {6, "\\u0007"}, - {2, "\\b"}, {2, "\\t"}, {2, "\\n"}, {6, "\\u000b"}, - {2, "\\f"}, {2, "\\r"}, {6, "\\u000e"}, {6, "\\u000f"}, - {6, "\\u0010"}, {6, "\\u0011"}, {6, "\\u0012"}, {6, "\\u0013"}, - {6, "\\u0014"}, {6, "\\u0015"}, {6, "\\u0016"}, {6, "\\u0017"}, - {6, "\\u0018"}, {6, "\\u0019"}, {6, "\\u001a"}, {6, "\\u001b"}, - {6, "\\u001c"}, {6, "\\u001d"}, {6, "\\u001e"}, {6, "\\u001f"}}; + {6, "\\u0000"}, {6, "\\u0001"}, {6, "\\u0002"}, {6, "\\u0003"}, + {6, "\\u0004"}, {6, "\\u0005"}, {6, "\\u0006"}, {6, "\\u0007"}, + {2, "\\b"}, {2, "\\t"}, {2, "\\n"}, {6, "\\u000b"}, + {2, "\\f"}, {2, "\\r"}, {6, "\\u000e"}, {6, "\\u000f"}, + {6, "\\u0010"}, {6, "\\u0011"}, {6, "\\u0012"}, {6, "\\u0013"}, + {6, "\\u0014"}, {6, "\\u0015"}, {6, "\\u0016"}, {6, "\\u0017"}, + {6, "\\u0018"}, {6, "\\u0019"}, {6, "\\u001a"}, {6, "\\u001b"}, + {6, "\\u001c"}, {6, "\\u001d"}, {6, "\\u001e"}, {6, "\\u001f"}}; auto u = escaped[uint8_t(unescaped[i])]; - buffer.insert(buffer.end(), u.string, u.string + u.length); + chars(u.string, u.string + u.length); } else { one_char(unescaped[i]); } } // switch - } // for + } // for one_char('\"'); } - -template -inline void base_formatter::clear() { +template inline void base_formatter::clear() { buffer.clear(); } -template +template simdjson_inline std::string_view base_formatter::str() const { - return std::string_view(buffer.data(), buffer.size()); + return buffer.str(); } -simdjson_inline void mini_formatter::print_newline() { - return; -} +simdjson_inline void mini_formatter::print_newline() { return; } simdjson_inline void mini_formatter::print_indents(size_t depth) { - (void)depth; - return; + (void)depth; + return; } -simdjson_inline void mini_formatter::print_space() { - return; -} +simdjson_inline void mini_formatter::print_space() { return; } -simdjson_inline void pretty_formatter::print_newline() { - one_char('\n'); -} +simdjson_inline void pretty_formatter::print_newline() { one_char('\n'); } simdjson_inline void pretty_formatter::print_indents(size_t depth) { - if(this->indent_step <= 0) { - return; - } - for(size_t i = 0; i < this->indent_step * depth; i++) { - one_char(' '); - } -} - -simdjson_inline void pretty_formatter::print_space() { + if (this->indent_step <= 0) { + return; + } + for (size_t i = 0; i < this->indent_step * depth; i++) { one_char(' '); + } } +simdjson_inline void pretty_formatter::print_space() { one_char(' '); } + /*** * String building code. **/ @@ -9335,7 +10078,8 @@ inline void string_builder::append(simdjson::dom::array value) { } template -simdjson_inline void string_builder::append(simdjson::dom::key_value_pair kv) { +simdjson_inline void +string_builder::append(simdjson::dom::key_value_pair kv) { format.key(kv.key); append(kv.value); } @@ -9350,7 +10094,6 @@ simdjson_inline std::string_view string_builder::str() const { return format.str(); } - } // namespace internal } // namespace simdjson @@ -10561,6 +11304,12 @@ namespace { tmp = vpaddq_u8(tmp, tmp); return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); } + // Returns 4-bit out of each byte, alternating between the high 4 bits and low + // bits result it is 64 bit. + simdjson_inline uint64_t to_bitmask64() const { + return vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(*this), 4)), 0); + } simdjson_inline bool any() const { return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; } }; @@ -10637,7 +11386,7 @@ namespace { // Bit-specific operations simdjson_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } - simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } + simdjson_inline bool any_bits_set_anywhere() const { return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; } simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } template simdjson_inline simd8 shr() const { return vshrq_n_u8(*this, N); } @@ -10650,7 +11399,12 @@ namespace { return lookup_table.apply_lookup_16_to(*this); } - + // Returns 4-bit out of each byte, alternating between the high 4 bits and low + // bits result it is 64 bit. + simdjson_inline uint64_t to_bitmask64() const { + return vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(*this), 4)), 0); + } // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes @@ -10973,6 +11727,32 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits) / 4; } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + (is_backslash | is_quote | is_control).to_bitmask64() + }; +} + + + } // unnamed namespace } // namespace arm64 } // namespace simdjson @@ -11399,6 +12179,11 @@ struct implementation_simdjson_result_base { */ simdjson_inline error_code error() const noexcept; + /** + * Whether there is a value. + */ + simdjson_inline bool has_value() const noexcept; + #if SIMDJSON_EXCEPTIONS /** @@ -11406,6 +12191,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -11447,6 +12242,7 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -11599,7 +12395,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -12109,7 +12905,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -12570,6 +13375,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -12808,8 +13619,37 @@ simdjson_inline error_code implementation_simdjson_result_base::error() const return this->second; } + +template +simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -12994,6 +13834,24 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin return { src[0] }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 1; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits; } + simdjson_inline int escape_index() { return 0; } + + bool escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + dst[0] = src[0]; + return { (src[0] == '\\') || (src[0] == '"') || (src[0] < 32) }; +} + } // unnamed namespace } // namespace fallback } // namespace simdjson @@ -13507,6 +14365,11 @@ struct implementation_simdjson_result_base { */ simdjson_inline error_code error() const noexcept; + /** + * Whether there is a value. + */ + simdjson_inline bool has_value() const noexcept; + #if SIMDJSON_EXCEPTIONS /** @@ -13514,6 +14377,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -13555,6 +14428,7 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -13707,7 +14581,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -14217,7 +15091,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -14678,6 +15561,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -14916,8 +15805,37 @@ simdjson_inline error_code implementation_simdjson_result_base::error() const return this->second; } + +template +simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -15683,6 +16601,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + uint64_t((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace haswell } // namespace simdjson @@ -16107,6 +17050,11 @@ struct implementation_simdjson_result_base { */ simdjson_inline error_code error() const noexcept; + /** + * Whether there is a value. + */ + simdjson_inline bool has_value() const noexcept; + #if SIMDJSON_EXCEPTIONS /** @@ -16114,6 +17062,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -16155,6 +17113,7 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -16307,7 +17266,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -16817,7 +17776,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -17278,6 +18246,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -17516,8 +18490,37 @@ simdjson_inline error_code implementation_simdjson_result_base::error() const return this->second; } + +template +simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -17877,7 +18880,6 @@ namespace simd { friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { return _mm512_cmpeq_epi8_mask(lhs, rhs); } - static const int SIZE = sizeof(base::value); template @@ -18220,6 +19222,35 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 64; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(uint64_t(escape_bits)); } + + __mmask64 escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + __mmask64 is_quote = _mm512_cmpeq_epi8_mask(v, _mm512_set1_epi8('"')); + __mmask64 is_backslash = _mm512_cmpeq_epi8_mask(v, _mm512_set1_epi8('\\')); + __mmask64 is_control = _mm512_cmplt_epi8_mask(v, _mm512_set1_epi8(32)); + return { + (is_backslash | is_quote | is_control) + }; +} + + + + } // unnamed namespace } // namespace icelake } // namespace simdjson @@ -18704,6 +19735,11 @@ struct implementation_simdjson_result_base { */ simdjson_inline error_code error() const noexcept; + /** + * Whether there is a value. + */ + simdjson_inline bool has_value() const noexcept; + #if SIMDJSON_EXCEPTIONS /** @@ -18711,6 +19747,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -18752,6 +19798,7 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -18904,7 +19951,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -19414,7 +20461,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -19875,6 +20931,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -20113,8 +21175,37 @@ simdjson_inline error_code implementation_simdjson_result_base::error() const return this->second; } + +template +simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -20992,6 +22083,32 @@ backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + // We store it as a 64-bit bitmask even though we only need 16 bits. + uint64_t((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace ppc64 } // namespace simdjson @@ -21418,6 +22535,11 @@ struct implementation_simdjson_result_base { */ simdjson_inline error_code error() const noexcept; + /** + * Whether there is a value. + */ + simdjson_inline bool has_value() const noexcept; + #if SIMDJSON_EXCEPTIONS /** @@ -21425,6 +22547,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -21466,6 +22598,7 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -21618,7 +22751,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -22128,7 +23261,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -22589,6 +23731,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -22827,8 +23975,37 @@ simdjson_inline error_code implementation_simdjson_result_base::error() const return this->second; } + +template +simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -24025,6 +25202,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + uint64_t((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace westmere } // namespace simdjson @@ -24449,6 +25651,11 @@ struct implementation_simdjson_result_base { */ simdjson_inline error_code error() const noexcept; + /** + * Whether there is a value. + */ + simdjson_inline bool has_value() const noexcept; + #if SIMDJSON_EXCEPTIONS /** @@ -24456,6 +25663,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -24497,6 +25714,7 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -24649,7 +25867,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -25159,7 +26377,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -25620,6 +26847,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -25858,8 +27091,37 @@ simdjson_inline error_code implementation_simdjson_result_base::error() const return this->second; } + +template +simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -26531,6 +27793,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + (is_backslash | is_quote | is_control).to_bitmask() + }; +} + } // unnamed namespace } // namespace lsx } // namespace simdjson @@ -26957,6 +28244,11 @@ struct implementation_simdjson_result_base { */ simdjson_inline error_code error() const noexcept; + /** + * Whether there is a value. + */ + simdjson_inline bool has_value() const noexcept; + #if SIMDJSON_EXCEPTIONS /** @@ -26964,6 +28256,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -27005,6 +28307,7 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -27157,7 +28460,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -27667,7 +28970,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -28128,6 +29440,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -28366,8 +29684,37 @@ simdjson_inline error_code implementation_simdjson_result_base::error() const return this->second; } + +template +simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -29052,6 +30399,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + (is_backslash | is_quote | is_control).to_bitmask() + }; +} + } // unnamed namespace } // namespace lasx } // namespace simdjson @@ -29478,6 +30850,11 @@ struct implementation_simdjson_result_base { */ simdjson_inline error_code error() const noexcept; + /** + * Whether there is a value. + */ + simdjson_inline bool has_value() const noexcept; + #if SIMDJSON_EXCEPTIONS /** @@ -29485,6 +30862,16 @@ struct implementation_simdjson_result_base { * * @throw simdjson_error if there was an error. */ + simdjson_inline T& operator*() & noexcept(false); + simdjson_inline T&& operator*() && noexcept(false); + /** + * Arrow operator to access members of the contained value. + * + * @throw simdjson_error if there was an error. + */ + simdjson_inline T* operator->() noexcept(false); + simdjson_inline const T* operator->() const noexcept(false); + simdjson_inline T& value() & noexcept(false); /** @@ -29526,6 +30913,7 @@ struct implementation_simdjson_result_base { * the error() method returns a value that evaluates to false. */ simdjson_inline T&& value_unsafe() && noexcept; + protected: /** users should never directly access first and second. **/ T first{}; /** Users should never directly access 'first'. **/ @@ -29678,7 +31066,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, // floor(log(5**power)/log(2)) // // Note that this is not magic: 152170/(1<<16) is - // approximatively equal to log(5)/log(2). + // approximately equal to log(5)/log(2). // The 1<<16 value is a power of two; we could use a // larger power of 2 if we wanted to. // @@ -30188,7 +31576,16 @@ simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) { if (i > uint64_t(INT64_MAX)) { WRITE_UNSIGNED(i, src, writer); } else { - WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(i == 0 && negative) { + // We have to write -0.0 instead of 0 + WRITE_DOUBLE(-0.0, src, writer); + } else { + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); + } +#else + WRITE_INTEGER(negative ? (~i+1) : i, src, writer); +#endif } if (jsoncharutils::is_not_structural_or_whitespace(*p)) { return INVALID_NUMBER(src); } return SUCCESS; @@ -30649,6 +32046,12 @@ simdjson_unused simdjson_inline simdjson_result get_number_type(con if (simdjson_unlikely(digit_count == 19 && memcmp(src, smaller_big_integer, 19) > 0)) { return number_type::big_integer; } +#if SIMDJSON_MINUS_ZERO_AS_FLOAT + if(digit_count == 1 && src[0] == '0') { + // We have to write -0.0 instead of 0 + return number_type::floating_point_number; + } +#endif return number_type::signed_integer; } // Let us check if we have a big integer (>=2**64). @@ -30887,8 +32290,37 @@ simdjson_inline error_code implementation_simdjson_result_base::error() const return this->second; } + +template +simdjson_inline bool implementation_simdjson_result_base::has_value() const noexcept { + return this->error() == SUCCESS; +} + #if SIMDJSON_EXCEPTIONS +template +simdjson_inline T& implementation_simdjson_result_base::operator*() & noexcept(false) { + return this->value(); +} + +template +simdjson_inline T&& implementation_simdjson_result_base::operator*() && noexcept(false) { + return std::forward>(*this).value(); +} + +template +simdjson_inline T* implementation_simdjson_result_base::operator->() noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + + +template +simdjson_inline const T* implementation_simdjson_result_base::operator->() const noexcept(false) { + if (this->error()) { throw simdjson_error(this->error()); } + return &this->first; +} + template simdjson_inline T& implementation_simdjson_result_base::value() & noexcept(false) { if (error()) { throw simdjson_error(error()); } @@ -30980,6 +32412,7 @@ simdjson_inline implementation_simdjson_result_base::implementation_simdjson_ // Internal headers needed for ondemand generics. // All includes not under simdjson/generic/ondemand must be here! // Otherwise, amalgamation will fail. +/* skipped duplicate #include "simdjson/concepts.h" */ /* skipped duplicate #include "simdjson/dom/base.h" // for MINIMAL_DOCUMENT_CAPACITY */ /* skipped duplicate #include "simdjson/implementation.h" */ /* skipped duplicate #include "simdjson/padded_string.h" */ @@ -31414,6 +32847,12 @@ namespace { tmp = vpaddq_u8(tmp, tmp); return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); } + // Returns 4-bit out of each byte, alternating between the high 4 bits and low + // bits result it is 64 bit. + simdjson_inline uint64_t to_bitmask64() const { + return vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(*this), 4)), 0); + } simdjson_inline bool any() const { return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; } }; @@ -31490,7 +32929,7 @@ namespace { // Bit-specific operations simdjson_inline simd8 any_bits_set(simd8 bits) const { return vtstq_u8(*this, bits); } - simdjson_inline bool any_bits_set_anywhere() const { return this->max_val() != 0; } + simdjson_inline bool any_bits_set_anywhere() const { return vmaxvq_u32(vreinterpretq_u32_u8(*this)) != 0; } simdjson_inline bool any_bits_set_anywhere(simd8 bits) const { return (*this & bits).any_bits_set_anywhere(); } template simdjson_inline simd8 shr() const { return vshrq_n_u8(*this, N); } @@ -31503,7 +32942,12 @@ namespace { return lookup_table.apply_lookup_16_to(*this); } - + // Returns 4-bit out of each byte, alternating between the high 4 bits and low + // bits result it is 64 bit. + simdjson_inline uint64_t to_bitmask64() const { + return vget_lane_u64( + vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(*this), 4)), 0); + } // Copies to 'output" all bytes corresponding to a 0 in the mask (interpreted as a bitset). // Passing a 0 value for mask would be equivalent to writing out every byte to output. // Only the first 16 - count_ones(mask) bytes of the result are significant but 16 bytes @@ -31826,6 +33270,32 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits) / 4; } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + (is_backslash | is_quote | is_control).to_bitmask64() + }; +} + + + } // unnamed namespace } // namespace arm64 } // namespace simdjson @@ -31894,7 +33364,7 @@ class value_iterator; /* end file simdjson/generic/ondemand/base.h for arm64 */ /* including simdjson/generic/ondemand/deserialize.h for arm64: #include "simdjson/generic/ondemand/deserialize.h" */ /* begin file simdjson/generic/ondemand/deserialize.h for arm64 */ -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS #ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -31973,7 +33443,7 @@ template concept custom_deserializable = tag_invocable; template -concept deserializable = custom_deserializable || is_builtin_deserializable_v; +concept deserializable = custom_deserializable || is_builtin_deserializable_v || concepts::optional_type; template concept nothrow_custom_deserializable = nothrow_tag_invocable; @@ -31984,28 +33454,44 @@ concept nothrow_deserializable = nothrow_custom_deserializable || is_bu /// Deserialize Tag inline constexpr struct deserialize_tag { + using array_type = arm64::ondemand::array; + using object_type = arm64::ondemand::object; using value_type = arm64::ondemand::value; using document_type = arm64::ondemand::document; using document_reference_type = arm64::ondemand::document_reference; + // Customization Point for array + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(array_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for object + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(object_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + // Customization Point for value template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document reference template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } @@ -32015,7 +33501,7 @@ inline constexpr struct deserialize_tag { } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/deserialize.h for arm64 */ /* including simdjson/generic/ondemand/value_iterator.h for arm64: #include "simdjson/generic/ondemand/value_iterator.h" */ @@ -32147,7 +33633,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** @@ -32157,7 +33644,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; @@ -32279,7 +33767,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** @@ -32289,7 +33778,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; @@ -32551,13 +34041,14 @@ class value { * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * When SIMDJSON_SUPPORTS_CONCEPTS is set, custom types are also supported. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -32574,6 +34065,7 @@ class value { * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * If the macro SIMDJSON_SUPPORTS_CONCEPTS is set, then custom types are also supported. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. @@ -32581,28 +34073,47 @@ class value { */ template simdjson_inline error_code get(T &out) -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION - if constexpr (custom_deserializable) { + #if SIMDJSON_SUPPORTS_CONCEPTS + if constexpr (custom_deserializable) { return deserialize(*this, out); - } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); - static_cast(out); // to get rid of unused errors - return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION - } + } else if constexpr (concepts::optional_type) { + using value_type = typename std::remove_cvref_t::value_type; + + // Check if the value is null + if (is_null()) { + out.reset(); // Set to nullopt + return SUCCESS; + } + + if (!out) { + out.emplace(); + } + return get(out.value()); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_CONCEPTS + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; #endif } @@ -33313,7 +34824,22 @@ struct simdjson_result : public arm64::implementation_si simdjson_result operator[](int) noexcept = delete; /** - * Get the type of this JSON value. + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * Given a valid JSON document, the answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just @@ -33925,6 +35451,7 @@ namespace ondemand { * The type of a JSON value. */ enum class json_type { + unknown=0, // Start at 1 to catch uninitialized / default values more easily array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) @@ -34131,6 +35658,12 @@ class raw_json_string { */ simdjson_inline const char * raw() const noexcept; + /** + * Get the character at index i. This is unchecked. + * [0] when the string is of length 0 returns the final quote ("). + */ + simdjson_inline char operator[](size_t i) const noexcept; + /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done) on target.size() characters, @@ -34270,10 +35803,10 @@ struct simdjson_result : public arm64::impleme simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline char operator[](size_t) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape(arm64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(arm64::ondemand::json_iterator &iter) const noexcept; }; - } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H @@ -34289,6 +35822,7 @@ struct simdjson_result : public arm64::impleme /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include +#include namespace simdjson { namespace arm64 { @@ -34407,7 +35941,9 @@ class parser { simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; - /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + /** @overload simdjson_result iterate(padded_string_view json) & noexcept + The string instance might be have its capacity extended. Note that this can still + result in AddressSanitizer: container-overflow in some cases. */ simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; @@ -34495,6 +36031,11 @@ class parser { * Setting batch_size to excessively large or excessively small values may impact negatively the * performance. * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what @@ -34502,10 +36043,10 @@ class parser { * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * - * ### Threads + * This is checked automatically with all iterate_many function calls, except for the two + * that take pointers (const char* or const uint8_t*). * - * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the - * hood to do some lookahead. + * ### Threads * * ### Parser Capacity * @@ -34531,14 +36072,16 @@ class parser { */ inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(padded_string_view json, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) + the string might be automatically padded with up to SIMDJSON_PADDING whitespace characters */ + inline simdjson_result iterate_many(std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe - /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; @@ -34640,13 +36183,39 @@ class parser { bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; #endif + /** + * Get a unique parser instance corresponding to the current thread. + * This instance can be safely used within the current thread, but it should + * not be passed to other threads. + * + * A parser should only be used for one document at a time. + * + * Our simdjson::from functions use this parser instance. + * + * You can free the related parser by calling release_parser(). + */ + static simdjson_inline simdjson_warn_unused ondemand::parser& get_parser(); + /** + * Release the parser instance initialized by get_parser() and all the + * associated resources (memory). Returns true if a parser instance + * was released. + */ + static simdjson_inline bool release_parser(); + private: + friend bool release_parser(); + friend ondemand::parser& get_parser(); + /** Get the thread-local parser instance, allocates it if needed */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_parser_instance(); + /** Get the thread-local parser instance, it might be null */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_threadlocal_parser_if_exists(); /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; std::unique_ptr string_buf{}; + #if SIMDJSON_DEVELOPMENT_CHECKS std::unique_ptr start_positions{}; #endif @@ -34810,6 +36379,37 @@ class array { * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; + +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this array as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON array is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the array, no matter where you are right now. @@ -34888,7 +36488,28 @@ struct simdjson_result : public arm64::implementation_si simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -34933,7 +36554,8 @@ class array_iterator { * * Part of the std::iterator interface. */ - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result + operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. /** * Check if we are at the end of the JSON. * @@ -34957,6 +36579,11 @@ class array_iterator { */ simdjson_inline array_iterator &operator++() noexcept; + /** + * Check if the array is at the end. + */ + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; + private: value_iterator iter{}; @@ -34975,7 +36602,6 @@ namespace simdjson { template<> struct simdjson_result : public arm64::implementation_simdjson_result_base { -public: simdjson_inline simdjson_result(arm64::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -34988,6 +36614,8 @@ struct simdjson_result : public arm64::implemen simdjson_inline bool operator==(const simdjson_result &) const noexcept; simdjson_inline bool operator!=(const simdjson_result &) const noexcept; simdjson_inline simdjson_result &operator++() noexcept; + + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; }; } // namespace simdjson @@ -35181,7 +36809,7 @@ class document { */ template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -35204,7 +36832,7 @@ class document { */ template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -35222,35 +36850,42 @@ class document { * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. - * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns INCORRECT_TYPE If the JSON value is of the given type. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_CONCEPTS + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_CONCEPTS } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; @@ -35313,7 +36948,7 @@ class document { * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a raw_json_string. * @@ -35322,7 +36957,7 @@ class document { * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a bool. * @@ -35472,11 +37107,27 @@ class document { * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * + * The answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. + * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * - * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + * Prior to simdjson 4.0, this function would return an error given a bad token. + * Starting with simdjson 4.0, it will return simdjson::ondemand::json_type::unknown. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. */ simdjson_inline simdjson_result type() noexcept; @@ -35766,7 +37417,7 @@ class document_reference { simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -35779,7 +37430,7 @@ class document_reference { } template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -35802,30 +37453,37 @@ class document_reference { */ template simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_CONCEPTS + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_CONCEPTS } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; @@ -35910,6 +37568,9 @@ struct simdjson_result : public arm64::implementation template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS + + using arm64::implementation_simdjson_result_base::operator*; + using arm64::implementation_simdjson_result_base::operator->; template ::value == false>::type> explicit simdjson_inline operator T() noexcept(false); simdjson_inline operator arm64::ondemand::array() & noexcept(false); @@ -36168,6 +37829,7 @@ class document_stream { * Default constructor. */ simdjson_inline iterator() noexcept; + simdjson_inline iterator(const iterator &other) noexcept = default; /** * Get the current document (or error). */ @@ -36181,6 +37843,7 @@ class document_stream { * @param other the end iterator to compare to. */ simdjson_inline bool operator!=(const iterator &other) const noexcept; + simdjson_inline bool operator==(const iterator &other) const noexcept; /** * @private * @@ -36224,6 +37887,11 @@ class document_stream { */ inline error_code error() const noexcept; + /** + * Returns whether the iterator is at the end. + */ + inline bool at_end() const noexcept; + private: simdjson_inline iterator(document_stream *s, bool finished) noexcept; /** The document_stream we're iterating through. */ @@ -36235,6 +37903,7 @@ class document_stream { friend class document_stream; friend class json_iterator; }; + using iterator = document_stream::iterator; /** * Start iterating the documents in the stream. @@ -36695,6 +38364,36 @@ class object { */ simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this object as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON object is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the object, no matter where you are right now. @@ -36738,12 +38437,32 @@ struct simdjson_result : public arm64::implementation_s simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; - inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; inline simdjson_result raw_json() noexcept; + #if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -36872,6 +38591,20 @@ inline simdjson_result to_json_string(simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); + +#if SIMDJSON_STATIC_REFLECTION +/** + * Create a JSON string from any user-defined type using static reflection. + * Only available when SIMDJSON_STATIC_REFLECTION is enabled. + */ +template + requires(!std::same_as && + !std::same_as && + !std::same_as && + !std::same_as) +inline std::string to_json_string(const T& obj); +#endif + } // namespace simdjson /** @@ -36943,17 +38676,22 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result #include +#if SIMDJSON_STATIC_REFLECTION +#include +// #include // for std::define_static_string - header not available yet +#endif namespace simdjson { template @@ -37000,6 +38738,32 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { return SUCCESS; } +////////////////////////////// +// String deserialization +////////////////////////////// + +// just a character! +error_code tag_invoke(deserialize_tag, auto &val, char &out) noexcept { + std::string_view x; + SIMDJSON_TRY(val.get_string().get(x)); + if(x.size() != 1) { + return INCORRECT_TYPE; + } + out = x[0]; + return SUCCESS; +} + +// any string-like type (can be constructed from std::string_view) +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + out = T{str}; + return SUCCESS; +} + + /** * STL containers have several constructors including one that takes a single * size argument. Thus, some compilers (Visual Studio) will not be able to @@ -37017,9 +38781,13 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { static_assert( std::is_default_constructible_v, "The specified type inside the container must default constructible."); - arm64::ondemand::array arr; - SIMDJSON_TRY(val.get_array().get(arr)); + if constexpr (std::is_same_v, arm64::ondemand::array>) { + arr = val; + } else { + SIMDJSON_TRY(val.get_array().get(arr)); + } + for (auto v : arr) { if constexpr (concepts::returns_reference) { if (auto const err = v.get().get(concepts::emplace_one(out)); @@ -37045,6 +38813,77 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { } +/** + * We want to support std::map and std::unordered_map but only for + * string-keyed types. + */ + template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::mapped_type; + static_assert( + deserializable, + "The specified value type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified value type inside the container must default constructible."); + arm64::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + value_type this_value; + SIMDJSON_TRY(field.value().get().get(this_value)); + [[maybe_unused]] std::pair result = out.emplace(key, this_value); + // unclear what to do if the key already exists + // if (result.second == false) { + // // key already exists + // } + } + (void)out; + return SUCCESS; +} + +template +error_code tag_invoke(deserialize_tag, arm64::ondemand::object &obj, T &out) noexcept { + using value_type = typename std::remove_cvref_t::mapped_type; + + out.clear(); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + + arm64::ondemand::value value_obj; + SIMDJSON_TRY(field.value().get(value_obj)); + + value_type this_value; + SIMDJSON_TRY(value_obj.get(this_value)); + out.emplace(typename T::key_type(key), std::move(this_value)); + } + return SUCCESS; +} + +template +error_code tag_invoke(deserialize_tag, arm64::ondemand::value &val, T &out) noexcept { + arm64::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + +template +error_code tag_invoke(deserialize_tag, arm64::ondemand::document &doc, T &out) noexcept { + arm64::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + +template +error_code tag_invoke(deserialize_tag, arm64::ondemand::document_reference &doc, T &out) noexcept { + arm64::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + /** * This CPO (Customization Point Object) will help deserialize into @@ -37086,17 +38925,16 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser /** * This CPO (Customization Point Object) will help deserialize into optional types. */ -template +template requires(!require_custom_serialization) -error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::value_type, ValT>) { +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept(nothrow_deserializable::value_type, decltype(val)>) { using value_type = typename std::remove_cvref_t::value_type; - static_assert( - deserializable, - "The specified type inside the unique_ptr must itself be deserializable"); - static_assert( - std::is_default_constructible_v, - "The specified type inside the unique_ptr must default constructible."); + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullopt + return SUCCESS; + } if (!out) { out.emplace(); @@ -37105,10 +38943,295 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser return SUCCESS; } + +#if SIMDJSON_STATIC_REFLECTION + + +template +constexpr bool user_defined_type = (std::is_class_v +&& !std::is_same_v && !std::is_same_v && !concepts::optional_type && +!concepts::appendable_containers && !require_custom_serialization); + + +template + requires(user_defined_type && std::is_class_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { + arm64::ondemand::object obj; + if constexpr (std::is_same_v, arm64::ondemand::object>) { + obj = val; + } else { + SIMDJSON_TRY(val.get_object().get(obj)); + } + error_code e = simdjson::SUCCESS; + template for (constexpr auto mem : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + // Note: removed static assert as optional types are now handled generically + // as long we are succesful or the field is not found, we continue + if(e == simdjson::SUCCESS || e == simdjson::NO_SUCH_FIELD) { + e = obj[key].get(out.[:mem:]); + } + } + }; + return e; +} + +// Support for enum deserialization - deserialize from string representation using expand approach from P2996R12 +template + requires(std::is_enum_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { +#if SIMDJSON_STATIC_REFLECTION + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + if (str == std::meta::identifier_of(enum_val)) { + out = [:enum_val:]; + return SUCCESS; + } + }; + + return INCORRECT_TYPE; +#else + // Fallback: deserialize as integer if reflection not available + std::underlying_type_t int_val; + SIMDJSON_TRY(val.get(int_val)); + out = static_cast(int_val); + return SUCCESS; +#endif +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::unique_ptr &out) noexcept { + if (!out) { + out = std::make_unique(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::shared_ptr &out) noexcept { + if (!out) { + out = std::make_shared(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +#endif // SIMDJSON_STATIC_REFLECTION + +//////////////////////////////////////// +// Unique pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Shared pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Explicit optional specializations +//////////////////////////////////////// + +//////////////////////////////////////// +// Explicit smart pointer specializations for string and int types +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_shared(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + int64_t temp; + SIMDJSON_TRY(val.get_int64().get(temp)); + *out = static_cast(temp); + return SUCCESS; +} + } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/std_deserialize.h for arm64 */ // Inline definitions @@ -37392,6 +39515,9 @@ simdjson_inline array_iterator &array_iterator::operator++() noexcept { return *this; } +simdjson_inline bool array_iterator::at_end() const noexcept { + return iter.at_end(); +} } // namespace ondemand } // namespace arm64 } // namespace simdjson @@ -37428,7 +39554,9 @@ simdjson_inline simdjson_result &simdjson_resul ++(first); return *this; } - +simdjson_inline bool simdjson_result::at_end() const noexcept { + return !first.iter.is_valid() || first.at_end(); +} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H @@ -38186,8 +40314,8 @@ simdjson_inline document::operator object() & noexcept(false) { return get_objec simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document::operator double() noexcept(false) { return get_double(); } -simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } -simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator std::string_view() noexcept(false) simdjson_lifetime_bound { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) simdjson_lifetime_bound { return get_raw_json_string(); } simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document::operator value() noexcept(false) { return get_value(); } @@ -39141,10 +41269,19 @@ simdjson_inline document_stream::iterator& document_stream::iterator::operator++ return *this; } +simdjson_inline bool document_stream::iterator::at_end() const noexcept { + return finished; +} + + simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { return finished != other.finished; } +simdjson_inline bool document_stream::iterator::operator==(const document_stream::iterator &other) const noexcept { + return finished == other.finished; +} + simdjson_inline document_stream::iterator document_stream::begin() noexcept { start(); // If there are no documents, we're finished. @@ -39744,6 +41881,8 @@ simdjson_inline void json_iterator::assert_valid_position(token_position positio #ifndef SIMDJSON_CLANG_VISUAL_STUDIO SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#else + (void)position; // Suppress unused parameter warning #endif } @@ -40592,6 +42731,7 @@ simdjson_inline simdjson_result simdjson_result parser::iterate(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -40806,7 +42946,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(p #ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -40838,10 +42978,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(s } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { - if(json.capacity() - json.size() < SIMDJSON_PADDING) { - json.reserve(json.size() + SIMDJSON_PADDING); - } - return iterate(padded_string_view(json)); + return iterate(pad_with_reserve(json)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { @@ -40863,7 +43000,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(c } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -40878,6 +43015,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iter } inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { buf += 3; @@ -40886,16 +43024,24 @@ inline simdjson_result parser::iterate_many(const uint8_t *buf, if(allow_comma_separated && batch_size < len) { batch_size = len; } return document_stream(*this, buf, len, batch_size, allow_comma_separated); } + inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); } -inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { +inline simdjson_result parser::iterate_many(padded_string_view s, size_t batch_size, bool allow_comma_separated) noexcept { + if (!s.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(pad(s), batch_size, allow_comma_separated); } - simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } @@ -40930,6 +43076,34 @@ simdjson_inline simdjson_warn_unused simdjson_result parser::u return result; } +simdjson_inline simdjson_warn_unused ondemand::parser& parser::get_parser() { + return *parser::get_parser_instance(); +} + +simdjson_inline bool release_parser() { + auto &parser_instance = parser::get_threadlocal_parser_if_exists(); + if (parser_instance) { + parser_instance.reset(); + return true; + } + return false; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_parser_instance() { + std::unique_ptr& parser_instance = get_threadlocal_parser_if_exists(); + if (!parser_instance) { + parser_instance.reset(new ondemand::parser()); + } + return parser_instance; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_threadlocal_parser_if_exists() { + // @the-moisrex points out that this could be implemented with std::optional (C++17). + thread_local std::unique_ptr parser_instance = nullptr; + return parser_instance; +} + + } // namespace ondemand } // namespace arm64 } // namespace simdjson @@ -40964,8 +43138,13 @@ namespace ondemand { simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} -simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } +simdjson_inline const char * raw_json_string::raw() const noexcept { + return reinterpret_cast(buf); +} +simdjson_inline char raw_json_string::operator[](size_t i) const noexcept { + return reinterpret_cast(buf)[i]; +} simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; @@ -41142,6 +43321,10 @@ simdjson_inline simdjson_result simdjson_result::operator[](size_t i) const noexcept { + if (error()) { return error(); } + return first[i]; +} simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(arm64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { if (error()) { return error(); } return first.unescape(iter, allow_replacement); @@ -41167,6 +43350,9 @@ simdjson_inline simdjson_warn_unused simdjson_result simdjson_ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_builder.h" */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -42269,19 +44455,23 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } return result; } + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("bool"); - uint8_t tmpbuf[5+1+1]; // +1 for null termination - tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } - auto result = parse_bool(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - } - return result; + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } + simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("null"); @@ -42544,7 +44734,7 @@ simdjson_inline simdjson_result value_iterator::type() const noexcept case '5': case '6': case '7': case '8': case '9': return json_type::number; default: - return TAPE_ERROR; + return json_type::unknown; } } @@ -42584,6 +44774,1221 @@ simdjson_inline simdjson_result::simdjson_resul #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* end file simdjson/generic/ondemand/value_iterator-inl.h for arm64 */ +// JSON builder, ideally they should not be part of the ondemand directory +// but it is convenient for now to have them here. +/* including simdjson/generic/ondemand/json_string_builder.h for arm64: #include "simdjson/generic/ondemand/json_string_builder.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder.h for arm64 */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace arm64 { +namespace builder { + +/** + * A builder for JSON strings representing documents. This is a low-level + * builder that is not meant to be used directly by end-users. Though it + * supports atomic types (Booleans, strings), it does not support composed + * types (arrays and objects). + * + * Ultimately, this class can support kernel-specific optimizations. E.g., + * it may make use of SIMD instructions to escape strings faster. + */ +class string_builder { +public: + simdjson_inline string_builder(size_t initial_capacity = 1024); + + /** + * Append number (includes Booleans). Booleans are mapped to the strings + * false and true. Numbers are converted to strings abiding by the JSON standard. + * Floating-point numbers are converted to the shortest string that 'correctly' + * represents the number. + */ + template::value>::type> + simdjson_inline void append(number_type v) noexcept; + + /** + * Append character c. + */ + simdjson_inline void append(char c) noexcept; + + /** + * Append the string 'null'. + */ + simdjson_inline void append_null() noexcept; + + /** + * Clear the content. + */ + simdjson_inline void clear() noexcept; + + /** + * Append the std::string_view, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append(std::string_view input) noexcept; + + /** + * Append the std::string_view surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(std::string_view input) noexcept; + + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(char input) noexcept; + + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(const char* input) noexcept; + + /** + * Append the C string directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *c) noexcept; + + /** + * Append "{" to the buffer. + */ + simdjson_inline void start_object() noexcept; + + /** + * Append "}" to the buffer. + */ + simdjson_inline void end_object() noexcept; + + /** + * Append "[" to the buffer. + */ + simdjson_inline void start_array() noexcept; + + /** + * Append "]" to the buffer. + */ + simdjson_inline void end_array() noexcept; + + /** + * Append "," to the buffer. + */ + simdjson_inline void append_comma() noexcept; + + /** + * Append ":" to the buffer. + */ + simdjson_inline void append_colon() noexcept; + + /** + * Append a key-value pair to the buffer. + * The key is escaped and surrounded by double quotes. + * The value is escaped if it is a string. + */ + template + simdjson_inline void append_key_value(key_type key, value_type value) noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + // Support for optional types (std::optional, etc.) + template + simdjson_inline void append(const T &opt); + + // Support for string-like types + template + requires(std::is_convertible::value || + std::is_same::value ) + simdjson_inline void append(const T &value); +#endif +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS + // Support for range-based appending (std::ranges::view, etc.) + template +requires (!std::is_convertible::value) + simdjson_inline void append(const R &range) noexcept; +#endif + /** + * Append the std::string_view directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(std::string_view input) noexcept; + + /** + * Append len characters from str. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *str, size_t len) noexcept; +#if SIMDJSON_EXCEPTIONS + /** + * Creates an std::string from the written JSON buffer. + * Throws if memory allocation failed + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string() const noexcept(false); + + /** + * Creates an std::string_view from the written JSON buffer. + * Throws if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string_view() const noexcept(false) simdjson_lifetime_bound; +#endif + + /** + * Returns a view on the written JSON buffer. Returns an error + * if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result view() const noexcept; + + /** + * Appends the null character to the buffer and returns + * a pointer to the beginning of the written JSON buffer. + * Returns an error if memory allocation failed. + * The result is null-terminated. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result c_str() noexcept; + + /** + * Return true if the content is valid UTF-8. + */ + simdjson_inline bool validate_unicode() const noexcept; + + /** + * Returns the current size of the written JSON buffer. + * If an error occurred, returns 0. + */ + simdjson_inline size_t size() const noexcept; + +private: + /** + * Returns true if we can write at least upcoming_bytes bytes. + * The underlying buffer is reallocated if needed. It is designed + * to be called before writing to the buffer. It should be fast. + */ + simdjson_inline bool capacity_check(size_t upcoming_bytes); + + /** + * Grow the buffer to at least desired_capacity bytes. + * If the allocation fails, is_valid is set to false. We expect + * that this function would not be repeatedly called. + */ + simdjson_inline void grow_buffer(size_t desired_capacity); + + /** + * We use this helper function to make sure that is_valid is kept consistent. + */ + simdjson_inline void set_valid(bool valid) noexcept; + + std::unique_ptr buffer{}; + size_t position{0}; + size_t capacity{0}; + bool is_valid{true}; +}; + + + +} +} + + +#if !SIMDJSON_STATIC_REFLECTION +// fallback implementation until we have static reflection +template +simdjson_result to_json(const Z &z, size_t initial_capacity = 1024) { + simdjson::arm64::builder::string_builder b(initial_capacity); + b.append(z); + std::string_view s; + auto e = b.view().get(s); + if(e) { return e; } + return std::string(s); +} +#endif + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_H +/* end file simdjson/generic/ondemand/json_string_builder.h for arm64 */ +/* including simdjson/generic/ondemand/json_string_builder-inl.h for arm64: #include "simdjson/generic/ondemand/json_string_builder-inl.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder-inl.h for arm64 */ +/** + * This file is part of the builder API. It is temporarily in the ondemand + * directory but we will move it to a builder directory later. + */ +#include +#include +#include +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* + * Empirically, we have found that an inlined optimization is important for + * performance. The following macros are not ideal. We should find a better + * way to inline the code. + */ + +#if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \ + (defined(_M_AMD64) || defined(_M_X64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP == 2)) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#define SIMDJSON_EXPERIMENTAL_HAS_SSE2 1 +#endif +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_NEON +#define SIMDJSON_EXPERIMENTAL_HAS_NEON 1 +#endif +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +#include +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#include +#endif + +namespace simdjson { +namespace arm64 { +namespace builder { + +static SIMDJSON_CONSTEXPR_LAMBDA std::array json_quotable_character = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + +/** + +A possible SWAR implementation of has_json_escapable_byte. It is not used because +it is slower than the current implementation. It is kept here for reference (to show +that we tried it). + +inline bool has_json_escapable_byte(uint64_t x) { + uint64_t is_ascii = 0x8080808080808080ULL & ~x; + uint64_t xor2 = x ^ 0x0202020202020202ULL; + uint64_t lt32_or_eq34 = xor2 - 0x2121212121212121ULL; + uint64_t sub92 = x ^ 0x5C5C5C5C5C5C5C5CULL; + uint64_t eq92 = (sub92 - 0x0101010101010101ULL); + return ((lt32_or_eq34 | eq92) & is_ascii) != 0; +} + +**/ + +SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline bool +simple_needs_escaping(std::string_view v) { + for (char c : v) { + // a table lookup is faster than a series of comparisons + if(json_quotable_character[static_cast(c)]) { + return true; + } + } + return false; +} + +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + uint8x16_t running = vdupq_n_u8(0); + uint8x16_t v34 = vdupq_n_u8(34); + uint8x16_t v92 = vdupq_n_u8(92); + + for (; i + 15 < view.size(); i += 16) { + uint8x16_t word = vld1q_u8((const uint8_t *)view.data() + i); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + if (i < view.size()) { + uint8x16_t word = + vld1q_u8((const uint8_t *)view.data() + view.length() - 16); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + return vmaxvq_u32(vreinterpretq_u32_u8(running)) != 0; +} +#elif SIMDJSON_EXPERIMENTAL_HAS_SSE2 +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + __m128i running = _mm_setzero_si128(); + for (; i + 15 < view.size(); i += 16) { + + __m128i word = _mm_loadu_si128(reinterpret_cast(view.data() + i)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + if (i < view.size()) { + __m128i word = + _mm_loadu_si128(reinterpret_cast(view.data() + view.length() - 16)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + return _mm_movemask_epi8(running) != 0; +} +#else +simdjson_inline bool fast_needs_escaping(std::string_view view) { + return simple_needs_escaping(view); +} +#endif + + +SIMDJSON_CONSTEXPR_LAMBDA inline size_t +find_next_json_quotable_character(const std::string_view view, + size_t location) noexcept { + + for (auto pos = view.begin() + location; pos != view.end(); ++pos) { + if (json_quotable_character[static_cast(*pos)]) { + return pos - view.begin(); + } + } + return size_t(view.size()); +} + +SIMDJSON_CONSTEXPR_LAMBDA static std::string_view control_chars[] = { + "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", + "\\u0007", "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", + "\\u000e", "\\u000f", "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", + "\\u0015", "\\u0016", "\\u0017", "\\u0018", "\\u0019", "\\u001a", "\\u001b", + "\\u001c", "\\u001d", "\\u001e", "\\u001f"}; + +// All Unicode characters may be placed within the quotation marks, except for the +// characters that MUST be escaped: quotation mark, reverse solidus, and the control +// characters (U+0000 through U+001F). +// There are two-character sequence escape representations of some popular characters: +// \", \\, \b, \f, \n, \r, \t. +SIMDJSON_CONSTEXPR_LAMBDA void escape_json_char(char c, char *&out) { + if (c == '"') { + memcpy(out, "\\\"", 2); + out += 2; + } else if (c == '\\') { + memcpy(out, "\\\\", 2); + out += 2; + } else { + std::string_view v = control_chars[uint8_t(c)]; + memcpy(out, v.data(), v.size()); + out += v.size(); + } +} + +inline size_t write_string_escaped(const std::string_view input, char *out) { + size_t mysize = input.size(); + if (!fast_needs_escaping(input)) { // fast path! + memcpy(out, input.data(), input.size()); + return input.size(); + } + const char *const initout = out; + size_t location = find_next_json_quotable_character(input, 0); + memcpy(out, input.data(), location); + out += location; + escape_json_char(input[location], out); + location += 1; + while (location < mysize) { + size_t newlocation = find_next_json_quotable_character(input, location); + memcpy(out, input.data() + location, newlocation - location); + out += newlocation - location; + location = newlocation; + if (location == mysize) { + break; + } + escape_json_char(input[location], out); + location += 1; + } + return out - initout; +} + +simdjson_inline string_builder::string_builder(size_t initial_capacity) + : buffer(new(std::nothrow) char[initial_capacity]), position(0), + capacity(buffer.get() != nullptr ? initial_capacity : 0), + is_valid(buffer.get() != nullptr) {} + +simdjson_inline bool string_builder::capacity_check(size_t upcoming_bytes) { + // We use the convention that when is_valid is false, then the capacity and + // the position are 0. + // Most of the time, this function will return true. + if (simdjson_likely(upcoming_bytes <= capacity - position)) { + return true; + } + // check for overflow, most of the time there is no overflow + if (simdjson_likely(position + upcoming_bytes < position)) { + return false; + } + // We will rarely get here. + grow_buffer((std::max)(capacity * 2, position + upcoming_bytes)); + // If the buffer allocation failed, we set is_valid to false. + return is_valid; +} + +simdjson_inline void string_builder::grow_buffer(size_t desired_capacity) { + if (!is_valid) { + return; + } + std::unique_ptr new_buffer(new (std::nothrow) char[desired_capacity]); + if (new_buffer.get() == nullptr) { + set_valid(false); + return; + } + std::memcpy(new_buffer.get(), buffer.get(), position); + buffer.swap(new_buffer); + capacity = desired_capacity; +} + +simdjson_inline void string_builder::set_valid(bool valid) noexcept { + if (!valid) { + is_valid = false; + capacity = 0; + position = 0; + buffer.reset(); + } else { + is_valid = true; + } +} + +simdjson_inline size_t string_builder::size() const noexcept { + return position; +} + +simdjson_inline void string_builder::append(char c) noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = c; + } +} + +simdjson_inline void string_builder::append_null() noexcept { + constexpr char null_literal[] = "null"; + constexpr size_t null_len = sizeof(null_literal) - 1; + if (capacity_check(null_len)) { + std::memcpy(buffer.get() + position, null_literal, null_len); + position += null_len; + } +} + +simdjson_inline void string_builder::clear() noexcept { + position = 0; + // if it was invalid, we should try to repair it + if (!is_valid) { + capacity = 0; + buffer.reset(); + is_valid = true; + } +} + +namespace internal { + +// We could specialize further for 32-bit integers. +simdjson_really_inline int int_log2(uint32_t x) { return (63 - leading_zeroes(x | 1)); } + +simdjson_really_inline int fast_digit_count(uint32_t x) { + static uint64_t table[] = { + 4294967296, 8589934582, 8589934582, 8589934582, 12884901788, + 12884901788, 12884901788, 17179868184, 17179868184, 17179868184, + 21474826480, 21474826480, 21474826480, 21474826480, 25769703776, + 25769703776, 25769703776, 30063771072, 30063771072, 30063771072, + 34349738368, 34349738368, 34349738368, 34349738368, 38554705664, + 38554705664, 38554705664, 41949672960, 41949672960, 41949672960, + 42949672960, 42949672960}; + return uint32_t((x + table[int_log2(x)]) >> 32); +} + +simdjson_really_inline int int_log2(uint64_t x) { return 63 - leading_zeroes(x | 1); } + +simdjson_really_inline int fast_digit_count(uint64_t x) { + static uint64_t table[] = {9, + 99, + 999, + 9999, + 99999, + 999999, + 9999999, + 99999999, + 999999999, + 9999999999, + 99999999999, + 999999999999, + 9999999999999, + 99999999999999, + 999999999999999ULL, + 9999999999999999ULL, + 99999999999999999ULL, + 999999999999999999ULL, + 9999999999999999999ULL}; + int y = (19 * int_log2(x) >> 6); + y += x > table[y]; + return y + 1; +} + +template ::value>::type> +simdjson_really_inline size_t digit_count(number_type v) noexcept { + static_assert(sizeof(number_type) == 8 || sizeof(number_type) == 4 || + sizeof(number_type) == 2 || sizeof(number_type) == 1, + "We only support 8-bit, 16-bit, 32-bit and 64-bit numbers"); + return fast_digit_count(v); +} +static const char decimal_table[200] = { + 0x30, 0x30, 0x30, 0x31, 0x30, 0x32, 0x30, 0x33, 0x30, 0x34, 0x30, 0x35, + 0x30, 0x36, 0x30, 0x37, 0x30, 0x38, 0x30, 0x39, 0x31, 0x30, 0x31, 0x31, + 0x31, 0x32, 0x31, 0x33, 0x31, 0x34, 0x31, 0x35, 0x31, 0x36, 0x31, 0x37, + 0x31, 0x38, 0x31, 0x39, 0x32, 0x30, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, + 0x32, 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37, 0x32, 0x38, 0x32, 0x39, + 0x33, 0x30, 0x33, 0x31, 0x33, 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35, + 0x33, 0x36, 0x33, 0x37, 0x33, 0x38, 0x33, 0x39, 0x34, 0x30, 0x34, 0x31, + 0x34, 0x32, 0x34, 0x33, 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, 0x34, 0x37, + 0x34, 0x38, 0x34, 0x39, 0x35, 0x30, 0x35, 0x31, 0x35, 0x32, 0x35, 0x33, + 0x35, 0x34, 0x35, 0x35, 0x35, 0x36, 0x35, 0x37, 0x35, 0x38, 0x35, 0x39, + 0x36, 0x30, 0x36, 0x31, 0x36, 0x32, 0x36, 0x33, 0x36, 0x34, 0x36, 0x35, + 0x36, 0x36, 0x36, 0x37, 0x36, 0x38, 0x36, 0x39, 0x37, 0x30, 0x37, 0x31, + 0x37, 0x32, 0x37, 0x33, 0x37, 0x34, 0x37, 0x35, 0x37, 0x36, 0x37, 0x37, + 0x37, 0x38, 0x37, 0x39, 0x38, 0x30, 0x38, 0x31, 0x38, 0x32, 0x38, 0x33, + 0x38, 0x34, 0x38, 0x35, 0x38, 0x36, 0x38, 0x37, 0x38, 0x38, 0x38, 0x39, + 0x39, 0x30, 0x39, 0x31, 0x39, 0x32, 0x39, 0x33, 0x39, 0x34, 0x39, 0x35, + 0x39, 0x36, 0x39, 0x37, 0x39, 0x38, 0x39, 0x39, +}; +} // namespace internal + +template +simdjson_inline void string_builder::append(number_type v) noexcept { + static_assert(std::is_same::value || + std::is_integral::value || + std::is_floating_point::value, + "Unsupported number type"); + // If C++17 is available, we can 'if constexpr' here. + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + if (v) { + constexpr char true_literal[] = "true"; + constexpr size_t true_len = sizeof(true_literal) - 1; + if (capacity_check(true_len)) { + std::memcpy(buffer.get() + position, true_literal, true_len); + position += true_len; + } + } else { + constexpr char false_literal[] = "false"; + constexpr size_t false_len = sizeof(false_literal) - 1; + if (capacity_check(false_len)) { + std::memcpy(buffer.get() + position, false_literal, false_len); + position += false_len; + } + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_unsigned::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + unsigned_type pv = static_cast(v); + size_t dc = internal::digit_count(pv); + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100)*2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_integral::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + bool negative = v < 0; + unsigned_type pv = static_cast(v); + if (negative) { + pv = 0 - pv; // the 0 is for Microsoft + } + size_t dc = internal::digit_count(pv); + if (negative) { + buffer.get()[position++] = '-'; + } + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100)*2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_floating_point::value) { + constexpr size_t max_number_size = 24; + if (capacity_check(max_number_size)) { + // We could specialize for float. + char *end = simdjson::internal::to_chars(buffer.get() + position, nullptr, + double(v)); + position = end - buffer.get(); + } + } +} + +simdjson_inline void +string_builder::escape_and_append(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(6 * input.size())) { + position += write_string_escaped(input, buffer.get() + position); + } +} + +simdjson_inline void +string_builder::escape_and_append_with_quotes(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * input.size())) { + buffer.get()[position++] = '"'; + position += write_string_escaped(input, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} + +simdjson_inline void +string_builder::escape_and_append_with_quotes(char input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * 1)) { + buffer.get()[position++] = '"'; + std::string_view cinput(&input, 1); + position += write_string_escaped(cinput, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} + +simdjson_inline void string_builder::escape_and_append_with_quotes(const char* input) noexcept { + std::string_view cinput(input); + escape_and_append_with_quotes(cinput); +} + +simdjson_inline void string_builder::append_raw(const char *c) noexcept { + size_t len = std::strlen(c); + append_raw(c, len); +} + +simdjson_inline void +string_builder::append_raw(std::string_view input) noexcept { + if (capacity_check(input.size())) { + std::memcpy(buffer.get() + position, input.data(), input.size()); + position += input.size(); + } +} + +simdjson_inline void string_builder::append_raw(const char *str, + size_t len) noexcept { + if (capacity_check(len)) { + std::memcpy(buffer.get() + position, str, len); + position += len; + } +} +#if SIMDJSON_SUPPORTS_CONCEPTS +// Support for optional types (std::optional, etc.) +template +simdjson_inline void string_builder::append(const T &opt) { + if (opt) { + append(*opt); + } else { + append_null(); + } +} +template +requires(std::is_convertible::value || +std::is_same::value ) +simdjson_inline void string_builder::append(const T &value) { + escape_and_append_with_quotes(value); +} +#endif + +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS + // Support for range-based appending (std::ranges::view, etc.) +template +requires (!std::is_convertible::value) +simdjson_inline void string_builder::append(const R &range) noexcept { + auto it = std::ranges::begin(range); + auto end = std::ranges::end(range); + if constexpr (concepts::is_pair) { + start_object(); + + if (it == end) { + end_object(); + return; // Handle empty range + } + // Append first item without leading comma + append_key_value(it->first, it->second); + ++it; + + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append_key_value(it->first, it->second); + } + end_object(); + } else { + start_array(); + if (it == end) { + end_array(); + return; // Handle empty range + } + + // Append first item without leading comma + append(*it); + ++it; + + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append(*it); + } + end_array(); + + } +} + +#endif + +#if SIMDJSON_EXCEPTIONS +simdjson_inline string_builder::operator std::string() const noexcept(false) { + return std::string(std::string_view()); +} + +simdjson_inline string_builder::operator std::string_view() const + noexcept(false) simdjson_lifetime_bound { + return view(); +} +#endif + +simdjson_inline simdjson_result +string_builder::view() const noexcept { + if (!is_valid) { + return simdjson::OUT_OF_CAPACITY; + } + return std::string_view(buffer.get(), position); +} + +simdjson_inline simdjson_result string_builder::c_str() noexcept { + if (capacity_check(1)) { + buffer.get()[position] = '\0'; + return buffer.get(); + } + return simdjson::OUT_OF_CAPACITY; +} + +simdjson_inline bool string_builder::validate_unicode() const noexcept { + return simdjson::validate_utf8(buffer.get(), position); +} + +simdjson_inline void string_builder::start_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '{'; + } +} + + +simdjson_inline void string_builder::end_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '}'; + } +} + + +simdjson_inline void string_builder::start_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '['; + } +} + + +simdjson_inline void string_builder::end_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ']'; + } +} + + +simdjson_inline void string_builder::append_comma() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ','; + } +} + + +simdjson_inline void string_builder::append_colon() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ':'; + } +} + +template +simdjson_inline void string_builder::append_key_value(key_type key, value_type value) noexcept { + static_assert( + std::is_same::value || + std::is_convertible::value, + "Unsupported key type"); + escape_and_append_with_quotes(key); + append_colon(); + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + append_null(); + } else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } else SIMDJSON_IF_CONSTEXPR(std::is_convertible::value) { + escape_and_append_with_quotes(value); + } else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } else { + append(value); + } +} + +} // namespace builder +} // namespace arm64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_INL_H +/* end file simdjson/generic/ondemand/json_string_builder-inl.h for arm64 */ +/* including simdjson/generic/ondemand/json_builder.h for arm64: #include "simdjson/generic/ondemand/json_builder.h" */ +/* begin file simdjson/generic/ondemand/json_builder.h for arm64 */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/concepts.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#if SIMDJSON_STATIC_REFLECTION + +#include +#include +#include +#include +#include +#include +#include +#include +// #include // for std::define_static_string - header not available yet + +namespace simdjson { +namespace arm64 { +namespace builder { + +// Concept that checks if a type is a container but not a string (because +// strings handling must be handled differently) +template +concept container_but_not_string = + requires(T a) { + { a.size() } -> std::convertible_to; + { + a[std::declval()] + }; // check if elements are accessible for the subscript operator + } && !std::is_same_v && + !std::is_same_v && !std::is_same_v; + +template + requires(container_but_not_string) +constexpr void atom(string_builder &b, const T &t) { + if (t.size() == 0) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, t[0]); + for (size_t i = 1; i < t.size(); ++i) { + b.append(','); + atom(b, t[i]); + } + b.append(']'); +} + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +constexpr void atom(string_builder &b, const T &t) { + b.escape_and_append_with_quotes(t); +} + +template +constexpr void atom(string_builder &b, const T &m) { + if (m.empty()) { + b.append_raw("{}"); + return; + } + b.append('{'); + bool first = true; + for (const auto& [key, value] : m) { + if (!first) { + b.append(','); + } + first = false; + // Keys must be convertible to string_view per the concept + b.escape_and_append_with_quotes(key); + b.append(':'); + atom(b, value); + } + b.append('}'); +} + + +template::value && !std::is_same_v>::type> +constexpr void atom(string_builder &b, const number_type t) { + b.append(t); +} + +template + requires(std::is_class_v && !container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v) +constexpr void atom(string_builder &b, const T &t) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, t.[:dm:]); + i++; + }; + b.append('}'); +} + +// Support for optional types (std::optional, etc.) +template +constexpr void atom(string_builder &b, const T &opt) { + if (opt) { + atom(b, opt.value()); + } else { + b.append_raw("null"); + } +} + +// Support for smart pointers (std::unique_ptr, std::shared_ptr, etc.) +template +constexpr void atom(string_builder &b, const T &ptr) { + if (ptr) { + atom(b, *ptr); + } else { + b.append_raw("null"); + } +} + +// Support for enums - serialize as string representation using expand approach from P2996R12 +template + requires(std::is_enum_v) +void atom(string_builder &b, const T &e) { +#if SIMDJSON_STATIC_REFLECTION + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + constexpr auto enum_str = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(enum_val))); + if (e == [:enum_val:]) { + b.append_raw(enum_str); + return; + } + }; + // Fallback to integer if enum value not found + atom(b, static_cast>(e)); +#else + // Fallback: serialize as integer if reflection not available + atom(b, static_cast>(e)); +#endif +} + +// Support for appendable containers that don't have operator[] (sets, etc.) +template + requires(!container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v) +constexpr void atom(string_builder &b, const T &container) { + if (container.empty()) { + b.append_raw("[]"); + return; + } + b.append('['); + bool first = true; + for (const auto& item : container) { + if (!first) { + b.append(','); + } + first = false; + atom(b, item); + } + b.append(']'); +} + +// append functions that delegate to atom functions for primitive types +template + requires(std::is_arithmetic_v && !std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template + requires(!container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template +void append(string_builder &b, const T &t) { + atom(b, t); +} + +// works for struct +template + requires(std::is_class_v && !container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v) +void append(string_builder &b, const Z &z) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^Z, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, z.[:dm:]); + i++; + }; + b.append('}'); +} + +// works for container +template + requires(container_but_not_string) +void append(string_builder &b, const Z &z) { + if (z.size() == 0) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, z[0]); + for (size_t i = 1; i < z.size(); ++i) { + b.append(','); + atom(b, z[i]); + } + b.append(']'); +} + +template +simdjson_result to_json_string(const Z &z, size_t initial_capacity = 1024) { + string_builder b(initial_capacity); + append(b, z); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); +} + +template +simdjson_error to_json(const Z &z, std::string &s) { + string_builder b; + append(b, z); + std::string_view view; + if(auto e = b.view().get(view); e) { return e; } + s.assign(view); + return SUCCESS; +} + +template +string_builder& operator<<(string_builder& b, const Z& z) { + append(b, z); + return b; +} +} // namespace builder +} // namespace arm64 +// Alias the function template to 'to' in the global namespace +template +simdjson_result to_json(const Z &z, size_t initial_capacity = 1024) { + return arm64::builder::to_json_string(z, initial_capacity); +} +} // namespace simdjson + +#endif // SIMDJSON_STATIC_REFLECTION + +#endif +/* end file simdjson/generic/ondemand/json_builder.h for arm64 */ /* end file simdjson/generic/ondemand/amalgamated.h for arm64 */ /* including simdjson/arm64/end.h: #include "simdjson/arm64/end.h" */ @@ -42715,6 +46120,24 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin return { src[0] }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 1; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits; } + simdjson_inline int escape_index() { return 0; } + + bool escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + dst[0] = src[0]; + return { (src[0] == '\\') || (src[0] == '"') || (src[0] < 32) }; +} + } // unnamed namespace } // namespace fallback } // namespace simdjson @@ -42870,7 +46293,7 @@ class value_iterator; /* end file simdjson/generic/ondemand/base.h for fallback */ /* including simdjson/generic/ondemand/deserialize.h for fallback: #include "simdjson/generic/ondemand/deserialize.h" */ /* begin file simdjson/generic/ondemand/deserialize.h for fallback */ -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS #ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -42949,7 +46372,7 @@ template concept custom_deserializable = tag_invocable; template -concept deserializable = custom_deserializable || is_builtin_deserializable_v; +concept deserializable = custom_deserializable || is_builtin_deserializable_v || concepts::optional_type; template concept nothrow_custom_deserializable = nothrow_tag_invocable; @@ -42960,28 +46383,44 @@ concept nothrow_deserializable = nothrow_custom_deserializable || is_bu /// Deserialize Tag inline constexpr struct deserialize_tag { + using array_type = fallback::ondemand::array; + using object_type = fallback::ondemand::object; using value_type = fallback::ondemand::value; using document_type = fallback::ondemand::document; using document_reference_type = fallback::ondemand::document_reference; + // Customization Point for array + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(array_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for object + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(object_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + // Customization Point for value template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document reference template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } @@ -42991,7 +46430,7 @@ inline constexpr struct deserialize_tag { } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/deserialize.h for fallback */ /* including simdjson/generic/ondemand/value_iterator.h for fallback: #include "simdjson/generic/ondemand/value_iterator.h" */ @@ -43123,7 +46562,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** @@ -43133,7 +46573,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; @@ -43255,7 +46696,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** @@ -43265,7 +46707,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; @@ -43527,13 +46970,14 @@ class value { * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * When SIMDJSON_SUPPORTS_CONCEPTS is set, custom types are also supported. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -43550,6 +46994,7 @@ class value { * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * If the macro SIMDJSON_SUPPORTS_CONCEPTS is set, then custom types are also supported. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. @@ -43557,28 +47002,47 @@ class value { */ template simdjson_inline error_code get(T &out) -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION - if constexpr (custom_deserializable) { + #if SIMDJSON_SUPPORTS_CONCEPTS + if constexpr (custom_deserializable) { return deserialize(*this, out); - } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); - static_cast(out); // to get rid of unused errors - return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION - } + } else if constexpr (concepts::optional_type) { + using value_type = typename std::remove_cvref_t::value_type; + + // Check if the value is null + if (is_null()) { + out.reset(); // Set to nullopt + return SUCCESS; + } + + if (!out) { + out.emplace(); + } + return get(out.value()); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_CONCEPTS + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; #endif } @@ -44289,7 +47753,22 @@ struct simdjson_result : public fallback::implementat simdjson_result operator[](int) noexcept = delete; /** - * Get the type of this JSON value. + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * Given a valid JSON document, the answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just @@ -44901,6 +48380,7 @@ namespace ondemand { * The type of a JSON value. */ enum class json_type { + unknown=0, // Start at 1 to catch uninitialized / default values more easily array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) @@ -45107,6 +48587,12 @@ class raw_json_string { */ simdjson_inline const char * raw() const noexcept; + /** + * Get the character at index i. This is unchecked. + * [0] when the string is of length 0 returns the final quote ("). + */ + simdjson_inline char operator[](size_t i) const noexcept; + /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done) on target.size() characters, @@ -45246,10 +48732,10 @@ struct simdjson_result : public fallback::i simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline char operator[](size_t) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape(fallback::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(fallback::ondemand::json_iterator &iter) const noexcept; }; - } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H @@ -45265,6 +48751,7 @@ struct simdjson_result : public fallback::i /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include +#include namespace simdjson { namespace fallback { @@ -45383,7 +48870,9 @@ class parser { simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; - /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + /** @overload simdjson_result iterate(padded_string_view json) & noexcept + The string instance might be have its capacity extended. Note that this can still + result in AddressSanitizer: container-overflow in some cases. */ simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; @@ -45471,6 +48960,11 @@ class parser { * Setting batch_size to excessively large or excessively small values may impact negatively the * performance. * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what @@ -45478,10 +48972,10 @@ class parser { * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * - * ### Threads + * This is checked automatically with all iterate_many function calls, except for the two + * that take pointers (const char* or const uint8_t*). * - * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the - * hood to do some lookahead. + * ### Threads * * ### Parser Capacity * @@ -45507,14 +49001,16 @@ class parser { */ inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(padded_string_view json, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) + the string might be automatically padded with up to SIMDJSON_PADDING whitespace characters */ + inline simdjson_result iterate_many(std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe - /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; @@ -45616,13 +49112,39 @@ class parser { bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; #endif + /** + * Get a unique parser instance corresponding to the current thread. + * This instance can be safely used within the current thread, but it should + * not be passed to other threads. + * + * A parser should only be used for one document at a time. + * + * Our simdjson::from functions use this parser instance. + * + * You can free the related parser by calling release_parser(). + */ + static simdjson_inline simdjson_warn_unused ondemand::parser& get_parser(); + /** + * Release the parser instance initialized by get_parser() and all the + * associated resources (memory). Returns true if a parser instance + * was released. + */ + static simdjson_inline bool release_parser(); + private: + friend bool release_parser(); + friend ondemand::parser& get_parser(); + /** Get the thread-local parser instance, allocates it if needed */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_parser_instance(); + /** Get the thread-local parser instance, it might be null */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_threadlocal_parser_if_exists(); /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; std::unique_ptr string_buf{}; + #if SIMDJSON_DEVELOPMENT_CHECKS std::unique_ptr start_positions{}; #endif @@ -45786,6 +49308,37 @@ class array { * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; + +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this array as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON array is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the array, no matter where you are right now. @@ -45864,7 +49417,28 @@ struct simdjson_result : public fallback::implementat simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -45909,7 +49483,8 @@ class array_iterator { * * Part of the std::iterator interface. */ - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result + operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. /** * Check if we are at the end of the JSON. * @@ -45933,6 +49508,11 @@ class array_iterator { */ simdjson_inline array_iterator &operator++() noexcept; + /** + * Check if the array is at the end. + */ + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; + private: value_iterator iter{}; @@ -45951,7 +49531,6 @@ namespace simdjson { template<> struct simdjson_result : public fallback::implementation_simdjson_result_base { -public: simdjson_inline simdjson_result(fallback::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -45964,6 +49543,8 @@ struct simdjson_result : public fallback::im simdjson_inline bool operator==(const simdjson_result &) const noexcept; simdjson_inline bool operator!=(const simdjson_result &) const noexcept; simdjson_inline simdjson_result &operator++() noexcept; + + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; }; } // namespace simdjson @@ -46157,7 +49738,7 @@ class document { */ template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -46180,7 +49761,7 @@ class document { */ template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -46198,35 +49779,42 @@ class document { * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. - * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns INCORRECT_TYPE If the JSON value is of the given type. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_CONCEPTS + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_CONCEPTS } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; @@ -46289,7 +49877,7 @@ class document { * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a raw_json_string. * @@ -46298,7 +49886,7 @@ class document { * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a bool. * @@ -46448,11 +50036,27 @@ class document { * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * + * The answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. + * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * - * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + * Prior to simdjson 4.0, this function would return an error given a bad token. + * Starting with simdjson 4.0, it will return simdjson::ondemand::json_type::unknown. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. */ simdjson_inline simdjson_result type() noexcept; @@ -46742,7 +50346,7 @@ class document_reference { simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -46755,7 +50359,7 @@ class document_reference { } template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -46778,30 +50382,37 @@ class document_reference { */ template simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_CONCEPTS + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_CONCEPTS } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; @@ -46886,6 +50497,9 @@ struct simdjson_result : public fallback::implemen template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS + + using fallback::implementation_simdjson_result_base::operator*; + using fallback::implementation_simdjson_result_base::operator->; template ::value == false>::type> explicit simdjson_inline operator T() noexcept(false); simdjson_inline operator fallback::ondemand::array() & noexcept(false); @@ -47144,6 +50758,7 @@ class document_stream { * Default constructor. */ simdjson_inline iterator() noexcept; + simdjson_inline iterator(const iterator &other) noexcept = default; /** * Get the current document (or error). */ @@ -47157,6 +50772,7 @@ class document_stream { * @param other the end iterator to compare to. */ simdjson_inline bool operator!=(const iterator &other) const noexcept; + simdjson_inline bool operator==(const iterator &other) const noexcept; /** * @private * @@ -47200,6 +50816,11 @@ class document_stream { */ inline error_code error() const noexcept; + /** + * Returns whether the iterator is at the end. + */ + inline bool at_end() const noexcept; + private: simdjson_inline iterator(document_stream *s, bool finished) noexcept; /** The document_stream we're iterating through. */ @@ -47211,6 +50832,7 @@ class document_stream { friend class document_stream; friend class json_iterator; }; + using iterator = document_stream::iterator; /** * Start iterating the documents in the stream. @@ -47671,6 +51293,36 @@ class object { */ simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this object as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON object is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the object, no matter where you are right now. @@ -47714,12 +51366,32 @@ struct simdjson_result : public fallback::implementa simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; - inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; inline simdjson_result raw_json() noexcept; + #if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -47848,6 +51520,20 @@ inline simdjson_result to_json_string(simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); + +#if SIMDJSON_STATIC_REFLECTION +/** + * Create a JSON string from any user-defined type using static reflection. + * Only available when SIMDJSON_STATIC_REFLECTION is enabled. + */ +template + requires(!std::same_as && + !std::same_as && + !std::same_as && + !std::same_as) +inline std::string to_json_string(const T& obj); +#endif + } // namespace simdjson /** @@ -47919,17 +51605,22 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result #include +#if SIMDJSON_STATIC_REFLECTION +#include +// #include // for std::define_static_string - header not available yet +#endif namespace simdjson { template @@ -47976,6 +51667,32 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { return SUCCESS; } +////////////////////////////// +// String deserialization +////////////////////////////// + +// just a character! +error_code tag_invoke(deserialize_tag, auto &val, char &out) noexcept { + std::string_view x; + SIMDJSON_TRY(val.get_string().get(x)); + if(x.size() != 1) { + return INCORRECT_TYPE; + } + out = x[0]; + return SUCCESS; +} + +// any string-like type (can be constructed from std::string_view) +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + out = T{str}; + return SUCCESS; +} + + /** * STL containers have several constructors including one that takes a single * size argument. Thus, some compilers (Visual Studio) will not be able to @@ -47993,9 +51710,13 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { static_assert( std::is_default_constructible_v, "The specified type inside the container must default constructible."); - fallback::ondemand::array arr; - SIMDJSON_TRY(val.get_array().get(arr)); + if constexpr (std::is_same_v, fallback::ondemand::array>) { + arr = val; + } else { + SIMDJSON_TRY(val.get_array().get(arr)); + } + for (auto v : arr) { if constexpr (concepts::returns_reference) { if (auto const err = v.get().get(concepts::emplace_one(out)); @@ -48021,6 +51742,77 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { } +/** + * We want to support std::map and std::unordered_map but only for + * string-keyed types. + */ + template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::mapped_type; + static_assert( + deserializable, + "The specified value type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified value type inside the container must default constructible."); + fallback::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + value_type this_value; + SIMDJSON_TRY(field.value().get().get(this_value)); + [[maybe_unused]] std::pair result = out.emplace(key, this_value); + // unclear what to do if the key already exists + // if (result.second == false) { + // // key already exists + // } + } + (void)out; + return SUCCESS; +} + +template +error_code tag_invoke(deserialize_tag, fallback::ondemand::object &obj, T &out) noexcept { + using value_type = typename std::remove_cvref_t::mapped_type; + + out.clear(); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + + fallback::ondemand::value value_obj; + SIMDJSON_TRY(field.value().get(value_obj)); + + value_type this_value; + SIMDJSON_TRY(value_obj.get(this_value)); + out.emplace(typename T::key_type(key), std::move(this_value)); + } + return SUCCESS; +} + +template +error_code tag_invoke(deserialize_tag, fallback::ondemand::value &val, T &out) noexcept { + fallback::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + +template +error_code tag_invoke(deserialize_tag, fallback::ondemand::document &doc, T &out) noexcept { + fallback::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + +template +error_code tag_invoke(deserialize_tag, fallback::ondemand::document_reference &doc, T &out) noexcept { + fallback::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + /** * This CPO (Customization Point Object) will help deserialize into @@ -48062,17 +51854,16 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser /** * This CPO (Customization Point Object) will help deserialize into optional types. */ -template +template requires(!require_custom_serialization) -error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::value_type, ValT>) { +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept(nothrow_deserializable::value_type, decltype(val)>) { using value_type = typename std::remove_cvref_t::value_type; - static_assert( - deserializable, - "The specified type inside the unique_ptr must itself be deserializable"); - static_assert( - std::is_default_constructible_v, - "The specified type inside the unique_ptr must default constructible."); + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullopt + return SUCCESS; + } if (!out) { out.emplace(); @@ -48081,10 +51872,295 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser return SUCCESS; } + +#if SIMDJSON_STATIC_REFLECTION + + +template +constexpr bool user_defined_type = (std::is_class_v +&& !std::is_same_v && !std::is_same_v && !concepts::optional_type && +!concepts::appendable_containers && !require_custom_serialization); + + +template + requires(user_defined_type && std::is_class_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { + fallback::ondemand::object obj; + if constexpr (std::is_same_v, fallback::ondemand::object>) { + obj = val; + } else { + SIMDJSON_TRY(val.get_object().get(obj)); + } + error_code e = simdjson::SUCCESS; + template for (constexpr auto mem : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + // Note: removed static assert as optional types are now handled generically + // as long we are succesful or the field is not found, we continue + if(e == simdjson::SUCCESS || e == simdjson::NO_SUCH_FIELD) { + e = obj[key].get(out.[:mem:]); + } + } + }; + return e; +} + +// Support for enum deserialization - deserialize from string representation using expand approach from P2996R12 +template + requires(std::is_enum_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { +#if SIMDJSON_STATIC_REFLECTION + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + if (str == std::meta::identifier_of(enum_val)) { + out = [:enum_val:]; + return SUCCESS; + } + }; + + return INCORRECT_TYPE; +#else + // Fallback: deserialize as integer if reflection not available + std::underlying_type_t int_val; + SIMDJSON_TRY(val.get(int_val)); + out = static_cast(int_val); + return SUCCESS; +#endif +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::unique_ptr &out) noexcept { + if (!out) { + out = std::make_unique(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::shared_ptr &out) noexcept { + if (!out) { + out = std::make_shared(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +#endif // SIMDJSON_STATIC_REFLECTION + +//////////////////////////////////////// +// Unique pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Shared pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Explicit optional specializations +//////////////////////////////////////// + +//////////////////////////////////////// +// Explicit smart pointer specializations for string and int types +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_shared(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + int64_t temp; + SIMDJSON_TRY(val.get_int64().get(temp)); + *out = static_cast(temp); + return SUCCESS; +} + } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/std_deserialize.h for fallback */ // Inline definitions @@ -48368,6 +52444,9 @@ simdjson_inline array_iterator &array_iterator::operator++() noexcept { return *this; } +simdjson_inline bool array_iterator::at_end() const noexcept { + return iter.at_end(); +} } // namespace ondemand } // namespace fallback } // namespace simdjson @@ -48404,7 +52483,9 @@ simdjson_inline simdjson_result &simdjson_re ++(first); return *this; } - +simdjson_inline bool simdjson_result::at_end() const noexcept { + return !first.iter.is_valid() || first.at_end(); +} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H @@ -49162,8 +53243,8 @@ simdjson_inline document::operator object() & noexcept(false) { return get_objec simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document::operator double() noexcept(false) { return get_double(); } -simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } -simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator std::string_view() noexcept(false) simdjson_lifetime_bound { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) simdjson_lifetime_bound { return get_raw_json_string(); } simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document::operator value() noexcept(false) { return get_value(); } @@ -50117,10 +54198,19 @@ simdjson_inline document_stream::iterator& document_stream::iterator::operator++ return *this; } +simdjson_inline bool document_stream::iterator::at_end() const noexcept { + return finished; +} + + simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { return finished != other.finished; } +simdjson_inline bool document_stream::iterator::operator==(const document_stream::iterator &other) const noexcept { + return finished == other.finished; +} + simdjson_inline document_stream::iterator document_stream::begin() noexcept { start(); // If there are no documents, we're finished. @@ -50720,6 +54810,8 @@ simdjson_inline void json_iterator::assert_valid_position(token_position positio #ifndef SIMDJSON_CLANG_VISUAL_STUDIO SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#else + (void)position; // Suppress unused parameter warning #endif } @@ -51568,6 +55660,7 @@ simdjson_inline simdjson_result simdjson_result parser::iterate(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -51782,7 +55875,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(p #ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -51814,10 +55907,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(s } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { - if(json.capacity() - json.size() < SIMDJSON_PADDING) { - json.reserve(json.size() + SIMDJSON_PADDING); - } - return iterate(padded_string_view(json)); + return iterate(pad_with_reserve(json)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { @@ -51839,7 +55929,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(c } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -51854,6 +55944,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iter } inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { buf += 3; @@ -51862,16 +55953,24 @@ inline simdjson_result parser::iterate_many(const uint8_t *buf, if(allow_comma_separated && batch_size < len) { batch_size = len; } return document_stream(*this, buf, len, batch_size, allow_comma_separated); } + inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); } -inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { +inline simdjson_result parser::iterate_many(padded_string_view s, size_t batch_size, bool allow_comma_separated) noexcept { + if (!s.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(pad(s), batch_size, allow_comma_separated); } - simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } @@ -51906,6 +56005,34 @@ simdjson_inline simdjson_warn_unused simdjson_result parser::u return result; } +simdjson_inline simdjson_warn_unused ondemand::parser& parser::get_parser() { + return *parser::get_parser_instance(); +} + +simdjson_inline bool release_parser() { + auto &parser_instance = parser::get_threadlocal_parser_if_exists(); + if (parser_instance) { + parser_instance.reset(); + return true; + } + return false; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_parser_instance() { + std::unique_ptr& parser_instance = get_threadlocal_parser_if_exists(); + if (!parser_instance) { + parser_instance.reset(new ondemand::parser()); + } + return parser_instance; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_threadlocal_parser_if_exists() { + // @the-moisrex points out that this could be implemented with std::optional (C++17). + thread_local std::unique_ptr parser_instance = nullptr; + return parser_instance; +} + + } // namespace ondemand } // namespace fallback } // namespace simdjson @@ -51940,8 +56067,13 @@ namespace ondemand { simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} -simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } +simdjson_inline const char * raw_json_string::raw() const noexcept { + return reinterpret_cast(buf); +} +simdjson_inline char raw_json_string::operator[](size_t i) const noexcept { + return reinterpret_cast(buf)[i]; +} simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; @@ -52118,6 +56250,10 @@ simdjson_inline simdjson_result simdjson_result::operator[](size_t i) const noexcept { + if (error()) { return error(); } + return first[i]; +} simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(fallback::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { if (error()) { return error(); } return first.unescape(iter, allow_replacement); @@ -52143,6 +56279,9 @@ simdjson_inline simdjson_warn_unused simdjson_result simdjson_ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_builder.h" */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -53245,19 +57384,23 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } return result; } + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("bool"); - uint8_t tmpbuf[5+1+1]; // +1 for null termination - tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } - auto result = parse_bool(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - } - return result; + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } + simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("null"); @@ -53520,7 +57663,7 @@ simdjson_inline simdjson_result value_iterator::type() const noexcept case '5': case '6': case '7': case '8': case '9': return json_type::number; default: - return TAPE_ERROR; + return json_type::unknown; } } @@ -53560,6 +57703,1221 @@ simdjson_inline simdjson_result::simdjson_re #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* end file simdjson/generic/ondemand/value_iterator-inl.h for fallback */ +// JSON builder, ideally they should not be part of the ondemand directory +// but it is convenient for now to have them here. +/* including simdjson/generic/ondemand/json_string_builder.h for fallback: #include "simdjson/generic/ondemand/json_string_builder.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder.h for fallback */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace fallback { +namespace builder { + +/** + * A builder for JSON strings representing documents. This is a low-level + * builder that is not meant to be used directly by end-users. Though it + * supports atomic types (Booleans, strings), it does not support composed + * types (arrays and objects). + * + * Ultimately, this class can support kernel-specific optimizations. E.g., + * it may make use of SIMD instructions to escape strings faster. + */ +class string_builder { +public: + simdjson_inline string_builder(size_t initial_capacity = 1024); + + /** + * Append number (includes Booleans). Booleans are mapped to the strings + * false and true. Numbers are converted to strings abiding by the JSON standard. + * Floating-point numbers are converted to the shortest string that 'correctly' + * represents the number. + */ + template::value>::type> + simdjson_inline void append(number_type v) noexcept; + + /** + * Append character c. + */ + simdjson_inline void append(char c) noexcept; + + /** + * Append the string 'null'. + */ + simdjson_inline void append_null() noexcept; + + /** + * Clear the content. + */ + simdjson_inline void clear() noexcept; + + /** + * Append the std::string_view, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append(std::string_view input) noexcept; + + /** + * Append the std::string_view surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(std::string_view input) noexcept; + + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(char input) noexcept; + + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(const char* input) noexcept; + + /** + * Append the C string directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *c) noexcept; + + /** + * Append "{" to the buffer. + */ + simdjson_inline void start_object() noexcept; + + /** + * Append "}" to the buffer. + */ + simdjson_inline void end_object() noexcept; + + /** + * Append "[" to the buffer. + */ + simdjson_inline void start_array() noexcept; + + /** + * Append "]" to the buffer. + */ + simdjson_inline void end_array() noexcept; + + /** + * Append "," to the buffer. + */ + simdjson_inline void append_comma() noexcept; + + /** + * Append ":" to the buffer. + */ + simdjson_inline void append_colon() noexcept; + + /** + * Append a key-value pair to the buffer. + * The key is escaped and surrounded by double quotes. + * The value is escaped if it is a string. + */ + template + simdjson_inline void append_key_value(key_type key, value_type value) noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + // Support for optional types (std::optional, etc.) + template + simdjson_inline void append(const T &opt); + + // Support for string-like types + template + requires(std::is_convertible::value || + std::is_same::value ) + simdjson_inline void append(const T &value); +#endif +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS + // Support for range-based appending (std::ranges::view, etc.) + template +requires (!std::is_convertible::value) + simdjson_inline void append(const R &range) noexcept; +#endif + /** + * Append the std::string_view directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(std::string_view input) noexcept; + + /** + * Append len characters from str. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *str, size_t len) noexcept; +#if SIMDJSON_EXCEPTIONS + /** + * Creates an std::string from the written JSON buffer. + * Throws if memory allocation failed + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string() const noexcept(false); + + /** + * Creates an std::string_view from the written JSON buffer. + * Throws if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string_view() const noexcept(false) simdjson_lifetime_bound; +#endif + + /** + * Returns a view on the written JSON buffer. Returns an error + * if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result view() const noexcept; + + /** + * Appends the null character to the buffer and returns + * a pointer to the beginning of the written JSON buffer. + * Returns an error if memory allocation failed. + * The result is null-terminated. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result c_str() noexcept; + + /** + * Return true if the content is valid UTF-8. + */ + simdjson_inline bool validate_unicode() const noexcept; + + /** + * Returns the current size of the written JSON buffer. + * If an error occurred, returns 0. + */ + simdjson_inline size_t size() const noexcept; + +private: + /** + * Returns true if we can write at least upcoming_bytes bytes. + * The underlying buffer is reallocated if needed. It is designed + * to be called before writing to the buffer. It should be fast. + */ + simdjson_inline bool capacity_check(size_t upcoming_bytes); + + /** + * Grow the buffer to at least desired_capacity bytes. + * If the allocation fails, is_valid is set to false. We expect + * that this function would not be repeatedly called. + */ + simdjson_inline void grow_buffer(size_t desired_capacity); + + /** + * We use this helper function to make sure that is_valid is kept consistent. + */ + simdjson_inline void set_valid(bool valid) noexcept; + + std::unique_ptr buffer{}; + size_t position{0}; + size_t capacity{0}; + bool is_valid{true}; +}; + + + +} +} + + +#if !SIMDJSON_STATIC_REFLECTION +// fallback implementation until we have static reflection +template +simdjson_result to_json(const Z &z, size_t initial_capacity = 1024) { + simdjson::fallback::builder::string_builder b(initial_capacity); + b.append(z); + std::string_view s; + auto e = b.view().get(s); + if(e) { return e; } + return std::string(s); +} +#endif + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_H +/* end file simdjson/generic/ondemand/json_string_builder.h for fallback */ +/* including simdjson/generic/ondemand/json_string_builder-inl.h for fallback: #include "simdjson/generic/ondemand/json_string_builder-inl.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder-inl.h for fallback */ +/** + * This file is part of the builder API. It is temporarily in the ondemand + * directory but we will move it to a builder directory later. + */ +#include +#include +#include +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* + * Empirically, we have found that an inlined optimization is important for + * performance. The following macros are not ideal. We should find a better + * way to inline the code. + */ + +#if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \ + (defined(_M_AMD64) || defined(_M_X64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP == 2)) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#define SIMDJSON_EXPERIMENTAL_HAS_SSE2 1 +#endif +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_NEON +#define SIMDJSON_EXPERIMENTAL_HAS_NEON 1 +#endif +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +#include +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#include +#endif + +namespace simdjson { +namespace fallback { +namespace builder { + +static SIMDJSON_CONSTEXPR_LAMBDA std::array json_quotable_character = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + +/** + +A possible SWAR implementation of has_json_escapable_byte. It is not used because +it is slower than the current implementation. It is kept here for reference (to show +that we tried it). + +inline bool has_json_escapable_byte(uint64_t x) { + uint64_t is_ascii = 0x8080808080808080ULL & ~x; + uint64_t xor2 = x ^ 0x0202020202020202ULL; + uint64_t lt32_or_eq34 = xor2 - 0x2121212121212121ULL; + uint64_t sub92 = x ^ 0x5C5C5C5C5C5C5C5CULL; + uint64_t eq92 = (sub92 - 0x0101010101010101ULL); + return ((lt32_or_eq34 | eq92) & is_ascii) != 0; +} + +**/ + +SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline bool +simple_needs_escaping(std::string_view v) { + for (char c : v) { + // a table lookup is faster than a series of comparisons + if(json_quotable_character[static_cast(c)]) { + return true; + } + } + return false; +} + +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + uint8x16_t running = vdupq_n_u8(0); + uint8x16_t v34 = vdupq_n_u8(34); + uint8x16_t v92 = vdupq_n_u8(92); + + for (; i + 15 < view.size(); i += 16) { + uint8x16_t word = vld1q_u8((const uint8_t *)view.data() + i); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + if (i < view.size()) { + uint8x16_t word = + vld1q_u8((const uint8_t *)view.data() + view.length() - 16); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + return vmaxvq_u32(vreinterpretq_u32_u8(running)) != 0; +} +#elif SIMDJSON_EXPERIMENTAL_HAS_SSE2 +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + __m128i running = _mm_setzero_si128(); + for (; i + 15 < view.size(); i += 16) { + + __m128i word = _mm_loadu_si128(reinterpret_cast(view.data() + i)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + if (i < view.size()) { + __m128i word = + _mm_loadu_si128(reinterpret_cast(view.data() + view.length() - 16)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + return _mm_movemask_epi8(running) != 0; +} +#else +simdjson_inline bool fast_needs_escaping(std::string_view view) { + return simple_needs_escaping(view); +} +#endif + + +SIMDJSON_CONSTEXPR_LAMBDA inline size_t +find_next_json_quotable_character(const std::string_view view, + size_t location) noexcept { + + for (auto pos = view.begin() + location; pos != view.end(); ++pos) { + if (json_quotable_character[static_cast(*pos)]) { + return pos - view.begin(); + } + } + return size_t(view.size()); +} + +SIMDJSON_CONSTEXPR_LAMBDA static std::string_view control_chars[] = { + "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", + "\\u0007", "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", + "\\u000e", "\\u000f", "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", + "\\u0015", "\\u0016", "\\u0017", "\\u0018", "\\u0019", "\\u001a", "\\u001b", + "\\u001c", "\\u001d", "\\u001e", "\\u001f"}; + +// All Unicode characters may be placed within the quotation marks, except for the +// characters that MUST be escaped: quotation mark, reverse solidus, and the control +// characters (U+0000 through U+001F). +// There are two-character sequence escape representations of some popular characters: +// \", \\, \b, \f, \n, \r, \t. +SIMDJSON_CONSTEXPR_LAMBDA void escape_json_char(char c, char *&out) { + if (c == '"') { + memcpy(out, "\\\"", 2); + out += 2; + } else if (c == '\\') { + memcpy(out, "\\\\", 2); + out += 2; + } else { + std::string_view v = control_chars[uint8_t(c)]; + memcpy(out, v.data(), v.size()); + out += v.size(); + } +} + +inline size_t write_string_escaped(const std::string_view input, char *out) { + size_t mysize = input.size(); + if (!fast_needs_escaping(input)) { // fast path! + memcpy(out, input.data(), input.size()); + return input.size(); + } + const char *const initout = out; + size_t location = find_next_json_quotable_character(input, 0); + memcpy(out, input.data(), location); + out += location; + escape_json_char(input[location], out); + location += 1; + while (location < mysize) { + size_t newlocation = find_next_json_quotable_character(input, location); + memcpy(out, input.data() + location, newlocation - location); + out += newlocation - location; + location = newlocation; + if (location == mysize) { + break; + } + escape_json_char(input[location], out); + location += 1; + } + return out - initout; +} + +simdjson_inline string_builder::string_builder(size_t initial_capacity) + : buffer(new(std::nothrow) char[initial_capacity]), position(0), + capacity(buffer.get() != nullptr ? initial_capacity : 0), + is_valid(buffer.get() != nullptr) {} + +simdjson_inline bool string_builder::capacity_check(size_t upcoming_bytes) { + // We use the convention that when is_valid is false, then the capacity and + // the position are 0. + // Most of the time, this function will return true. + if (simdjson_likely(upcoming_bytes <= capacity - position)) { + return true; + } + // check for overflow, most of the time there is no overflow + if (simdjson_likely(position + upcoming_bytes < position)) { + return false; + } + // We will rarely get here. + grow_buffer((std::max)(capacity * 2, position + upcoming_bytes)); + // If the buffer allocation failed, we set is_valid to false. + return is_valid; +} + +simdjson_inline void string_builder::grow_buffer(size_t desired_capacity) { + if (!is_valid) { + return; + } + std::unique_ptr new_buffer(new (std::nothrow) char[desired_capacity]); + if (new_buffer.get() == nullptr) { + set_valid(false); + return; + } + std::memcpy(new_buffer.get(), buffer.get(), position); + buffer.swap(new_buffer); + capacity = desired_capacity; +} + +simdjson_inline void string_builder::set_valid(bool valid) noexcept { + if (!valid) { + is_valid = false; + capacity = 0; + position = 0; + buffer.reset(); + } else { + is_valid = true; + } +} + +simdjson_inline size_t string_builder::size() const noexcept { + return position; +} + +simdjson_inline void string_builder::append(char c) noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = c; + } +} + +simdjson_inline void string_builder::append_null() noexcept { + constexpr char null_literal[] = "null"; + constexpr size_t null_len = sizeof(null_literal) - 1; + if (capacity_check(null_len)) { + std::memcpy(buffer.get() + position, null_literal, null_len); + position += null_len; + } +} + +simdjson_inline void string_builder::clear() noexcept { + position = 0; + // if it was invalid, we should try to repair it + if (!is_valid) { + capacity = 0; + buffer.reset(); + is_valid = true; + } +} + +namespace internal { + +// We could specialize further for 32-bit integers. +simdjson_really_inline int int_log2(uint32_t x) { return (63 - leading_zeroes(x | 1)); } + +simdjson_really_inline int fast_digit_count(uint32_t x) { + static uint64_t table[] = { + 4294967296, 8589934582, 8589934582, 8589934582, 12884901788, + 12884901788, 12884901788, 17179868184, 17179868184, 17179868184, + 21474826480, 21474826480, 21474826480, 21474826480, 25769703776, + 25769703776, 25769703776, 30063771072, 30063771072, 30063771072, + 34349738368, 34349738368, 34349738368, 34349738368, 38554705664, + 38554705664, 38554705664, 41949672960, 41949672960, 41949672960, + 42949672960, 42949672960}; + return uint32_t((x + table[int_log2(x)]) >> 32); +} + +simdjson_really_inline int int_log2(uint64_t x) { return 63 - leading_zeroes(x | 1); } + +simdjson_really_inline int fast_digit_count(uint64_t x) { + static uint64_t table[] = {9, + 99, + 999, + 9999, + 99999, + 999999, + 9999999, + 99999999, + 999999999, + 9999999999, + 99999999999, + 999999999999, + 9999999999999, + 99999999999999, + 999999999999999ULL, + 9999999999999999ULL, + 99999999999999999ULL, + 999999999999999999ULL, + 9999999999999999999ULL}; + int y = (19 * int_log2(x) >> 6); + y += x > table[y]; + return y + 1; +} + +template ::value>::type> +simdjson_really_inline size_t digit_count(number_type v) noexcept { + static_assert(sizeof(number_type) == 8 || sizeof(number_type) == 4 || + sizeof(number_type) == 2 || sizeof(number_type) == 1, + "We only support 8-bit, 16-bit, 32-bit and 64-bit numbers"); + return fast_digit_count(v); +} +static const char decimal_table[200] = { + 0x30, 0x30, 0x30, 0x31, 0x30, 0x32, 0x30, 0x33, 0x30, 0x34, 0x30, 0x35, + 0x30, 0x36, 0x30, 0x37, 0x30, 0x38, 0x30, 0x39, 0x31, 0x30, 0x31, 0x31, + 0x31, 0x32, 0x31, 0x33, 0x31, 0x34, 0x31, 0x35, 0x31, 0x36, 0x31, 0x37, + 0x31, 0x38, 0x31, 0x39, 0x32, 0x30, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, + 0x32, 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37, 0x32, 0x38, 0x32, 0x39, + 0x33, 0x30, 0x33, 0x31, 0x33, 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35, + 0x33, 0x36, 0x33, 0x37, 0x33, 0x38, 0x33, 0x39, 0x34, 0x30, 0x34, 0x31, + 0x34, 0x32, 0x34, 0x33, 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, 0x34, 0x37, + 0x34, 0x38, 0x34, 0x39, 0x35, 0x30, 0x35, 0x31, 0x35, 0x32, 0x35, 0x33, + 0x35, 0x34, 0x35, 0x35, 0x35, 0x36, 0x35, 0x37, 0x35, 0x38, 0x35, 0x39, + 0x36, 0x30, 0x36, 0x31, 0x36, 0x32, 0x36, 0x33, 0x36, 0x34, 0x36, 0x35, + 0x36, 0x36, 0x36, 0x37, 0x36, 0x38, 0x36, 0x39, 0x37, 0x30, 0x37, 0x31, + 0x37, 0x32, 0x37, 0x33, 0x37, 0x34, 0x37, 0x35, 0x37, 0x36, 0x37, 0x37, + 0x37, 0x38, 0x37, 0x39, 0x38, 0x30, 0x38, 0x31, 0x38, 0x32, 0x38, 0x33, + 0x38, 0x34, 0x38, 0x35, 0x38, 0x36, 0x38, 0x37, 0x38, 0x38, 0x38, 0x39, + 0x39, 0x30, 0x39, 0x31, 0x39, 0x32, 0x39, 0x33, 0x39, 0x34, 0x39, 0x35, + 0x39, 0x36, 0x39, 0x37, 0x39, 0x38, 0x39, 0x39, +}; +} // namespace internal + +template +simdjson_inline void string_builder::append(number_type v) noexcept { + static_assert(std::is_same::value || + std::is_integral::value || + std::is_floating_point::value, + "Unsupported number type"); + // If C++17 is available, we can 'if constexpr' here. + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + if (v) { + constexpr char true_literal[] = "true"; + constexpr size_t true_len = sizeof(true_literal) - 1; + if (capacity_check(true_len)) { + std::memcpy(buffer.get() + position, true_literal, true_len); + position += true_len; + } + } else { + constexpr char false_literal[] = "false"; + constexpr size_t false_len = sizeof(false_literal) - 1; + if (capacity_check(false_len)) { + std::memcpy(buffer.get() + position, false_literal, false_len); + position += false_len; + } + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_unsigned::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + unsigned_type pv = static_cast(v); + size_t dc = internal::digit_count(pv); + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100)*2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_integral::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + bool negative = v < 0; + unsigned_type pv = static_cast(v); + if (negative) { + pv = 0 - pv; // the 0 is for Microsoft + } + size_t dc = internal::digit_count(pv); + if (negative) { + buffer.get()[position++] = '-'; + } + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100)*2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_floating_point::value) { + constexpr size_t max_number_size = 24; + if (capacity_check(max_number_size)) { + // We could specialize for float. + char *end = simdjson::internal::to_chars(buffer.get() + position, nullptr, + double(v)); + position = end - buffer.get(); + } + } +} + +simdjson_inline void +string_builder::escape_and_append(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(6 * input.size())) { + position += write_string_escaped(input, buffer.get() + position); + } +} + +simdjson_inline void +string_builder::escape_and_append_with_quotes(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * input.size())) { + buffer.get()[position++] = '"'; + position += write_string_escaped(input, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} + +simdjson_inline void +string_builder::escape_and_append_with_quotes(char input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * 1)) { + buffer.get()[position++] = '"'; + std::string_view cinput(&input, 1); + position += write_string_escaped(cinput, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} + +simdjson_inline void string_builder::escape_and_append_with_quotes(const char* input) noexcept { + std::string_view cinput(input); + escape_and_append_with_quotes(cinput); +} + +simdjson_inline void string_builder::append_raw(const char *c) noexcept { + size_t len = std::strlen(c); + append_raw(c, len); +} + +simdjson_inline void +string_builder::append_raw(std::string_view input) noexcept { + if (capacity_check(input.size())) { + std::memcpy(buffer.get() + position, input.data(), input.size()); + position += input.size(); + } +} + +simdjson_inline void string_builder::append_raw(const char *str, + size_t len) noexcept { + if (capacity_check(len)) { + std::memcpy(buffer.get() + position, str, len); + position += len; + } +} +#if SIMDJSON_SUPPORTS_CONCEPTS +// Support for optional types (std::optional, etc.) +template +simdjson_inline void string_builder::append(const T &opt) { + if (opt) { + append(*opt); + } else { + append_null(); + } +} +template +requires(std::is_convertible::value || +std::is_same::value ) +simdjson_inline void string_builder::append(const T &value) { + escape_and_append_with_quotes(value); +} +#endif + +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS + // Support for range-based appending (std::ranges::view, etc.) +template +requires (!std::is_convertible::value) +simdjson_inline void string_builder::append(const R &range) noexcept { + auto it = std::ranges::begin(range); + auto end = std::ranges::end(range); + if constexpr (concepts::is_pair) { + start_object(); + + if (it == end) { + end_object(); + return; // Handle empty range + } + // Append first item without leading comma + append_key_value(it->first, it->second); + ++it; + + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append_key_value(it->first, it->second); + } + end_object(); + } else { + start_array(); + if (it == end) { + end_array(); + return; // Handle empty range + } + + // Append first item without leading comma + append(*it); + ++it; + + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append(*it); + } + end_array(); + + } +} + +#endif + +#if SIMDJSON_EXCEPTIONS +simdjson_inline string_builder::operator std::string() const noexcept(false) { + return std::string(std::string_view()); +} + +simdjson_inline string_builder::operator std::string_view() const + noexcept(false) simdjson_lifetime_bound { + return view(); +} +#endif + +simdjson_inline simdjson_result +string_builder::view() const noexcept { + if (!is_valid) { + return simdjson::OUT_OF_CAPACITY; + } + return std::string_view(buffer.get(), position); +} + +simdjson_inline simdjson_result string_builder::c_str() noexcept { + if (capacity_check(1)) { + buffer.get()[position] = '\0'; + return buffer.get(); + } + return simdjson::OUT_OF_CAPACITY; +} + +simdjson_inline bool string_builder::validate_unicode() const noexcept { + return simdjson::validate_utf8(buffer.get(), position); +} + +simdjson_inline void string_builder::start_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '{'; + } +} + + +simdjson_inline void string_builder::end_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '}'; + } +} + + +simdjson_inline void string_builder::start_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '['; + } +} + + +simdjson_inline void string_builder::end_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ']'; + } +} + + +simdjson_inline void string_builder::append_comma() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ','; + } +} + + +simdjson_inline void string_builder::append_colon() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ':'; + } +} + +template +simdjson_inline void string_builder::append_key_value(key_type key, value_type value) noexcept { + static_assert( + std::is_same::value || + std::is_convertible::value, + "Unsupported key type"); + escape_and_append_with_quotes(key); + append_colon(); + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + append_null(); + } else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } else SIMDJSON_IF_CONSTEXPR(std::is_convertible::value) { + escape_and_append_with_quotes(value); + } else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } else { + append(value); + } +} + +} // namespace builder +} // namespace fallback +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_INL_H +/* end file simdjson/generic/ondemand/json_string_builder-inl.h for fallback */ +/* including simdjson/generic/ondemand/json_builder.h for fallback: #include "simdjson/generic/ondemand/json_builder.h" */ +/* begin file simdjson/generic/ondemand/json_builder.h for fallback */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/concepts.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#if SIMDJSON_STATIC_REFLECTION + +#include +#include +#include +#include +#include +#include +#include +#include +// #include // for std::define_static_string - header not available yet + +namespace simdjson { +namespace fallback { +namespace builder { + +// Concept that checks if a type is a container but not a string (because +// strings handling must be handled differently) +template +concept container_but_not_string = + requires(T a) { + { a.size() } -> std::convertible_to; + { + a[std::declval()] + }; // check if elements are accessible for the subscript operator + } && !std::is_same_v && + !std::is_same_v && !std::is_same_v; + +template + requires(container_but_not_string) +constexpr void atom(string_builder &b, const T &t) { + if (t.size() == 0) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, t[0]); + for (size_t i = 1; i < t.size(); ++i) { + b.append(','); + atom(b, t[i]); + } + b.append(']'); +} + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +constexpr void atom(string_builder &b, const T &t) { + b.escape_and_append_with_quotes(t); +} + +template +constexpr void atom(string_builder &b, const T &m) { + if (m.empty()) { + b.append_raw("{}"); + return; + } + b.append('{'); + bool first = true; + for (const auto& [key, value] : m) { + if (!first) { + b.append(','); + } + first = false; + // Keys must be convertible to string_view per the concept + b.escape_and_append_with_quotes(key); + b.append(':'); + atom(b, value); + } + b.append('}'); +} + + +template::value && !std::is_same_v>::type> +constexpr void atom(string_builder &b, const number_type t) { + b.append(t); +} + +template + requires(std::is_class_v && !container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v) +constexpr void atom(string_builder &b, const T &t) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, t.[:dm:]); + i++; + }; + b.append('}'); +} + +// Support for optional types (std::optional, etc.) +template +constexpr void atom(string_builder &b, const T &opt) { + if (opt) { + atom(b, opt.value()); + } else { + b.append_raw("null"); + } +} + +// Support for smart pointers (std::unique_ptr, std::shared_ptr, etc.) +template +constexpr void atom(string_builder &b, const T &ptr) { + if (ptr) { + atom(b, *ptr); + } else { + b.append_raw("null"); + } +} + +// Support for enums - serialize as string representation using expand approach from P2996R12 +template + requires(std::is_enum_v) +void atom(string_builder &b, const T &e) { +#if SIMDJSON_STATIC_REFLECTION + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + constexpr auto enum_str = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(enum_val))); + if (e == [:enum_val:]) { + b.append_raw(enum_str); + return; + } + }; + // Fallback to integer if enum value not found + atom(b, static_cast>(e)); +#else + // Fallback: serialize as integer if reflection not available + atom(b, static_cast>(e)); +#endif +} + +// Support for appendable containers that don't have operator[] (sets, etc.) +template + requires(!container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v) +constexpr void atom(string_builder &b, const T &container) { + if (container.empty()) { + b.append_raw("[]"); + return; + } + b.append('['); + bool first = true; + for (const auto& item : container) { + if (!first) { + b.append(','); + } + first = false; + atom(b, item); + } + b.append(']'); +} + +// append functions that delegate to atom functions for primitive types +template + requires(std::is_arithmetic_v && !std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template + requires(!container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template +void append(string_builder &b, const T &t) { + atom(b, t); +} + +// works for struct +template + requires(std::is_class_v && !container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v) +void append(string_builder &b, const Z &z) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^Z, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, z.[:dm:]); + i++; + }; + b.append('}'); +} + +// works for container +template + requires(container_but_not_string) +void append(string_builder &b, const Z &z) { + if (z.size() == 0) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, z[0]); + for (size_t i = 1; i < z.size(); ++i) { + b.append(','); + atom(b, z[i]); + } + b.append(']'); +} + +template +simdjson_result to_json_string(const Z &z, size_t initial_capacity = 1024) { + string_builder b(initial_capacity); + append(b, z); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); +} + +template +simdjson_error to_json(const Z &z, std::string &s) { + string_builder b; + append(b, z); + std::string_view view; + if(auto e = b.view().get(view); e) { return e; } + s.assign(view); + return SUCCESS; +} + +template +string_builder& operator<<(string_builder& b, const Z& z) { + append(b, z); + return b; +} +} // namespace builder +} // namespace fallback +// Alias the function template to 'to' in the global namespace +template +simdjson_result to_json(const Z &z, size_t initial_capacity = 1024) { + return fallback::builder::to_json_string(z, initial_capacity); +} +} // namespace simdjson + +#endif // SIMDJSON_STATIC_REFLECTION + +#endif +/* end file simdjson/generic/ondemand/json_builder.h for fallback */ /* end file simdjson/generic/ondemand/amalgamated.h for fallback */ /* including simdjson/fallback/end.h: #include "simdjson/fallback/end.h" */ @@ -54272,6 +59630,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 32; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + uint64_t((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace haswell } // namespace simdjson @@ -54338,7 +59721,7 @@ class value_iterator; /* end file simdjson/generic/ondemand/base.h for haswell */ /* including simdjson/generic/ondemand/deserialize.h for haswell: #include "simdjson/generic/ondemand/deserialize.h" */ /* begin file simdjson/generic/ondemand/deserialize.h for haswell */ -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS #ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -54417,7 +59800,7 @@ template concept custom_deserializable = tag_invocable; template -concept deserializable = custom_deserializable || is_builtin_deserializable_v; +concept deserializable = custom_deserializable || is_builtin_deserializable_v || concepts::optional_type; template concept nothrow_custom_deserializable = nothrow_tag_invocable; @@ -54428,28 +59811,44 @@ concept nothrow_deserializable = nothrow_custom_deserializable || is_bu /// Deserialize Tag inline constexpr struct deserialize_tag { + using array_type = haswell::ondemand::array; + using object_type = haswell::ondemand::object; using value_type = haswell::ondemand::value; using document_type = haswell::ondemand::document; using document_reference_type = haswell::ondemand::document_reference; + // Customization Point for array + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(array_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for object + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(object_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + // Customization Point for value template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document reference template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } @@ -54459,7 +59858,7 @@ inline constexpr struct deserialize_tag { } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/deserialize.h for haswell */ /* including simdjson/generic/ondemand/value_iterator.h for haswell: #include "simdjson/generic/ondemand/value_iterator.h" */ @@ -54591,7 +59990,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** @@ -54601,7 +60001,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; @@ -54723,7 +60124,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** @@ -54733,7 +60135,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; @@ -54995,13 +60398,14 @@ class value { * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * When SIMDJSON_SUPPORTS_CONCEPTS is set, custom types are also supported. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -55018,6 +60422,7 @@ class value { * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * If the macro SIMDJSON_SUPPORTS_CONCEPTS is set, then custom types are also supported. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. @@ -55025,28 +60430,47 @@ class value { */ template simdjson_inline error_code get(T &out) -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION - if constexpr (custom_deserializable) { + #if SIMDJSON_SUPPORTS_CONCEPTS + if constexpr (custom_deserializable) { return deserialize(*this, out); - } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); - static_cast(out); // to get rid of unused errors - return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION - } + } else if constexpr (concepts::optional_type) { + using value_type = typename std::remove_cvref_t::value_type; + + // Check if the value is null + if (is_null()) { + out.reset(); // Set to nullopt + return SUCCESS; + } + + if (!out) { + out.emplace(); + } + return get(out.value()); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_CONCEPTS + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; #endif } @@ -55757,7 +61181,22 @@ struct simdjson_result : public haswell::implementatio simdjson_result operator[](int) noexcept = delete; /** - * Get the type of this JSON value. + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * Given a valid JSON document, the answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just @@ -56369,6 +61808,7 @@ namespace ondemand { * The type of a JSON value. */ enum class json_type { + unknown=0, // Start at 1 to catch uninitialized / default values more easily array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) @@ -56575,6 +62015,12 @@ class raw_json_string { */ simdjson_inline const char * raw() const noexcept; + /** + * Get the character at index i. This is unchecked. + * [0] when the string is of length 0 returns the final quote ("). + */ + simdjson_inline char operator[](size_t i) const noexcept; + /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done) on target.size() characters, @@ -56714,10 +62160,10 @@ struct simdjson_result : public haswell::imp simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline char operator[](size_t) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape(haswell::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(haswell::ondemand::json_iterator &iter) const noexcept; }; - } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H @@ -56733,6 +62179,7 @@ struct simdjson_result : public haswell::imp /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include +#include namespace simdjson { namespace haswell { @@ -56851,7 +62298,9 @@ class parser { simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; - /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + /** @overload simdjson_result iterate(padded_string_view json) & noexcept + The string instance might be have its capacity extended. Note that this can still + result in AddressSanitizer: container-overflow in some cases. */ simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; @@ -56939,6 +62388,11 @@ class parser { * Setting batch_size to excessively large or excessively small values may impact negatively the * performance. * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what @@ -56946,10 +62400,10 @@ class parser { * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * - * ### Threads + * This is checked automatically with all iterate_many function calls, except for the two + * that take pointers (const char* or const uint8_t*). * - * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the - * hood to do some lookahead. + * ### Threads * * ### Parser Capacity * @@ -56975,14 +62429,16 @@ class parser { */ inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(padded_string_view json, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) + the string might be automatically padded with up to SIMDJSON_PADDING whitespace characters */ + inline simdjson_result iterate_many(std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe - /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; @@ -57084,13 +62540,39 @@ class parser { bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; #endif + /** + * Get a unique parser instance corresponding to the current thread. + * This instance can be safely used within the current thread, but it should + * not be passed to other threads. + * + * A parser should only be used for one document at a time. + * + * Our simdjson::from functions use this parser instance. + * + * You can free the related parser by calling release_parser(). + */ + static simdjson_inline simdjson_warn_unused ondemand::parser& get_parser(); + /** + * Release the parser instance initialized by get_parser() and all the + * associated resources (memory). Returns true if a parser instance + * was released. + */ + static simdjson_inline bool release_parser(); + private: + friend bool release_parser(); + friend ondemand::parser& get_parser(); + /** Get the thread-local parser instance, allocates it if needed */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_parser_instance(); + /** Get the thread-local parser instance, it might be null */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_threadlocal_parser_if_exists(); /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; std::unique_ptr string_buf{}; + #if SIMDJSON_DEVELOPMENT_CHECKS std::unique_ptr start_positions{}; #endif @@ -57254,6 +62736,37 @@ class array { * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; + +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this array as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON array is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the array, no matter where you are right now. @@ -57332,7 +62845,28 @@ struct simdjson_result : public haswell::implementatio simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -57377,7 +62911,8 @@ class array_iterator { * * Part of the std::iterator interface. */ - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result + operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. /** * Check if we are at the end of the JSON. * @@ -57401,6 +62936,11 @@ class array_iterator { */ simdjson_inline array_iterator &operator++() noexcept; + /** + * Check if the array is at the end. + */ + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; + private: value_iterator iter{}; @@ -57419,7 +62959,6 @@ namespace simdjson { template<> struct simdjson_result : public haswell::implementation_simdjson_result_base { -public: simdjson_inline simdjson_result(haswell::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -57432,6 +62971,8 @@ struct simdjson_result : public haswell::impl simdjson_inline bool operator==(const simdjson_result &) const noexcept; simdjson_inline bool operator!=(const simdjson_result &) const noexcept; simdjson_inline simdjson_result &operator++() noexcept; + + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; }; } // namespace simdjson @@ -57625,7 +63166,7 @@ class document { */ template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -57648,7 +63189,7 @@ class document { */ template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -57666,35 +63207,42 @@ class document { * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. - * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns INCORRECT_TYPE If the JSON value is of the given type. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_CONCEPTS + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_CONCEPTS } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; @@ -57757,7 +63305,7 @@ class document { * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a raw_json_string. * @@ -57766,7 +63314,7 @@ class document { * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a bool. * @@ -57916,11 +63464,27 @@ class document { * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * + * The answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. + * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * - * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + * Prior to simdjson 4.0, this function would return an error given a bad token. + * Starting with simdjson 4.0, it will return simdjson::ondemand::json_type::unknown. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. */ simdjson_inline simdjson_result type() noexcept; @@ -58210,7 +63774,7 @@ class document_reference { simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -58223,7 +63787,7 @@ class document_reference { } template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -58246,30 +63810,37 @@ class document_reference { */ template simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_CONCEPTS + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_CONCEPTS } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; @@ -58354,6 +63925,9 @@ struct simdjson_result : public haswell::implementa template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS + + using haswell::implementation_simdjson_result_base::operator*; + using haswell::implementation_simdjson_result_base::operator->; template ::value == false>::type> explicit simdjson_inline operator T() noexcept(false); simdjson_inline operator haswell::ondemand::array() & noexcept(false); @@ -58612,6 +64186,7 @@ class document_stream { * Default constructor. */ simdjson_inline iterator() noexcept; + simdjson_inline iterator(const iterator &other) noexcept = default; /** * Get the current document (or error). */ @@ -58625,6 +64200,7 @@ class document_stream { * @param other the end iterator to compare to. */ simdjson_inline bool operator!=(const iterator &other) const noexcept; + simdjson_inline bool operator==(const iterator &other) const noexcept; /** * @private * @@ -58668,6 +64244,11 @@ class document_stream { */ inline error_code error() const noexcept; + /** + * Returns whether the iterator is at the end. + */ + inline bool at_end() const noexcept; + private: simdjson_inline iterator(document_stream *s, bool finished) noexcept; /** The document_stream we're iterating through. */ @@ -58679,6 +64260,7 @@ class document_stream { friend class document_stream; friend class json_iterator; }; + using iterator = document_stream::iterator; /** * Start iterating the documents in the stream. @@ -59139,6 +64721,36 @@ class object { */ simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this object as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON object is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the object, no matter where you are right now. @@ -59182,12 +64794,32 @@ struct simdjson_result : public haswell::implementati simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; - inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; inline simdjson_result raw_json() noexcept; + #if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -59316,6 +64948,20 @@ inline simdjson_result to_json_string(simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); + +#if SIMDJSON_STATIC_REFLECTION +/** + * Create a JSON string from any user-defined type using static reflection. + * Only available when SIMDJSON_STATIC_REFLECTION is enabled. + */ +template + requires(!std::same_as && + !std::same_as && + !std::same_as && + !std::same_as) +inline std::string to_json_string(const T& obj); +#endif + } // namespace simdjson /** @@ -59387,17 +65033,22 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result #include +#if SIMDJSON_STATIC_REFLECTION +#include +// #include // for std::define_static_string - header not available yet +#endif namespace simdjson { template @@ -59444,6 +65095,32 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { return SUCCESS; } +////////////////////////////// +// String deserialization +////////////////////////////// + +// just a character! +error_code tag_invoke(deserialize_tag, auto &val, char &out) noexcept { + std::string_view x; + SIMDJSON_TRY(val.get_string().get(x)); + if(x.size() != 1) { + return INCORRECT_TYPE; + } + out = x[0]; + return SUCCESS; +} + +// any string-like type (can be constructed from std::string_view) +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + out = T{str}; + return SUCCESS; +} + + /** * STL containers have several constructors including one that takes a single * size argument. Thus, some compilers (Visual Studio) will not be able to @@ -59461,9 +65138,13 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { static_assert( std::is_default_constructible_v, "The specified type inside the container must default constructible."); - haswell::ondemand::array arr; - SIMDJSON_TRY(val.get_array().get(arr)); + if constexpr (std::is_same_v, haswell::ondemand::array>) { + arr = val; + } else { + SIMDJSON_TRY(val.get_array().get(arr)); + } + for (auto v : arr) { if constexpr (concepts::returns_reference) { if (auto const err = v.get().get(concepts::emplace_one(out)); @@ -59489,6 +65170,77 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { } +/** + * We want to support std::map and std::unordered_map but only for + * string-keyed types. + */ + template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::mapped_type; + static_assert( + deserializable, + "The specified value type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified value type inside the container must default constructible."); + haswell::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + value_type this_value; + SIMDJSON_TRY(field.value().get().get(this_value)); + [[maybe_unused]] std::pair result = out.emplace(key, this_value); + // unclear what to do if the key already exists + // if (result.second == false) { + // // key already exists + // } + } + (void)out; + return SUCCESS; +} + +template +error_code tag_invoke(deserialize_tag, haswell::ondemand::object &obj, T &out) noexcept { + using value_type = typename std::remove_cvref_t::mapped_type; + + out.clear(); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + + haswell::ondemand::value value_obj; + SIMDJSON_TRY(field.value().get(value_obj)); + + value_type this_value; + SIMDJSON_TRY(value_obj.get(this_value)); + out.emplace(typename T::key_type(key), std::move(this_value)); + } + return SUCCESS; +} + +template +error_code tag_invoke(deserialize_tag, haswell::ondemand::value &val, T &out) noexcept { + haswell::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + +template +error_code tag_invoke(deserialize_tag, haswell::ondemand::document &doc, T &out) noexcept { + haswell::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + +template +error_code tag_invoke(deserialize_tag, haswell::ondemand::document_reference &doc, T &out) noexcept { + haswell::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + /** * This CPO (Customization Point Object) will help deserialize into @@ -59530,17 +65282,16 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser /** * This CPO (Customization Point Object) will help deserialize into optional types. */ -template +template requires(!require_custom_serialization) -error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::value_type, ValT>) { +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept(nothrow_deserializable::value_type, decltype(val)>) { using value_type = typename std::remove_cvref_t::value_type; - static_assert( - deserializable, - "The specified type inside the unique_ptr must itself be deserializable"); - static_assert( - std::is_default_constructible_v, - "The specified type inside the unique_ptr must default constructible."); + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullopt + return SUCCESS; + } if (!out) { out.emplace(); @@ -59549,10 +65300,295 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser return SUCCESS; } + +#if SIMDJSON_STATIC_REFLECTION + + +template +constexpr bool user_defined_type = (std::is_class_v +&& !std::is_same_v && !std::is_same_v && !concepts::optional_type && +!concepts::appendable_containers && !require_custom_serialization); + + +template + requires(user_defined_type && std::is_class_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { + haswell::ondemand::object obj; + if constexpr (std::is_same_v, haswell::ondemand::object>) { + obj = val; + } else { + SIMDJSON_TRY(val.get_object().get(obj)); + } + error_code e = simdjson::SUCCESS; + template for (constexpr auto mem : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + // Note: removed static assert as optional types are now handled generically + // as long we are succesful or the field is not found, we continue + if(e == simdjson::SUCCESS || e == simdjson::NO_SUCH_FIELD) { + e = obj[key].get(out.[:mem:]); + } + } + }; + return e; +} + +// Support for enum deserialization - deserialize from string representation using expand approach from P2996R12 +template + requires(std::is_enum_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { +#if SIMDJSON_STATIC_REFLECTION + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + if (str == std::meta::identifier_of(enum_val)) { + out = [:enum_val:]; + return SUCCESS; + } + }; + + return INCORRECT_TYPE; +#else + // Fallback: deserialize as integer if reflection not available + std::underlying_type_t int_val; + SIMDJSON_TRY(val.get(int_val)); + out = static_cast(int_val); + return SUCCESS; +#endif +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::unique_ptr &out) noexcept { + if (!out) { + out = std::make_unique(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::shared_ptr &out) noexcept { + if (!out) { + out = std::make_shared(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +#endif // SIMDJSON_STATIC_REFLECTION + +//////////////////////////////////////// +// Unique pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Shared pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Explicit optional specializations +//////////////////////////////////////// + +//////////////////////////////////////// +// Explicit smart pointer specializations for string and int types +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_shared(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + int64_t temp; + SIMDJSON_TRY(val.get_int64().get(temp)); + *out = static_cast(temp); + return SUCCESS; +} + } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/std_deserialize.h for haswell */ // Inline definitions @@ -59836,6 +65872,9 @@ simdjson_inline array_iterator &array_iterator::operator++() noexcept { return *this; } +simdjson_inline bool array_iterator::at_end() const noexcept { + return iter.at_end(); +} } // namespace ondemand } // namespace haswell } // namespace simdjson @@ -59872,7 +65911,9 @@ simdjson_inline simdjson_result &simdjson_res ++(first); return *this; } - +simdjson_inline bool simdjson_result::at_end() const noexcept { + return !first.iter.is_valid() || first.at_end(); +} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H @@ -60630,8 +66671,8 @@ simdjson_inline document::operator object() & noexcept(false) { return get_objec simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document::operator double() noexcept(false) { return get_double(); } -simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } -simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator std::string_view() noexcept(false) simdjson_lifetime_bound { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) simdjson_lifetime_bound { return get_raw_json_string(); } simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document::operator value() noexcept(false) { return get_value(); } @@ -61585,10 +67626,19 @@ simdjson_inline document_stream::iterator& document_stream::iterator::operator++ return *this; } +simdjson_inline bool document_stream::iterator::at_end() const noexcept { + return finished; +} + + simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { return finished != other.finished; } +simdjson_inline bool document_stream::iterator::operator==(const document_stream::iterator &other) const noexcept { + return finished == other.finished; +} + simdjson_inline document_stream::iterator document_stream::begin() noexcept { start(); // If there are no documents, we're finished. @@ -62188,6 +68238,8 @@ simdjson_inline void json_iterator::assert_valid_position(token_position positio #ifndef SIMDJSON_CLANG_VISUAL_STUDIO SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#else + (void)position; // Suppress unused parameter warning #endif } @@ -63036,6 +69088,7 @@ simdjson_inline simdjson_result simdjson_result parser::iterate(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -63250,7 +69303,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(p #ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -63282,10 +69335,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(s } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { - if(json.capacity() - json.size() < SIMDJSON_PADDING) { - json.reserve(json.size() + SIMDJSON_PADDING); - } - return iterate(padded_string_view(json)); + return iterate(pad_with_reserve(json)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { @@ -63307,7 +69357,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(c } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -63322,6 +69372,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iter } inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { buf += 3; @@ -63330,16 +69381,24 @@ inline simdjson_result parser::iterate_many(const uint8_t *buf, if(allow_comma_separated && batch_size < len) { batch_size = len; } return document_stream(*this, buf, len, batch_size, allow_comma_separated); } + inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); } -inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { +inline simdjson_result parser::iterate_many(padded_string_view s, size_t batch_size, bool allow_comma_separated) noexcept { + if (!s.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(pad(s), batch_size, allow_comma_separated); } - simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } @@ -63374,6 +69433,34 @@ simdjson_inline simdjson_warn_unused simdjson_result parser::u return result; } +simdjson_inline simdjson_warn_unused ondemand::parser& parser::get_parser() { + return *parser::get_parser_instance(); +} + +simdjson_inline bool release_parser() { + auto &parser_instance = parser::get_threadlocal_parser_if_exists(); + if (parser_instance) { + parser_instance.reset(); + return true; + } + return false; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_parser_instance() { + std::unique_ptr& parser_instance = get_threadlocal_parser_if_exists(); + if (!parser_instance) { + parser_instance.reset(new ondemand::parser()); + } + return parser_instance; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_threadlocal_parser_if_exists() { + // @the-moisrex points out that this could be implemented with std::optional (C++17). + thread_local std::unique_ptr parser_instance = nullptr; + return parser_instance; +} + + } // namespace ondemand } // namespace haswell } // namespace simdjson @@ -63408,8 +69495,13 @@ namespace ondemand { simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} -simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } +simdjson_inline const char * raw_json_string::raw() const noexcept { + return reinterpret_cast(buf); +} +simdjson_inline char raw_json_string::operator[](size_t i) const noexcept { + return reinterpret_cast(buf)[i]; +} simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; @@ -63586,6 +69678,10 @@ simdjson_inline simdjson_result simdjson_result::operator[](size_t i) const noexcept { + if (error()) { return error(); } + return first[i]; +} simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(haswell::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { if (error()) { return error(); } return first.unescape(iter, allow_replacement); @@ -63611,6 +69707,9 @@ simdjson_inline simdjson_warn_unused simdjson_result simdjson_ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_builder.h" */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -64713,19 +70812,23 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } return result; } + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("bool"); - uint8_t tmpbuf[5+1+1]; // +1 for null termination - tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } - auto result = parse_bool(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - } - return result; + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } + simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("null"); @@ -64988,7 +71091,7 @@ simdjson_inline simdjson_result value_iterator::type() const noexcept case '5': case '6': case '7': case '8': case '9': return json_type::number; default: - return TAPE_ERROR; + return json_type::unknown; } } @@ -65028,6 +71131,1221 @@ simdjson_inline simdjson_result::simdjson_res #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* end file simdjson/generic/ondemand/value_iterator-inl.h for haswell */ +// JSON builder, ideally they should not be part of the ondemand directory +// but it is convenient for now to have them here. +/* including simdjson/generic/ondemand/json_string_builder.h for haswell: #include "simdjson/generic/ondemand/json_string_builder.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder.h for haswell */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace haswell { +namespace builder { + +/** + * A builder for JSON strings representing documents. This is a low-level + * builder that is not meant to be used directly by end-users. Though it + * supports atomic types (Booleans, strings), it does not support composed + * types (arrays and objects). + * + * Ultimately, this class can support kernel-specific optimizations. E.g., + * it may make use of SIMD instructions to escape strings faster. + */ +class string_builder { +public: + simdjson_inline string_builder(size_t initial_capacity = 1024); + + /** + * Append number (includes Booleans). Booleans are mapped to the strings + * false and true. Numbers are converted to strings abiding by the JSON standard. + * Floating-point numbers are converted to the shortest string that 'correctly' + * represents the number. + */ + template::value>::type> + simdjson_inline void append(number_type v) noexcept; + + /** + * Append character c. + */ + simdjson_inline void append(char c) noexcept; + + /** + * Append the string 'null'. + */ + simdjson_inline void append_null() noexcept; + + /** + * Clear the content. + */ + simdjson_inline void clear() noexcept; + + /** + * Append the std::string_view, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append(std::string_view input) noexcept; + + /** + * Append the std::string_view surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(std::string_view input) noexcept; + + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(char input) noexcept; + + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(const char* input) noexcept; + + /** + * Append the C string directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *c) noexcept; + + /** + * Append "{" to the buffer. + */ + simdjson_inline void start_object() noexcept; + + /** + * Append "}" to the buffer. + */ + simdjson_inline void end_object() noexcept; + + /** + * Append "[" to the buffer. + */ + simdjson_inline void start_array() noexcept; + + /** + * Append "]" to the buffer. + */ + simdjson_inline void end_array() noexcept; + + /** + * Append "," to the buffer. + */ + simdjson_inline void append_comma() noexcept; + + /** + * Append ":" to the buffer. + */ + simdjson_inline void append_colon() noexcept; + + /** + * Append a key-value pair to the buffer. + * The key is escaped and surrounded by double quotes. + * The value is escaped if it is a string. + */ + template + simdjson_inline void append_key_value(key_type key, value_type value) noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + // Support for optional types (std::optional, etc.) + template + simdjson_inline void append(const T &opt); + + // Support for string-like types + template + requires(std::is_convertible::value || + std::is_same::value ) + simdjson_inline void append(const T &value); +#endif +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS + // Support for range-based appending (std::ranges::view, etc.) + template +requires (!std::is_convertible::value) + simdjson_inline void append(const R &range) noexcept; +#endif + /** + * Append the std::string_view directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(std::string_view input) noexcept; + + /** + * Append len characters from str. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *str, size_t len) noexcept; +#if SIMDJSON_EXCEPTIONS + /** + * Creates an std::string from the written JSON buffer. + * Throws if memory allocation failed + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string() const noexcept(false); + + /** + * Creates an std::string_view from the written JSON buffer. + * Throws if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string_view() const noexcept(false) simdjson_lifetime_bound; +#endif + + /** + * Returns a view on the written JSON buffer. Returns an error + * if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result view() const noexcept; + + /** + * Appends the null character to the buffer and returns + * a pointer to the beginning of the written JSON buffer. + * Returns an error if memory allocation failed. + * The result is null-terminated. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result c_str() noexcept; + + /** + * Return true if the content is valid UTF-8. + */ + simdjson_inline bool validate_unicode() const noexcept; + + /** + * Returns the current size of the written JSON buffer. + * If an error occurred, returns 0. + */ + simdjson_inline size_t size() const noexcept; + +private: + /** + * Returns true if we can write at least upcoming_bytes bytes. + * The underlying buffer is reallocated if needed. It is designed + * to be called before writing to the buffer. It should be fast. + */ + simdjson_inline bool capacity_check(size_t upcoming_bytes); + + /** + * Grow the buffer to at least desired_capacity bytes. + * If the allocation fails, is_valid is set to false. We expect + * that this function would not be repeatedly called. + */ + simdjson_inline void grow_buffer(size_t desired_capacity); + + /** + * We use this helper function to make sure that is_valid is kept consistent. + */ + simdjson_inline void set_valid(bool valid) noexcept; + + std::unique_ptr buffer{}; + size_t position{0}; + size_t capacity{0}; + bool is_valid{true}; +}; + + + +} +} + + +#if !SIMDJSON_STATIC_REFLECTION +// fallback implementation until we have static reflection +template +simdjson_result to_json(const Z &z, size_t initial_capacity = 1024) { + simdjson::haswell::builder::string_builder b(initial_capacity); + b.append(z); + std::string_view s; + auto e = b.view().get(s); + if(e) { return e; } + return std::string(s); +} +#endif + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_H +/* end file simdjson/generic/ondemand/json_string_builder.h for haswell */ +/* including simdjson/generic/ondemand/json_string_builder-inl.h for haswell: #include "simdjson/generic/ondemand/json_string_builder-inl.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder-inl.h for haswell */ +/** + * This file is part of the builder API. It is temporarily in the ondemand + * directory but we will move it to a builder directory later. + */ +#include +#include +#include +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* + * Empirically, we have found that an inlined optimization is important for + * performance. The following macros are not ideal. We should find a better + * way to inline the code. + */ + +#if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \ + (defined(_M_AMD64) || defined(_M_X64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP == 2)) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#define SIMDJSON_EXPERIMENTAL_HAS_SSE2 1 +#endif +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_NEON +#define SIMDJSON_EXPERIMENTAL_HAS_NEON 1 +#endif +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +#include +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#include +#endif + +namespace simdjson { +namespace haswell { +namespace builder { + +static SIMDJSON_CONSTEXPR_LAMBDA std::array json_quotable_character = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + +/** + +A possible SWAR implementation of has_json_escapable_byte. It is not used because +it is slower than the current implementation. It is kept here for reference (to show +that we tried it). + +inline bool has_json_escapable_byte(uint64_t x) { + uint64_t is_ascii = 0x8080808080808080ULL & ~x; + uint64_t xor2 = x ^ 0x0202020202020202ULL; + uint64_t lt32_or_eq34 = xor2 - 0x2121212121212121ULL; + uint64_t sub92 = x ^ 0x5C5C5C5C5C5C5C5CULL; + uint64_t eq92 = (sub92 - 0x0101010101010101ULL); + return ((lt32_or_eq34 | eq92) & is_ascii) != 0; +} + +**/ + +SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline bool +simple_needs_escaping(std::string_view v) { + for (char c : v) { + // a table lookup is faster than a series of comparisons + if(json_quotable_character[static_cast(c)]) { + return true; + } + } + return false; +} + +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + uint8x16_t running = vdupq_n_u8(0); + uint8x16_t v34 = vdupq_n_u8(34); + uint8x16_t v92 = vdupq_n_u8(92); + + for (; i + 15 < view.size(); i += 16) { + uint8x16_t word = vld1q_u8((const uint8_t *)view.data() + i); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + if (i < view.size()) { + uint8x16_t word = + vld1q_u8((const uint8_t *)view.data() + view.length() - 16); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + return vmaxvq_u32(vreinterpretq_u32_u8(running)) != 0; +} +#elif SIMDJSON_EXPERIMENTAL_HAS_SSE2 +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + __m128i running = _mm_setzero_si128(); + for (; i + 15 < view.size(); i += 16) { + + __m128i word = _mm_loadu_si128(reinterpret_cast(view.data() + i)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + if (i < view.size()) { + __m128i word = + _mm_loadu_si128(reinterpret_cast(view.data() + view.length() - 16)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + return _mm_movemask_epi8(running) != 0; +} +#else +simdjson_inline bool fast_needs_escaping(std::string_view view) { + return simple_needs_escaping(view); +} +#endif + + +SIMDJSON_CONSTEXPR_LAMBDA inline size_t +find_next_json_quotable_character(const std::string_view view, + size_t location) noexcept { + + for (auto pos = view.begin() + location; pos != view.end(); ++pos) { + if (json_quotable_character[static_cast(*pos)]) { + return pos - view.begin(); + } + } + return size_t(view.size()); +} + +SIMDJSON_CONSTEXPR_LAMBDA static std::string_view control_chars[] = { + "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", + "\\u0007", "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", + "\\u000e", "\\u000f", "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", + "\\u0015", "\\u0016", "\\u0017", "\\u0018", "\\u0019", "\\u001a", "\\u001b", + "\\u001c", "\\u001d", "\\u001e", "\\u001f"}; + +// All Unicode characters may be placed within the quotation marks, except for the +// characters that MUST be escaped: quotation mark, reverse solidus, and the control +// characters (U+0000 through U+001F). +// There are two-character sequence escape representations of some popular characters: +// \", \\, \b, \f, \n, \r, \t. +SIMDJSON_CONSTEXPR_LAMBDA void escape_json_char(char c, char *&out) { + if (c == '"') { + memcpy(out, "\\\"", 2); + out += 2; + } else if (c == '\\') { + memcpy(out, "\\\\", 2); + out += 2; + } else { + std::string_view v = control_chars[uint8_t(c)]; + memcpy(out, v.data(), v.size()); + out += v.size(); + } +} + +inline size_t write_string_escaped(const std::string_view input, char *out) { + size_t mysize = input.size(); + if (!fast_needs_escaping(input)) { // fast path! + memcpy(out, input.data(), input.size()); + return input.size(); + } + const char *const initout = out; + size_t location = find_next_json_quotable_character(input, 0); + memcpy(out, input.data(), location); + out += location; + escape_json_char(input[location], out); + location += 1; + while (location < mysize) { + size_t newlocation = find_next_json_quotable_character(input, location); + memcpy(out, input.data() + location, newlocation - location); + out += newlocation - location; + location = newlocation; + if (location == mysize) { + break; + } + escape_json_char(input[location], out); + location += 1; + } + return out - initout; +} + +simdjson_inline string_builder::string_builder(size_t initial_capacity) + : buffer(new(std::nothrow) char[initial_capacity]), position(0), + capacity(buffer.get() != nullptr ? initial_capacity : 0), + is_valid(buffer.get() != nullptr) {} + +simdjson_inline bool string_builder::capacity_check(size_t upcoming_bytes) { + // We use the convention that when is_valid is false, then the capacity and + // the position are 0. + // Most of the time, this function will return true. + if (simdjson_likely(upcoming_bytes <= capacity - position)) { + return true; + } + // check for overflow, most of the time there is no overflow + if (simdjson_likely(position + upcoming_bytes < position)) { + return false; + } + // We will rarely get here. + grow_buffer((std::max)(capacity * 2, position + upcoming_bytes)); + // If the buffer allocation failed, we set is_valid to false. + return is_valid; +} + +simdjson_inline void string_builder::grow_buffer(size_t desired_capacity) { + if (!is_valid) { + return; + } + std::unique_ptr new_buffer(new (std::nothrow) char[desired_capacity]); + if (new_buffer.get() == nullptr) { + set_valid(false); + return; + } + std::memcpy(new_buffer.get(), buffer.get(), position); + buffer.swap(new_buffer); + capacity = desired_capacity; +} + +simdjson_inline void string_builder::set_valid(bool valid) noexcept { + if (!valid) { + is_valid = false; + capacity = 0; + position = 0; + buffer.reset(); + } else { + is_valid = true; + } +} + +simdjson_inline size_t string_builder::size() const noexcept { + return position; +} + +simdjson_inline void string_builder::append(char c) noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = c; + } +} + +simdjson_inline void string_builder::append_null() noexcept { + constexpr char null_literal[] = "null"; + constexpr size_t null_len = sizeof(null_literal) - 1; + if (capacity_check(null_len)) { + std::memcpy(buffer.get() + position, null_literal, null_len); + position += null_len; + } +} + +simdjson_inline void string_builder::clear() noexcept { + position = 0; + // if it was invalid, we should try to repair it + if (!is_valid) { + capacity = 0; + buffer.reset(); + is_valid = true; + } +} + +namespace internal { + +// We could specialize further for 32-bit integers. +simdjson_really_inline int int_log2(uint32_t x) { return (63 - leading_zeroes(x | 1)); } + +simdjson_really_inline int fast_digit_count(uint32_t x) { + static uint64_t table[] = { + 4294967296, 8589934582, 8589934582, 8589934582, 12884901788, + 12884901788, 12884901788, 17179868184, 17179868184, 17179868184, + 21474826480, 21474826480, 21474826480, 21474826480, 25769703776, + 25769703776, 25769703776, 30063771072, 30063771072, 30063771072, + 34349738368, 34349738368, 34349738368, 34349738368, 38554705664, + 38554705664, 38554705664, 41949672960, 41949672960, 41949672960, + 42949672960, 42949672960}; + return uint32_t((x + table[int_log2(x)]) >> 32); +} + +simdjson_really_inline int int_log2(uint64_t x) { return 63 - leading_zeroes(x | 1); } + +simdjson_really_inline int fast_digit_count(uint64_t x) { + static uint64_t table[] = {9, + 99, + 999, + 9999, + 99999, + 999999, + 9999999, + 99999999, + 999999999, + 9999999999, + 99999999999, + 999999999999, + 9999999999999, + 99999999999999, + 999999999999999ULL, + 9999999999999999ULL, + 99999999999999999ULL, + 999999999999999999ULL, + 9999999999999999999ULL}; + int y = (19 * int_log2(x) >> 6); + y += x > table[y]; + return y + 1; +} + +template ::value>::type> +simdjson_really_inline size_t digit_count(number_type v) noexcept { + static_assert(sizeof(number_type) == 8 || sizeof(number_type) == 4 || + sizeof(number_type) == 2 || sizeof(number_type) == 1, + "We only support 8-bit, 16-bit, 32-bit and 64-bit numbers"); + return fast_digit_count(v); +} +static const char decimal_table[200] = { + 0x30, 0x30, 0x30, 0x31, 0x30, 0x32, 0x30, 0x33, 0x30, 0x34, 0x30, 0x35, + 0x30, 0x36, 0x30, 0x37, 0x30, 0x38, 0x30, 0x39, 0x31, 0x30, 0x31, 0x31, + 0x31, 0x32, 0x31, 0x33, 0x31, 0x34, 0x31, 0x35, 0x31, 0x36, 0x31, 0x37, + 0x31, 0x38, 0x31, 0x39, 0x32, 0x30, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, + 0x32, 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37, 0x32, 0x38, 0x32, 0x39, + 0x33, 0x30, 0x33, 0x31, 0x33, 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35, + 0x33, 0x36, 0x33, 0x37, 0x33, 0x38, 0x33, 0x39, 0x34, 0x30, 0x34, 0x31, + 0x34, 0x32, 0x34, 0x33, 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, 0x34, 0x37, + 0x34, 0x38, 0x34, 0x39, 0x35, 0x30, 0x35, 0x31, 0x35, 0x32, 0x35, 0x33, + 0x35, 0x34, 0x35, 0x35, 0x35, 0x36, 0x35, 0x37, 0x35, 0x38, 0x35, 0x39, + 0x36, 0x30, 0x36, 0x31, 0x36, 0x32, 0x36, 0x33, 0x36, 0x34, 0x36, 0x35, + 0x36, 0x36, 0x36, 0x37, 0x36, 0x38, 0x36, 0x39, 0x37, 0x30, 0x37, 0x31, + 0x37, 0x32, 0x37, 0x33, 0x37, 0x34, 0x37, 0x35, 0x37, 0x36, 0x37, 0x37, + 0x37, 0x38, 0x37, 0x39, 0x38, 0x30, 0x38, 0x31, 0x38, 0x32, 0x38, 0x33, + 0x38, 0x34, 0x38, 0x35, 0x38, 0x36, 0x38, 0x37, 0x38, 0x38, 0x38, 0x39, + 0x39, 0x30, 0x39, 0x31, 0x39, 0x32, 0x39, 0x33, 0x39, 0x34, 0x39, 0x35, + 0x39, 0x36, 0x39, 0x37, 0x39, 0x38, 0x39, 0x39, +}; +} // namespace internal + +template +simdjson_inline void string_builder::append(number_type v) noexcept { + static_assert(std::is_same::value || + std::is_integral::value || + std::is_floating_point::value, + "Unsupported number type"); + // If C++17 is available, we can 'if constexpr' here. + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + if (v) { + constexpr char true_literal[] = "true"; + constexpr size_t true_len = sizeof(true_literal) - 1; + if (capacity_check(true_len)) { + std::memcpy(buffer.get() + position, true_literal, true_len); + position += true_len; + } + } else { + constexpr char false_literal[] = "false"; + constexpr size_t false_len = sizeof(false_literal) - 1; + if (capacity_check(false_len)) { + std::memcpy(buffer.get() + position, false_literal, false_len); + position += false_len; + } + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_unsigned::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + unsigned_type pv = static_cast(v); + size_t dc = internal::digit_count(pv); + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100)*2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_integral::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + bool negative = v < 0; + unsigned_type pv = static_cast(v); + if (negative) { + pv = 0 - pv; // the 0 is for Microsoft + } + size_t dc = internal::digit_count(pv); + if (negative) { + buffer.get()[position++] = '-'; + } + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100)*2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_floating_point::value) { + constexpr size_t max_number_size = 24; + if (capacity_check(max_number_size)) { + // We could specialize for float. + char *end = simdjson::internal::to_chars(buffer.get() + position, nullptr, + double(v)); + position = end - buffer.get(); + } + } +} + +simdjson_inline void +string_builder::escape_and_append(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(6 * input.size())) { + position += write_string_escaped(input, buffer.get() + position); + } +} + +simdjson_inline void +string_builder::escape_and_append_with_quotes(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * input.size())) { + buffer.get()[position++] = '"'; + position += write_string_escaped(input, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} + +simdjson_inline void +string_builder::escape_and_append_with_quotes(char input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * 1)) { + buffer.get()[position++] = '"'; + std::string_view cinput(&input, 1); + position += write_string_escaped(cinput, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} + +simdjson_inline void string_builder::escape_and_append_with_quotes(const char* input) noexcept { + std::string_view cinput(input); + escape_and_append_with_quotes(cinput); +} + +simdjson_inline void string_builder::append_raw(const char *c) noexcept { + size_t len = std::strlen(c); + append_raw(c, len); +} + +simdjson_inline void +string_builder::append_raw(std::string_view input) noexcept { + if (capacity_check(input.size())) { + std::memcpy(buffer.get() + position, input.data(), input.size()); + position += input.size(); + } +} + +simdjson_inline void string_builder::append_raw(const char *str, + size_t len) noexcept { + if (capacity_check(len)) { + std::memcpy(buffer.get() + position, str, len); + position += len; + } +} +#if SIMDJSON_SUPPORTS_CONCEPTS +// Support for optional types (std::optional, etc.) +template +simdjson_inline void string_builder::append(const T &opt) { + if (opt) { + append(*opt); + } else { + append_null(); + } +} +template +requires(std::is_convertible::value || +std::is_same::value ) +simdjson_inline void string_builder::append(const T &value) { + escape_and_append_with_quotes(value); +} +#endif + +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS + // Support for range-based appending (std::ranges::view, etc.) +template +requires (!std::is_convertible::value) +simdjson_inline void string_builder::append(const R &range) noexcept { + auto it = std::ranges::begin(range); + auto end = std::ranges::end(range); + if constexpr (concepts::is_pair) { + start_object(); + + if (it == end) { + end_object(); + return; // Handle empty range + } + // Append first item without leading comma + append_key_value(it->first, it->second); + ++it; + + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append_key_value(it->first, it->second); + } + end_object(); + } else { + start_array(); + if (it == end) { + end_array(); + return; // Handle empty range + } + + // Append first item without leading comma + append(*it); + ++it; + + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append(*it); + } + end_array(); + + } +} + +#endif + +#if SIMDJSON_EXCEPTIONS +simdjson_inline string_builder::operator std::string() const noexcept(false) { + return std::string(std::string_view()); +} + +simdjson_inline string_builder::operator std::string_view() const + noexcept(false) simdjson_lifetime_bound { + return view(); +} +#endif + +simdjson_inline simdjson_result +string_builder::view() const noexcept { + if (!is_valid) { + return simdjson::OUT_OF_CAPACITY; + } + return std::string_view(buffer.get(), position); +} + +simdjson_inline simdjson_result string_builder::c_str() noexcept { + if (capacity_check(1)) { + buffer.get()[position] = '\0'; + return buffer.get(); + } + return simdjson::OUT_OF_CAPACITY; +} + +simdjson_inline bool string_builder::validate_unicode() const noexcept { + return simdjson::validate_utf8(buffer.get(), position); +} + +simdjson_inline void string_builder::start_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '{'; + } +} + + +simdjson_inline void string_builder::end_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '}'; + } +} + + +simdjson_inline void string_builder::start_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '['; + } +} + + +simdjson_inline void string_builder::end_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ']'; + } +} + + +simdjson_inline void string_builder::append_comma() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ','; + } +} + + +simdjson_inline void string_builder::append_colon() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ':'; + } +} + +template +simdjson_inline void string_builder::append_key_value(key_type key, value_type value) noexcept { + static_assert( + std::is_same::value || + std::is_convertible::value, + "Unsupported key type"); + escape_and_append_with_quotes(key); + append_colon(); + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + append_null(); + } else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } else SIMDJSON_IF_CONSTEXPR(std::is_convertible::value) { + escape_and_append_with_quotes(value); + } else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } else { + append(value); + } +} + +} // namespace builder +} // namespace haswell +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_INL_H +/* end file simdjson/generic/ondemand/json_string_builder-inl.h for haswell */ +/* including simdjson/generic/ondemand/json_builder.h for haswell: #include "simdjson/generic/ondemand/json_builder.h" */ +/* begin file simdjson/generic/ondemand/json_builder.h for haswell */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/concepts.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#if SIMDJSON_STATIC_REFLECTION + +#include +#include +#include +#include +#include +#include +#include +#include +// #include // for std::define_static_string - header not available yet + +namespace simdjson { +namespace haswell { +namespace builder { + +// Concept that checks if a type is a container but not a string (because +// strings handling must be handled differently) +template +concept container_but_not_string = + requires(T a) { + { a.size() } -> std::convertible_to; + { + a[std::declval()] + }; // check if elements are accessible for the subscript operator + } && !std::is_same_v && + !std::is_same_v && !std::is_same_v; + +template + requires(container_but_not_string) +constexpr void atom(string_builder &b, const T &t) { + if (t.size() == 0) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, t[0]); + for (size_t i = 1; i < t.size(); ++i) { + b.append(','); + atom(b, t[i]); + } + b.append(']'); +} + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +constexpr void atom(string_builder &b, const T &t) { + b.escape_and_append_with_quotes(t); +} + +template +constexpr void atom(string_builder &b, const T &m) { + if (m.empty()) { + b.append_raw("{}"); + return; + } + b.append('{'); + bool first = true; + for (const auto& [key, value] : m) { + if (!first) { + b.append(','); + } + first = false; + // Keys must be convertible to string_view per the concept + b.escape_and_append_with_quotes(key); + b.append(':'); + atom(b, value); + } + b.append('}'); +} + + +template::value && !std::is_same_v>::type> +constexpr void atom(string_builder &b, const number_type t) { + b.append(t); +} + +template + requires(std::is_class_v && !container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v) +constexpr void atom(string_builder &b, const T &t) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, t.[:dm:]); + i++; + }; + b.append('}'); +} + +// Support for optional types (std::optional, etc.) +template +constexpr void atom(string_builder &b, const T &opt) { + if (opt) { + atom(b, opt.value()); + } else { + b.append_raw("null"); + } +} + +// Support for smart pointers (std::unique_ptr, std::shared_ptr, etc.) +template +constexpr void atom(string_builder &b, const T &ptr) { + if (ptr) { + atom(b, *ptr); + } else { + b.append_raw("null"); + } +} + +// Support for enums - serialize as string representation using expand approach from P2996R12 +template + requires(std::is_enum_v) +void atom(string_builder &b, const T &e) { +#if SIMDJSON_STATIC_REFLECTION + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + constexpr auto enum_str = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(enum_val))); + if (e == [:enum_val:]) { + b.append_raw(enum_str); + return; + } + }; + // Fallback to integer if enum value not found + atom(b, static_cast>(e)); +#else + // Fallback: serialize as integer if reflection not available + atom(b, static_cast>(e)); +#endif +} + +// Support for appendable containers that don't have operator[] (sets, etc.) +template + requires(!container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v) +constexpr void atom(string_builder &b, const T &container) { + if (container.empty()) { + b.append_raw("[]"); + return; + } + b.append('['); + bool first = true; + for (const auto& item : container) { + if (!first) { + b.append(','); + } + first = false; + atom(b, item); + } + b.append(']'); +} + +// append functions that delegate to atom functions for primitive types +template + requires(std::is_arithmetic_v && !std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template + requires(!container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template +void append(string_builder &b, const T &t) { + atom(b, t); +} + +// works for struct +template + requires(std::is_class_v && !container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v) +void append(string_builder &b, const Z &z) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^Z, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, z.[:dm:]); + i++; + }; + b.append('}'); +} + +// works for container +template + requires(container_but_not_string) +void append(string_builder &b, const Z &z) { + if (z.size() == 0) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, z[0]); + for (size_t i = 1; i < z.size(); ++i) { + b.append(','); + atom(b, z[i]); + } + b.append(']'); +} + +template +simdjson_result to_json_string(const Z &z, size_t initial_capacity = 1024) { + string_builder b(initial_capacity); + append(b, z); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); +} + +template +simdjson_error to_json(const Z &z, std::string &s) { + string_builder b; + append(b, z); + std::string_view view; + if(auto e = b.view().get(view); e) { return e; } + s.assign(view); + return SUCCESS; +} + +template +string_builder& operator<<(string_builder& b, const Z& z) { + append(b, z); + return b; +} +} // namespace builder +} // namespace haswell +// Alias the function template to 'to' in the global namespace +template +simdjson_result to_json(const Z &z, size_t initial_capacity = 1024) { + return haswell::builder::to_json_string(z, initial_capacity); +} +} // namespace simdjson + +#endif // SIMDJSON_STATIC_REFLECTION + +#endif +/* end file simdjson/generic/ondemand/json_builder.h for haswell */ /* end file simdjson/generic/ondemand/amalgamated.h for haswell */ /* including simdjson/haswell/end.h: #include "simdjson/haswell/end.h" */ @@ -65334,7 +72652,6 @@ namespace simd { friend simdjson_really_inline uint64_t operator==(const simd8 lhs, const simd8 rhs) { return _mm512_cmpeq_epi8_mask(lhs, rhs); } - static const int SIZE = sizeof(base::value); template @@ -65677,6 +72994,35 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 64; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(uint64_t(escape_bits)); } + + __mmask64 escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + __mmask64 is_quote = _mm512_cmpeq_epi8_mask(v, _mm512_set1_epi8('"')); + __mmask64 is_backslash = _mm512_cmpeq_epi8_mask(v, _mm512_set1_epi8('\\')); + __mmask64 is_control = _mm512_cmplt_epi8_mask(v, _mm512_set1_epi8(32)); + return { + (is_backslash | is_quote | is_control) + }; +} + + + + } // unnamed namespace } // namespace icelake } // namespace simdjson @@ -65803,7 +73149,7 @@ class value_iterator; /* end file simdjson/generic/ondemand/base.h for icelake */ /* including simdjson/generic/ondemand/deserialize.h for icelake: #include "simdjson/generic/ondemand/deserialize.h" */ /* begin file simdjson/generic/ondemand/deserialize.h for icelake */ -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS #ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -65882,7 +73228,7 @@ template concept custom_deserializable = tag_invocable; template -concept deserializable = custom_deserializable || is_builtin_deserializable_v; +concept deserializable = custom_deserializable || is_builtin_deserializable_v || concepts::optional_type; template concept nothrow_custom_deserializable = nothrow_tag_invocable; @@ -65893,28 +73239,44 @@ concept nothrow_deserializable = nothrow_custom_deserializable || is_bu /// Deserialize Tag inline constexpr struct deserialize_tag { + using array_type = icelake::ondemand::array; + using object_type = icelake::ondemand::object; using value_type = icelake::ondemand::value; using document_type = icelake::ondemand::document; using document_reference_type = icelake::ondemand::document_reference; + // Customization Point for array + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(array_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for object + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(object_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + // Customization Point for value template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document reference template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } @@ -65924,7 +73286,7 @@ inline constexpr struct deserialize_tag { } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/deserialize.h for icelake */ /* including simdjson/generic/ondemand/value_iterator.h for icelake: #include "simdjson/generic/ondemand/value_iterator.h" */ @@ -66056,7 +73418,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** @@ -66066,7 +73429,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; @@ -66188,7 +73552,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** @@ -66198,7 +73563,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; @@ -66460,13 +73826,14 @@ class value { * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * When SIMDJSON_SUPPORTS_CONCEPTS is set, custom types are also supported. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -66483,6 +73850,7 @@ class value { * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * If the macro SIMDJSON_SUPPORTS_CONCEPTS is set, then custom types are also supported. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. @@ -66490,28 +73858,47 @@ class value { */ template simdjson_inline error_code get(T &out) -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION - if constexpr (custom_deserializable) { + #if SIMDJSON_SUPPORTS_CONCEPTS + if constexpr (custom_deserializable) { return deserialize(*this, out); - } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); - static_cast(out); // to get rid of unused errors - return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION - } + } else if constexpr (concepts::optional_type) { + using value_type = typename std::remove_cvref_t::value_type; + + // Check if the value is null + if (is_null()) { + out.reset(); // Set to nullopt + return SUCCESS; + } + + if (!out) { + out.emplace(); + } + return get(out.value()); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_CONCEPTS + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; #endif } @@ -67222,7 +74609,22 @@ struct simdjson_result : public icelake::implementatio simdjson_result operator[](int) noexcept = delete; /** - * Get the type of this JSON value. + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * Given a valid JSON document, the answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just @@ -67834,6 +75236,7 @@ namespace ondemand { * The type of a JSON value. */ enum class json_type { + unknown=0, // Start at 1 to catch uninitialized / default values more easily array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) @@ -68040,6 +75443,12 @@ class raw_json_string { */ simdjson_inline const char * raw() const noexcept; + /** + * Get the character at index i. This is unchecked. + * [0] when the string is of length 0 returns the final quote ("). + */ + simdjson_inline char operator[](size_t i) const noexcept; + /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done) on target.size() characters, @@ -68179,10 +75588,10 @@ struct simdjson_result : public icelake::imp simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline char operator[](size_t) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape(icelake::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(icelake::ondemand::json_iterator &iter) const noexcept; }; - } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H @@ -68198,6 +75607,7 @@ struct simdjson_result : public icelake::imp /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include +#include namespace simdjson { namespace icelake { @@ -68316,7 +75726,9 @@ class parser { simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; - /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + /** @overload simdjson_result iterate(padded_string_view json) & noexcept + The string instance might be have its capacity extended. Note that this can still + result in AddressSanitizer: container-overflow in some cases. */ simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; @@ -68404,6 +75816,11 @@ class parser { * Setting batch_size to excessively large or excessively small values may impact negatively the * performance. * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what @@ -68411,10 +75828,10 @@ class parser { * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * - * ### Threads + * This is checked automatically with all iterate_many function calls, except for the two + * that take pointers (const char* or const uint8_t*). * - * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the - * hood to do some lookahead. + * ### Threads * * ### Parser Capacity * @@ -68440,14 +75857,16 @@ class parser { */ inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(padded_string_view json, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) + the string might be automatically padded with up to SIMDJSON_PADDING whitespace characters */ + inline simdjson_result iterate_many(std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe - /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; @@ -68549,13 +75968,39 @@ class parser { bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; #endif + /** + * Get a unique parser instance corresponding to the current thread. + * This instance can be safely used within the current thread, but it should + * not be passed to other threads. + * + * A parser should only be used for one document at a time. + * + * Our simdjson::from functions use this parser instance. + * + * You can free the related parser by calling release_parser(). + */ + static simdjson_inline simdjson_warn_unused ondemand::parser& get_parser(); + /** + * Release the parser instance initialized by get_parser() and all the + * associated resources (memory). Returns true if a parser instance + * was released. + */ + static simdjson_inline bool release_parser(); + private: + friend bool release_parser(); + friend ondemand::parser& get_parser(); + /** Get the thread-local parser instance, allocates it if needed */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_parser_instance(); + /** Get the thread-local parser instance, it might be null */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_threadlocal_parser_if_exists(); /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; std::unique_ptr string_buf{}; + #if SIMDJSON_DEVELOPMENT_CHECKS std::unique_ptr start_positions{}; #endif @@ -68719,6 +76164,37 @@ class array { * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; + +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this array as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON array is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the array, no matter where you are right now. @@ -68797,7 +76273,28 @@ struct simdjson_result : public icelake::implementatio simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -68842,7 +76339,8 @@ class array_iterator { * * Part of the std::iterator interface. */ - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result + operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. /** * Check if we are at the end of the JSON. * @@ -68866,6 +76364,11 @@ class array_iterator { */ simdjson_inline array_iterator &operator++() noexcept; + /** + * Check if the array is at the end. + */ + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; + private: value_iterator iter{}; @@ -68884,7 +76387,6 @@ namespace simdjson { template<> struct simdjson_result : public icelake::implementation_simdjson_result_base { -public: simdjson_inline simdjson_result(icelake::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -68897,6 +76399,8 @@ struct simdjson_result : public icelake::impl simdjson_inline bool operator==(const simdjson_result &) const noexcept; simdjson_inline bool operator!=(const simdjson_result &) const noexcept; simdjson_inline simdjson_result &operator++() noexcept; + + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; }; } // namespace simdjson @@ -69090,7 +76594,7 @@ class document { */ template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -69113,7 +76617,7 @@ class document { */ template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -69131,35 +76635,42 @@ class document { * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. - * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns INCORRECT_TYPE If the JSON value is of the given type. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_CONCEPTS + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_CONCEPTS } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; @@ -69222,7 +76733,7 @@ class document { * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a raw_json_string. * @@ -69231,7 +76742,7 @@ class document { * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a bool. * @@ -69381,11 +76892,27 @@ class document { * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * + * The answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. + * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * - * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + * Prior to simdjson 4.0, this function would return an error given a bad token. + * Starting with simdjson 4.0, it will return simdjson::ondemand::json_type::unknown. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. */ simdjson_inline simdjson_result type() noexcept; @@ -69675,7 +77202,7 @@ class document_reference { simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -69688,7 +77215,7 @@ class document_reference { } template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -69711,30 +77238,37 @@ class document_reference { */ template simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_CONCEPTS + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_CONCEPTS } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; @@ -69819,6 +77353,9 @@ struct simdjson_result : public icelake::implementa template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS + + using icelake::implementation_simdjson_result_base::operator*; + using icelake::implementation_simdjson_result_base::operator->; template ::value == false>::type> explicit simdjson_inline operator T() noexcept(false); simdjson_inline operator icelake::ondemand::array() & noexcept(false); @@ -70077,6 +77614,7 @@ class document_stream { * Default constructor. */ simdjson_inline iterator() noexcept; + simdjson_inline iterator(const iterator &other) noexcept = default; /** * Get the current document (or error). */ @@ -70090,6 +77628,7 @@ class document_stream { * @param other the end iterator to compare to. */ simdjson_inline bool operator!=(const iterator &other) const noexcept; + simdjson_inline bool operator==(const iterator &other) const noexcept; /** * @private * @@ -70133,6 +77672,11 @@ class document_stream { */ inline error_code error() const noexcept; + /** + * Returns whether the iterator is at the end. + */ + inline bool at_end() const noexcept; + private: simdjson_inline iterator(document_stream *s, bool finished) noexcept; /** The document_stream we're iterating through. */ @@ -70144,6 +77688,7 @@ class document_stream { friend class document_stream; friend class json_iterator; }; + using iterator = document_stream::iterator; /** * Start iterating the documents in the stream. @@ -70604,6 +78149,36 @@ class object { */ simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this object as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON object is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the object, no matter where you are right now. @@ -70647,12 +78222,32 @@ struct simdjson_result : public icelake::implementati simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; - inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; inline simdjson_result raw_json() noexcept; + #if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -70781,6 +78376,20 @@ inline simdjson_result to_json_string(simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); + +#if SIMDJSON_STATIC_REFLECTION +/** + * Create a JSON string from any user-defined type using static reflection. + * Only available when SIMDJSON_STATIC_REFLECTION is enabled. + */ +template + requires(!std::same_as && + !std::same_as && + !std::same_as && + !std::same_as) +inline std::string to_json_string(const T& obj); +#endif + } // namespace simdjson /** @@ -70852,17 +78461,22 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result #include +#if SIMDJSON_STATIC_REFLECTION +#include +// #include // for std::define_static_string - header not available yet +#endif namespace simdjson { template @@ -70909,6 +78523,32 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { return SUCCESS; } +////////////////////////////// +// String deserialization +////////////////////////////// + +// just a character! +error_code tag_invoke(deserialize_tag, auto &val, char &out) noexcept { + std::string_view x; + SIMDJSON_TRY(val.get_string().get(x)); + if(x.size() != 1) { + return INCORRECT_TYPE; + } + out = x[0]; + return SUCCESS; +} + +// any string-like type (can be constructed from std::string_view) +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + out = T{str}; + return SUCCESS; +} + + /** * STL containers have several constructors including one that takes a single * size argument. Thus, some compilers (Visual Studio) will not be able to @@ -70926,9 +78566,13 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { static_assert( std::is_default_constructible_v, "The specified type inside the container must default constructible."); - icelake::ondemand::array arr; - SIMDJSON_TRY(val.get_array().get(arr)); + if constexpr (std::is_same_v, icelake::ondemand::array>) { + arr = val; + } else { + SIMDJSON_TRY(val.get_array().get(arr)); + } + for (auto v : arr) { if constexpr (concepts::returns_reference) { if (auto const err = v.get().get(concepts::emplace_one(out)); @@ -70954,6 +78598,77 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { } +/** + * We want to support std::map and std::unordered_map but only for + * string-keyed types. + */ + template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::mapped_type; + static_assert( + deserializable, + "The specified value type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified value type inside the container must default constructible."); + icelake::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + value_type this_value; + SIMDJSON_TRY(field.value().get().get(this_value)); + [[maybe_unused]] std::pair result = out.emplace(key, this_value); + // unclear what to do if the key already exists + // if (result.second == false) { + // // key already exists + // } + } + (void)out; + return SUCCESS; +} + +template +error_code tag_invoke(deserialize_tag, icelake::ondemand::object &obj, T &out) noexcept { + using value_type = typename std::remove_cvref_t::mapped_type; + + out.clear(); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + + icelake::ondemand::value value_obj; + SIMDJSON_TRY(field.value().get(value_obj)); + + value_type this_value; + SIMDJSON_TRY(value_obj.get(this_value)); + out.emplace(typename T::key_type(key), std::move(this_value)); + } + return SUCCESS; +} + +template +error_code tag_invoke(deserialize_tag, icelake::ondemand::value &val, T &out) noexcept { + icelake::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + +template +error_code tag_invoke(deserialize_tag, icelake::ondemand::document &doc, T &out) noexcept { + icelake::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + +template +error_code tag_invoke(deserialize_tag, icelake::ondemand::document_reference &doc, T &out) noexcept { + icelake::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + /** * This CPO (Customization Point Object) will help deserialize into @@ -70995,17 +78710,16 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser /** * This CPO (Customization Point Object) will help deserialize into optional types. */ -template +template requires(!require_custom_serialization) -error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::value_type, ValT>) { +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept(nothrow_deserializable::value_type, decltype(val)>) { using value_type = typename std::remove_cvref_t::value_type; - static_assert( - deserializable, - "The specified type inside the unique_ptr must itself be deserializable"); - static_assert( - std::is_default_constructible_v, - "The specified type inside the unique_ptr must default constructible."); + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullopt + return SUCCESS; + } if (!out) { out.emplace(); @@ -71014,10 +78728,295 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser return SUCCESS; } + +#if SIMDJSON_STATIC_REFLECTION + + +template +constexpr bool user_defined_type = (std::is_class_v +&& !std::is_same_v && !std::is_same_v && !concepts::optional_type && +!concepts::appendable_containers && !require_custom_serialization); + + +template + requires(user_defined_type && std::is_class_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { + icelake::ondemand::object obj; + if constexpr (std::is_same_v, icelake::ondemand::object>) { + obj = val; + } else { + SIMDJSON_TRY(val.get_object().get(obj)); + } + error_code e = simdjson::SUCCESS; + template for (constexpr auto mem : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + // Note: removed static assert as optional types are now handled generically + // as long we are succesful or the field is not found, we continue + if(e == simdjson::SUCCESS || e == simdjson::NO_SUCH_FIELD) { + e = obj[key].get(out.[:mem:]); + } + } + }; + return e; +} + +// Support for enum deserialization - deserialize from string representation using expand approach from P2996R12 +template + requires(std::is_enum_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { +#if SIMDJSON_STATIC_REFLECTION + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + if (str == std::meta::identifier_of(enum_val)) { + out = [:enum_val:]; + return SUCCESS; + } + }; + + return INCORRECT_TYPE; +#else + // Fallback: deserialize as integer if reflection not available + std::underlying_type_t int_val; + SIMDJSON_TRY(val.get(int_val)); + out = static_cast(int_val); + return SUCCESS; +#endif +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::unique_ptr &out) noexcept { + if (!out) { + out = std::make_unique(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::shared_ptr &out) noexcept { + if (!out) { + out = std::make_shared(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +#endif // SIMDJSON_STATIC_REFLECTION + +//////////////////////////////////////// +// Unique pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Shared pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Explicit optional specializations +//////////////////////////////////////// + +//////////////////////////////////////// +// Explicit smart pointer specializations for string and int types +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_shared(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + int64_t temp; + SIMDJSON_TRY(val.get_int64().get(temp)); + *out = static_cast(temp); + return SUCCESS; +} + } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/std_deserialize.h for icelake */ // Inline definitions @@ -71301,6 +79300,9 @@ simdjson_inline array_iterator &array_iterator::operator++() noexcept { return *this; } +simdjson_inline bool array_iterator::at_end() const noexcept { + return iter.at_end(); +} } // namespace ondemand } // namespace icelake } // namespace simdjson @@ -71337,7 +79339,9 @@ simdjson_inline simdjson_result &simdjson_res ++(first); return *this; } - +simdjson_inline bool simdjson_result::at_end() const noexcept { + return !first.iter.is_valid() || first.at_end(); +} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H @@ -72095,8 +80099,8 @@ simdjson_inline document::operator object() & noexcept(false) { return get_objec simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document::operator double() noexcept(false) { return get_double(); } -simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } -simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator std::string_view() noexcept(false) simdjson_lifetime_bound { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) simdjson_lifetime_bound { return get_raw_json_string(); } simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document::operator value() noexcept(false) { return get_value(); } @@ -73050,10 +81054,19 @@ simdjson_inline document_stream::iterator& document_stream::iterator::operator++ return *this; } +simdjson_inline bool document_stream::iterator::at_end() const noexcept { + return finished; +} + + simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { return finished != other.finished; } +simdjson_inline bool document_stream::iterator::operator==(const document_stream::iterator &other) const noexcept { + return finished == other.finished; +} + simdjson_inline document_stream::iterator document_stream::begin() noexcept { start(); // If there are no documents, we're finished. @@ -73653,6 +81666,8 @@ simdjson_inline void json_iterator::assert_valid_position(token_position positio #ifndef SIMDJSON_CLANG_VISUAL_STUDIO SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#else + (void)position; // Suppress unused parameter warning #endif } @@ -74501,6 +82516,7 @@ simdjson_inline simdjson_result simdjson_result parser::iterate(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -74715,7 +82731,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(p #ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -74747,10 +82763,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(s } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { - if(json.capacity() - json.size() < SIMDJSON_PADDING) { - json.reserve(json.size() + SIMDJSON_PADDING); - } - return iterate(padded_string_view(json)); + return iterate(pad_with_reserve(json)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { @@ -74772,7 +82785,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(c } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -74787,6 +82800,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iter } inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { buf += 3; @@ -74795,16 +82809,24 @@ inline simdjson_result parser::iterate_many(const uint8_t *buf, if(allow_comma_separated && batch_size < len) { batch_size = len; } return document_stream(*this, buf, len, batch_size, allow_comma_separated); } + inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); } -inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { +inline simdjson_result parser::iterate_many(padded_string_view s, size_t batch_size, bool allow_comma_separated) noexcept { + if (!s.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(pad(s), batch_size, allow_comma_separated); } - simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } @@ -74839,6 +82861,34 @@ simdjson_inline simdjson_warn_unused simdjson_result parser::u return result; } +simdjson_inline simdjson_warn_unused ondemand::parser& parser::get_parser() { + return *parser::get_parser_instance(); +} + +simdjson_inline bool release_parser() { + auto &parser_instance = parser::get_threadlocal_parser_if_exists(); + if (parser_instance) { + parser_instance.reset(); + return true; + } + return false; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_parser_instance() { + std::unique_ptr& parser_instance = get_threadlocal_parser_if_exists(); + if (!parser_instance) { + parser_instance.reset(new ondemand::parser()); + } + return parser_instance; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_threadlocal_parser_if_exists() { + // @the-moisrex points out that this could be implemented with std::optional (C++17). + thread_local std::unique_ptr parser_instance = nullptr; + return parser_instance; +} + + } // namespace ondemand } // namespace icelake } // namespace simdjson @@ -74873,8 +82923,13 @@ namespace ondemand { simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} -simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } +simdjson_inline const char * raw_json_string::raw() const noexcept { + return reinterpret_cast(buf); +} +simdjson_inline char raw_json_string::operator[](size_t i) const noexcept { + return reinterpret_cast(buf)[i]; +} simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; @@ -75051,6 +83106,10 @@ simdjson_inline simdjson_result simdjson_result::operator[](size_t i) const noexcept { + if (error()) { return error(); } + return first[i]; +} simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(icelake::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { if (error()) { return error(); } return first.unescape(iter, allow_replacement); @@ -75076,6 +83135,9 @@ simdjson_inline simdjson_warn_unused simdjson_result simdjson_ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_builder.h" */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -76178,19 +84240,23 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } return result; } + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("bool"); - uint8_t tmpbuf[5+1+1]; // +1 for null termination - tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } - auto result = parse_bool(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - } - return result; + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } + simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("null"); @@ -76453,7 +84519,7 @@ simdjson_inline simdjson_result value_iterator::type() const noexcept case '5': case '6': case '7': case '8': case '9': return json_type::number; default: - return TAPE_ERROR; + return json_type::unknown; } } @@ -76493,6 +84559,1221 @@ simdjson_inline simdjson_result::simdjson_res #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* end file simdjson/generic/ondemand/value_iterator-inl.h for icelake */ +// JSON builder, ideally they should not be part of the ondemand directory +// but it is convenient for now to have them here. +/* including simdjson/generic/ondemand/json_string_builder.h for icelake: #include "simdjson/generic/ondemand/json_string_builder.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder.h for icelake */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace icelake { +namespace builder { + +/** + * A builder for JSON strings representing documents. This is a low-level + * builder that is not meant to be used directly by end-users. Though it + * supports atomic types (Booleans, strings), it does not support composed + * types (arrays and objects). + * + * Ultimately, this class can support kernel-specific optimizations. E.g., + * it may make use of SIMD instructions to escape strings faster. + */ +class string_builder { +public: + simdjson_inline string_builder(size_t initial_capacity = 1024); + + /** + * Append number (includes Booleans). Booleans are mapped to the strings + * false and true. Numbers are converted to strings abiding by the JSON standard. + * Floating-point numbers are converted to the shortest string that 'correctly' + * represents the number. + */ + template::value>::type> + simdjson_inline void append(number_type v) noexcept; + + /** + * Append character c. + */ + simdjson_inline void append(char c) noexcept; + + /** + * Append the string 'null'. + */ + simdjson_inline void append_null() noexcept; + + /** + * Clear the content. + */ + simdjson_inline void clear() noexcept; + + /** + * Append the std::string_view, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append(std::string_view input) noexcept; + + /** + * Append the std::string_view surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(std::string_view input) noexcept; + + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(char input) noexcept; + + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(const char* input) noexcept; + + /** + * Append the C string directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *c) noexcept; + + /** + * Append "{" to the buffer. + */ + simdjson_inline void start_object() noexcept; + + /** + * Append "}" to the buffer. + */ + simdjson_inline void end_object() noexcept; + + /** + * Append "[" to the buffer. + */ + simdjson_inline void start_array() noexcept; + + /** + * Append "]" to the buffer. + */ + simdjson_inline void end_array() noexcept; + + /** + * Append "," to the buffer. + */ + simdjson_inline void append_comma() noexcept; + + /** + * Append ":" to the buffer. + */ + simdjson_inline void append_colon() noexcept; + + /** + * Append a key-value pair to the buffer. + * The key is escaped and surrounded by double quotes. + * The value is escaped if it is a string. + */ + template + simdjson_inline void append_key_value(key_type key, value_type value) noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + // Support for optional types (std::optional, etc.) + template + simdjson_inline void append(const T &opt); + + // Support for string-like types + template + requires(std::is_convertible::value || + std::is_same::value ) + simdjson_inline void append(const T &value); +#endif +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS + // Support for range-based appending (std::ranges::view, etc.) + template +requires (!std::is_convertible::value) + simdjson_inline void append(const R &range) noexcept; +#endif + /** + * Append the std::string_view directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(std::string_view input) noexcept; + + /** + * Append len characters from str. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *str, size_t len) noexcept; +#if SIMDJSON_EXCEPTIONS + /** + * Creates an std::string from the written JSON buffer. + * Throws if memory allocation failed + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string() const noexcept(false); + + /** + * Creates an std::string_view from the written JSON buffer. + * Throws if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string_view() const noexcept(false) simdjson_lifetime_bound; +#endif + + /** + * Returns a view on the written JSON buffer. Returns an error + * if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result view() const noexcept; + + /** + * Appends the null character to the buffer and returns + * a pointer to the beginning of the written JSON buffer. + * Returns an error if memory allocation failed. + * The result is null-terminated. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result c_str() noexcept; + + /** + * Return true if the content is valid UTF-8. + */ + simdjson_inline bool validate_unicode() const noexcept; + + /** + * Returns the current size of the written JSON buffer. + * If an error occurred, returns 0. + */ + simdjson_inline size_t size() const noexcept; + +private: + /** + * Returns true if we can write at least upcoming_bytes bytes. + * The underlying buffer is reallocated if needed. It is designed + * to be called before writing to the buffer. It should be fast. + */ + simdjson_inline bool capacity_check(size_t upcoming_bytes); + + /** + * Grow the buffer to at least desired_capacity bytes. + * If the allocation fails, is_valid is set to false. We expect + * that this function would not be repeatedly called. + */ + simdjson_inline void grow_buffer(size_t desired_capacity); + + /** + * We use this helper function to make sure that is_valid is kept consistent. + */ + simdjson_inline void set_valid(bool valid) noexcept; + + std::unique_ptr buffer{}; + size_t position{0}; + size_t capacity{0}; + bool is_valid{true}; +}; + + + +} +} + + +#if !SIMDJSON_STATIC_REFLECTION +// fallback implementation until we have static reflection +template +simdjson_result to_json(const Z &z, size_t initial_capacity = 1024) { + simdjson::icelake::builder::string_builder b(initial_capacity); + b.append(z); + std::string_view s; + auto e = b.view().get(s); + if(e) { return e; } + return std::string(s); +} +#endif + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_H +/* end file simdjson/generic/ondemand/json_string_builder.h for icelake */ +/* including simdjson/generic/ondemand/json_string_builder-inl.h for icelake: #include "simdjson/generic/ondemand/json_string_builder-inl.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder-inl.h for icelake */ +/** + * This file is part of the builder API. It is temporarily in the ondemand + * directory but we will move it to a builder directory later. + */ +#include +#include +#include +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* + * Empirically, we have found that an inlined optimization is important for + * performance. The following macros are not ideal. We should find a better + * way to inline the code. + */ + +#if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \ + (defined(_M_AMD64) || defined(_M_X64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP == 2)) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#define SIMDJSON_EXPERIMENTAL_HAS_SSE2 1 +#endif +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_NEON +#define SIMDJSON_EXPERIMENTAL_HAS_NEON 1 +#endif +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +#include +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#include +#endif + +namespace simdjson { +namespace icelake { +namespace builder { + +static SIMDJSON_CONSTEXPR_LAMBDA std::array json_quotable_character = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + +/** + +A possible SWAR implementation of has_json_escapable_byte. It is not used because +it is slower than the current implementation. It is kept here for reference (to show +that we tried it). + +inline bool has_json_escapable_byte(uint64_t x) { + uint64_t is_ascii = 0x8080808080808080ULL & ~x; + uint64_t xor2 = x ^ 0x0202020202020202ULL; + uint64_t lt32_or_eq34 = xor2 - 0x2121212121212121ULL; + uint64_t sub92 = x ^ 0x5C5C5C5C5C5C5C5CULL; + uint64_t eq92 = (sub92 - 0x0101010101010101ULL); + return ((lt32_or_eq34 | eq92) & is_ascii) != 0; +} + +**/ + +SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline bool +simple_needs_escaping(std::string_view v) { + for (char c : v) { + // a table lookup is faster than a series of comparisons + if(json_quotable_character[static_cast(c)]) { + return true; + } + } + return false; +} + +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + uint8x16_t running = vdupq_n_u8(0); + uint8x16_t v34 = vdupq_n_u8(34); + uint8x16_t v92 = vdupq_n_u8(92); + + for (; i + 15 < view.size(); i += 16) { + uint8x16_t word = vld1q_u8((const uint8_t *)view.data() + i); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + if (i < view.size()) { + uint8x16_t word = + vld1q_u8((const uint8_t *)view.data() + view.length() - 16); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + return vmaxvq_u32(vreinterpretq_u32_u8(running)) != 0; +} +#elif SIMDJSON_EXPERIMENTAL_HAS_SSE2 +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + __m128i running = _mm_setzero_si128(); + for (; i + 15 < view.size(); i += 16) { + + __m128i word = _mm_loadu_si128(reinterpret_cast(view.data() + i)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + if (i < view.size()) { + __m128i word = + _mm_loadu_si128(reinterpret_cast(view.data() + view.length() - 16)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + return _mm_movemask_epi8(running) != 0; +} +#else +simdjson_inline bool fast_needs_escaping(std::string_view view) { + return simple_needs_escaping(view); +} +#endif + + +SIMDJSON_CONSTEXPR_LAMBDA inline size_t +find_next_json_quotable_character(const std::string_view view, + size_t location) noexcept { + + for (auto pos = view.begin() + location; pos != view.end(); ++pos) { + if (json_quotable_character[static_cast(*pos)]) { + return pos - view.begin(); + } + } + return size_t(view.size()); +} + +SIMDJSON_CONSTEXPR_LAMBDA static std::string_view control_chars[] = { + "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", + "\\u0007", "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", + "\\u000e", "\\u000f", "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", + "\\u0015", "\\u0016", "\\u0017", "\\u0018", "\\u0019", "\\u001a", "\\u001b", + "\\u001c", "\\u001d", "\\u001e", "\\u001f"}; + +// All Unicode characters may be placed within the quotation marks, except for the +// characters that MUST be escaped: quotation mark, reverse solidus, and the control +// characters (U+0000 through U+001F). +// There are two-character sequence escape representations of some popular characters: +// \", \\, \b, \f, \n, \r, \t. +SIMDJSON_CONSTEXPR_LAMBDA void escape_json_char(char c, char *&out) { + if (c == '"') { + memcpy(out, "\\\"", 2); + out += 2; + } else if (c == '\\') { + memcpy(out, "\\\\", 2); + out += 2; + } else { + std::string_view v = control_chars[uint8_t(c)]; + memcpy(out, v.data(), v.size()); + out += v.size(); + } +} + +inline size_t write_string_escaped(const std::string_view input, char *out) { + size_t mysize = input.size(); + if (!fast_needs_escaping(input)) { // fast path! + memcpy(out, input.data(), input.size()); + return input.size(); + } + const char *const initout = out; + size_t location = find_next_json_quotable_character(input, 0); + memcpy(out, input.data(), location); + out += location; + escape_json_char(input[location], out); + location += 1; + while (location < mysize) { + size_t newlocation = find_next_json_quotable_character(input, location); + memcpy(out, input.data() + location, newlocation - location); + out += newlocation - location; + location = newlocation; + if (location == mysize) { + break; + } + escape_json_char(input[location], out); + location += 1; + } + return out - initout; +} + +simdjson_inline string_builder::string_builder(size_t initial_capacity) + : buffer(new(std::nothrow) char[initial_capacity]), position(0), + capacity(buffer.get() != nullptr ? initial_capacity : 0), + is_valid(buffer.get() != nullptr) {} + +simdjson_inline bool string_builder::capacity_check(size_t upcoming_bytes) { + // We use the convention that when is_valid is false, then the capacity and + // the position are 0. + // Most of the time, this function will return true. + if (simdjson_likely(upcoming_bytes <= capacity - position)) { + return true; + } + // check for overflow, most of the time there is no overflow + if (simdjson_likely(position + upcoming_bytes < position)) { + return false; + } + // We will rarely get here. + grow_buffer((std::max)(capacity * 2, position + upcoming_bytes)); + // If the buffer allocation failed, we set is_valid to false. + return is_valid; +} + +simdjson_inline void string_builder::grow_buffer(size_t desired_capacity) { + if (!is_valid) { + return; + } + std::unique_ptr new_buffer(new (std::nothrow) char[desired_capacity]); + if (new_buffer.get() == nullptr) { + set_valid(false); + return; + } + std::memcpy(new_buffer.get(), buffer.get(), position); + buffer.swap(new_buffer); + capacity = desired_capacity; +} + +simdjson_inline void string_builder::set_valid(bool valid) noexcept { + if (!valid) { + is_valid = false; + capacity = 0; + position = 0; + buffer.reset(); + } else { + is_valid = true; + } +} + +simdjson_inline size_t string_builder::size() const noexcept { + return position; +} + +simdjson_inline void string_builder::append(char c) noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = c; + } +} + +simdjson_inline void string_builder::append_null() noexcept { + constexpr char null_literal[] = "null"; + constexpr size_t null_len = sizeof(null_literal) - 1; + if (capacity_check(null_len)) { + std::memcpy(buffer.get() + position, null_literal, null_len); + position += null_len; + } +} + +simdjson_inline void string_builder::clear() noexcept { + position = 0; + // if it was invalid, we should try to repair it + if (!is_valid) { + capacity = 0; + buffer.reset(); + is_valid = true; + } +} + +namespace internal { + +// We could specialize further for 32-bit integers. +simdjson_really_inline int int_log2(uint32_t x) { return (63 - leading_zeroes(x | 1)); } + +simdjson_really_inline int fast_digit_count(uint32_t x) { + static uint64_t table[] = { + 4294967296, 8589934582, 8589934582, 8589934582, 12884901788, + 12884901788, 12884901788, 17179868184, 17179868184, 17179868184, + 21474826480, 21474826480, 21474826480, 21474826480, 25769703776, + 25769703776, 25769703776, 30063771072, 30063771072, 30063771072, + 34349738368, 34349738368, 34349738368, 34349738368, 38554705664, + 38554705664, 38554705664, 41949672960, 41949672960, 41949672960, + 42949672960, 42949672960}; + return uint32_t((x + table[int_log2(x)]) >> 32); +} + +simdjson_really_inline int int_log2(uint64_t x) { return 63 - leading_zeroes(x | 1); } + +simdjson_really_inline int fast_digit_count(uint64_t x) { + static uint64_t table[] = {9, + 99, + 999, + 9999, + 99999, + 999999, + 9999999, + 99999999, + 999999999, + 9999999999, + 99999999999, + 999999999999, + 9999999999999, + 99999999999999, + 999999999999999ULL, + 9999999999999999ULL, + 99999999999999999ULL, + 999999999999999999ULL, + 9999999999999999999ULL}; + int y = (19 * int_log2(x) >> 6); + y += x > table[y]; + return y + 1; +} + +template ::value>::type> +simdjson_really_inline size_t digit_count(number_type v) noexcept { + static_assert(sizeof(number_type) == 8 || sizeof(number_type) == 4 || + sizeof(number_type) == 2 || sizeof(number_type) == 1, + "We only support 8-bit, 16-bit, 32-bit and 64-bit numbers"); + return fast_digit_count(v); +} +static const char decimal_table[200] = { + 0x30, 0x30, 0x30, 0x31, 0x30, 0x32, 0x30, 0x33, 0x30, 0x34, 0x30, 0x35, + 0x30, 0x36, 0x30, 0x37, 0x30, 0x38, 0x30, 0x39, 0x31, 0x30, 0x31, 0x31, + 0x31, 0x32, 0x31, 0x33, 0x31, 0x34, 0x31, 0x35, 0x31, 0x36, 0x31, 0x37, + 0x31, 0x38, 0x31, 0x39, 0x32, 0x30, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, + 0x32, 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37, 0x32, 0x38, 0x32, 0x39, + 0x33, 0x30, 0x33, 0x31, 0x33, 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35, + 0x33, 0x36, 0x33, 0x37, 0x33, 0x38, 0x33, 0x39, 0x34, 0x30, 0x34, 0x31, + 0x34, 0x32, 0x34, 0x33, 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, 0x34, 0x37, + 0x34, 0x38, 0x34, 0x39, 0x35, 0x30, 0x35, 0x31, 0x35, 0x32, 0x35, 0x33, + 0x35, 0x34, 0x35, 0x35, 0x35, 0x36, 0x35, 0x37, 0x35, 0x38, 0x35, 0x39, + 0x36, 0x30, 0x36, 0x31, 0x36, 0x32, 0x36, 0x33, 0x36, 0x34, 0x36, 0x35, + 0x36, 0x36, 0x36, 0x37, 0x36, 0x38, 0x36, 0x39, 0x37, 0x30, 0x37, 0x31, + 0x37, 0x32, 0x37, 0x33, 0x37, 0x34, 0x37, 0x35, 0x37, 0x36, 0x37, 0x37, + 0x37, 0x38, 0x37, 0x39, 0x38, 0x30, 0x38, 0x31, 0x38, 0x32, 0x38, 0x33, + 0x38, 0x34, 0x38, 0x35, 0x38, 0x36, 0x38, 0x37, 0x38, 0x38, 0x38, 0x39, + 0x39, 0x30, 0x39, 0x31, 0x39, 0x32, 0x39, 0x33, 0x39, 0x34, 0x39, 0x35, + 0x39, 0x36, 0x39, 0x37, 0x39, 0x38, 0x39, 0x39, +}; +} // namespace internal + +template +simdjson_inline void string_builder::append(number_type v) noexcept { + static_assert(std::is_same::value || + std::is_integral::value || + std::is_floating_point::value, + "Unsupported number type"); + // If C++17 is available, we can 'if constexpr' here. + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + if (v) { + constexpr char true_literal[] = "true"; + constexpr size_t true_len = sizeof(true_literal) - 1; + if (capacity_check(true_len)) { + std::memcpy(buffer.get() + position, true_literal, true_len); + position += true_len; + } + } else { + constexpr char false_literal[] = "false"; + constexpr size_t false_len = sizeof(false_literal) - 1; + if (capacity_check(false_len)) { + std::memcpy(buffer.get() + position, false_literal, false_len); + position += false_len; + } + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_unsigned::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + unsigned_type pv = static_cast(v); + size_t dc = internal::digit_count(pv); + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100)*2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_integral::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + bool negative = v < 0; + unsigned_type pv = static_cast(v); + if (negative) { + pv = 0 - pv; // the 0 is for Microsoft + } + size_t dc = internal::digit_count(pv); + if (negative) { + buffer.get()[position++] = '-'; + } + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100)*2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_floating_point::value) { + constexpr size_t max_number_size = 24; + if (capacity_check(max_number_size)) { + // We could specialize for float. + char *end = simdjson::internal::to_chars(buffer.get() + position, nullptr, + double(v)); + position = end - buffer.get(); + } + } +} + +simdjson_inline void +string_builder::escape_and_append(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(6 * input.size())) { + position += write_string_escaped(input, buffer.get() + position); + } +} + +simdjson_inline void +string_builder::escape_and_append_with_quotes(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * input.size())) { + buffer.get()[position++] = '"'; + position += write_string_escaped(input, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} + +simdjson_inline void +string_builder::escape_and_append_with_quotes(char input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * 1)) { + buffer.get()[position++] = '"'; + std::string_view cinput(&input, 1); + position += write_string_escaped(cinput, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} + +simdjson_inline void string_builder::escape_and_append_with_quotes(const char* input) noexcept { + std::string_view cinput(input); + escape_and_append_with_quotes(cinput); +} + +simdjson_inline void string_builder::append_raw(const char *c) noexcept { + size_t len = std::strlen(c); + append_raw(c, len); +} + +simdjson_inline void +string_builder::append_raw(std::string_view input) noexcept { + if (capacity_check(input.size())) { + std::memcpy(buffer.get() + position, input.data(), input.size()); + position += input.size(); + } +} + +simdjson_inline void string_builder::append_raw(const char *str, + size_t len) noexcept { + if (capacity_check(len)) { + std::memcpy(buffer.get() + position, str, len); + position += len; + } +} +#if SIMDJSON_SUPPORTS_CONCEPTS +// Support for optional types (std::optional, etc.) +template +simdjson_inline void string_builder::append(const T &opt) { + if (opt) { + append(*opt); + } else { + append_null(); + } +} +template +requires(std::is_convertible::value || +std::is_same::value ) +simdjson_inline void string_builder::append(const T &value) { + escape_and_append_with_quotes(value); +} +#endif + +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS + // Support for range-based appending (std::ranges::view, etc.) +template +requires (!std::is_convertible::value) +simdjson_inline void string_builder::append(const R &range) noexcept { + auto it = std::ranges::begin(range); + auto end = std::ranges::end(range); + if constexpr (concepts::is_pair) { + start_object(); + + if (it == end) { + end_object(); + return; // Handle empty range + } + // Append first item without leading comma + append_key_value(it->first, it->second); + ++it; + + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append_key_value(it->first, it->second); + } + end_object(); + } else { + start_array(); + if (it == end) { + end_array(); + return; // Handle empty range + } + + // Append first item without leading comma + append(*it); + ++it; + + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append(*it); + } + end_array(); + + } +} + +#endif + +#if SIMDJSON_EXCEPTIONS +simdjson_inline string_builder::operator std::string() const noexcept(false) { + return std::string(std::string_view()); +} + +simdjson_inline string_builder::operator std::string_view() const + noexcept(false) simdjson_lifetime_bound { + return view(); +} +#endif + +simdjson_inline simdjson_result +string_builder::view() const noexcept { + if (!is_valid) { + return simdjson::OUT_OF_CAPACITY; + } + return std::string_view(buffer.get(), position); +} + +simdjson_inline simdjson_result string_builder::c_str() noexcept { + if (capacity_check(1)) { + buffer.get()[position] = '\0'; + return buffer.get(); + } + return simdjson::OUT_OF_CAPACITY; +} + +simdjson_inline bool string_builder::validate_unicode() const noexcept { + return simdjson::validate_utf8(buffer.get(), position); +} + +simdjson_inline void string_builder::start_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '{'; + } +} + + +simdjson_inline void string_builder::end_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '}'; + } +} + + +simdjson_inline void string_builder::start_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '['; + } +} + + +simdjson_inline void string_builder::end_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ']'; + } +} + + +simdjson_inline void string_builder::append_comma() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ','; + } +} + + +simdjson_inline void string_builder::append_colon() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ':'; + } +} + +template +simdjson_inline void string_builder::append_key_value(key_type key, value_type value) noexcept { + static_assert( + std::is_same::value || + std::is_convertible::value, + "Unsupported key type"); + escape_and_append_with_quotes(key); + append_colon(); + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + append_null(); + } else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } else SIMDJSON_IF_CONSTEXPR(std::is_convertible::value) { + escape_and_append_with_quotes(value); + } else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } else { + append(value); + } +} + +} // namespace builder +} // namespace icelake +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_INL_H +/* end file simdjson/generic/ondemand/json_string_builder-inl.h for icelake */ +/* including simdjson/generic/ondemand/json_builder.h for icelake: #include "simdjson/generic/ondemand/json_builder.h" */ +/* begin file simdjson/generic/ondemand/json_builder.h for icelake */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/concepts.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#if SIMDJSON_STATIC_REFLECTION + +#include +#include +#include +#include +#include +#include +#include +#include +// #include // for std::define_static_string - header not available yet + +namespace simdjson { +namespace icelake { +namespace builder { + +// Concept that checks if a type is a container but not a string (because +// strings handling must be handled differently) +template +concept container_but_not_string = + requires(T a) { + { a.size() } -> std::convertible_to; + { + a[std::declval()] + }; // check if elements are accessible for the subscript operator + } && !std::is_same_v && + !std::is_same_v && !std::is_same_v; + +template + requires(container_but_not_string) +constexpr void atom(string_builder &b, const T &t) { + if (t.size() == 0) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, t[0]); + for (size_t i = 1; i < t.size(); ++i) { + b.append(','); + atom(b, t[i]); + } + b.append(']'); +} + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +constexpr void atom(string_builder &b, const T &t) { + b.escape_and_append_with_quotes(t); +} + +template +constexpr void atom(string_builder &b, const T &m) { + if (m.empty()) { + b.append_raw("{}"); + return; + } + b.append('{'); + bool first = true; + for (const auto& [key, value] : m) { + if (!first) { + b.append(','); + } + first = false; + // Keys must be convertible to string_view per the concept + b.escape_and_append_with_quotes(key); + b.append(':'); + atom(b, value); + } + b.append('}'); +} + + +template::value && !std::is_same_v>::type> +constexpr void atom(string_builder &b, const number_type t) { + b.append(t); +} + +template + requires(std::is_class_v && !container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v) +constexpr void atom(string_builder &b, const T &t) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, t.[:dm:]); + i++; + }; + b.append('}'); +} + +// Support for optional types (std::optional, etc.) +template +constexpr void atom(string_builder &b, const T &opt) { + if (opt) { + atom(b, opt.value()); + } else { + b.append_raw("null"); + } +} + +// Support for smart pointers (std::unique_ptr, std::shared_ptr, etc.) +template +constexpr void atom(string_builder &b, const T &ptr) { + if (ptr) { + atom(b, *ptr); + } else { + b.append_raw("null"); + } +} + +// Support for enums - serialize as string representation using expand approach from P2996R12 +template + requires(std::is_enum_v) +void atom(string_builder &b, const T &e) { +#if SIMDJSON_STATIC_REFLECTION + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + constexpr auto enum_str = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(enum_val))); + if (e == [:enum_val:]) { + b.append_raw(enum_str); + return; + } + }; + // Fallback to integer if enum value not found + atom(b, static_cast>(e)); +#else + // Fallback: serialize as integer if reflection not available + atom(b, static_cast>(e)); +#endif +} + +// Support for appendable containers that don't have operator[] (sets, etc.) +template + requires(!container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v) +constexpr void atom(string_builder &b, const T &container) { + if (container.empty()) { + b.append_raw("[]"); + return; + } + b.append('['); + bool first = true; + for (const auto& item : container) { + if (!first) { + b.append(','); + } + first = false; + atom(b, item); + } + b.append(']'); +} + +// append functions that delegate to atom functions for primitive types +template + requires(std::is_arithmetic_v && !std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template + requires(!container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template +void append(string_builder &b, const T &t) { + atom(b, t); +} + +// works for struct +template + requires(std::is_class_v && !container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v) +void append(string_builder &b, const Z &z) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^Z, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, z.[:dm:]); + i++; + }; + b.append('}'); +} + +// works for container +template + requires(container_but_not_string) +void append(string_builder &b, const Z &z) { + if (z.size() == 0) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, z[0]); + for (size_t i = 1; i < z.size(); ++i) { + b.append(','); + atom(b, z[i]); + } + b.append(']'); +} + +template +simdjson_result to_json_string(const Z &z, size_t initial_capacity = 1024) { + string_builder b(initial_capacity); + append(b, z); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); +} + +template +simdjson_error to_json(const Z &z, std::string &s) { + string_builder b; + append(b, z); + std::string_view view; + if(auto e = b.view().get(view); e) { return e; } + s.assign(view); + return SUCCESS; +} + +template +string_builder& operator<<(string_builder& b, const Z& z) { + append(b, z); + return b; +} +} // namespace builder +} // namespace icelake +// Alias the function template to 'to' in the global namespace +template +simdjson_result to_json(const Z &z, size_t initial_capacity = 1024) { + return icelake::builder::to_json_string(z, initial_capacity); +} +} // namespace simdjson + +#endif // SIMDJSON_STATIC_REFLECTION + +#endif +/* end file simdjson/generic/ondemand/json_builder.h for icelake */ /* end file simdjson/generic/ondemand/amalgamated.h for icelake */ /* including simdjson/icelake/end.h: #include "simdjson/icelake/end.h" */ @@ -77317,6 +86598,32 @@ backslash_and_quote::copy_and_find(const uint8_t *src, uint8_t *dst) { }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + // We store it as a 64-bit bitmask even though we only need 16 bits. + uint64_t((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace ppc64 } // namespace simdjson @@ -77385,7 +86692,7 @@ class value_iterator; /* end file simdjson/generic/ondemand/base.h for ppc64 */ /* including simdjson/generic/ondemand/deserialize.h for ppc64: #include "simdjson/generic/ondemand/deserialize.h" */ /* begin file simdjson/generic/ondemand/deserialize.h for ppc64 */ -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS #ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -77464,7 +86771,7 @@ template concept custom_deserializable = tag_invocable; template -concept deserializable = custom_deserializable || is_builtin_deserializable_v; +concept deserializable = custom_deserializable || is_builtin_deserializable_v || concepts::optional_type; template concept nothrow_custom_deserializable = nothrow_tag_invocable; @@ -77475,28 +86782,44 @@ concept nothrow_deserializable = nothrow_custom_deserializable || is_bu /// Deserialize Tag inline constexpr struct deserialize_tag { + using array_type = ppc64::ondemand::array; + using object_type = ppc64::ondemand::object; using value_type = ppc64::ondemand::value; using document_type = ppc64::ondemand::document; using document_reference_type = ppc64::ondemand::document_reference; + // Customization Point for array + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(array_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for object + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(object_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + // Customization Point for value template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document reference template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } @@ -77506,7 +86829,7 @@ inline constexpr struct deserialize_tag { } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/deserialize.h for ppc64 */ /* including simdjson/generic/ondemand/value_iterator.h for ppc64: #include "simdjson/generic/ondemand/value_iterator.h" */ @@ -77638,7 +86961,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** @@ -77648,7 +86972,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; @@ -77770,7 +87095,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** @@ -77780,7 +87106,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; @@ -78042,13 +87369,14 @@ class value { * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * When SIMDJSON_SUPPORTS_CONCEPTS is set, custom types are also supported. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -78065,6 +87393,7 @@ class value { * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * If the macro SIMDJSON_SUPPORTS_CONCEPTS is set, then custom types are also supported. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. @@ -78072,28 +87401,47 @@ class value { */ template simdjson_inline error_code get(T &out) -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION - if constexpr (custom_deserializable) { + #if SIMDJSON_SUPPORTS_CONCEPTS + if constexpr (custom_deserializable) { return deserialize(*this, out); - } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); - static_cast(out); // to get rid of unused errors - return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION - } + } else if constexpr (concepts::optional_type) { + using value_type = typename std::remove_cvref_t::value_type; + + // Check if the value is null + if (is_null()) { + out.reset(); // Set to nullopt + return SUCCESS; + } + + if (!out) { + out.emplace(); + } + return get(out.value()); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_CONCEPTS + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; #endif } @@ -78804,7 +88152,22 @@ struct simdjson_result : public ppc64::implementation_si simdjson_result operator[](int) noexcept = delete; /** - * Get the type of this JSON value. + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * Given a valid JSON document, the answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just @@ -79416,6 +88779,7 @@ namespace ondemand { * The type of a JSON value. */ enum class json_type { + unknown=0, // Start at 1 to catch uninitialized / default values more easily array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) @@ -79622,6 +88986,12 @@ class raw_json_string { */ simdjson_inline const char * raw() const noexcept; + /** + * Get the character at index i. This is unchecked. + * [0] when the string is of length 0 returns the final quote ("). + */ + simdjson_inline char operator[](size_t i) const noexcept; + /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done) on target.size() characters, @@ -79761,10 +89131,10 @@ struct simdjson_result : public ppc64::impleme simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline char operator[](size_t) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape(ppc64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(ppc64::ondemand::json_iterator &iter) const noexcept; }; - } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H @@ -79780,6 +89150,7 @@ struct simdjson_result : public ppc64::impleme /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include +#include namespace simdjson { namespace ppc64 { @@ -79898,7 +89269,9 @@ class parser { simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; - /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + /** @overload simdjson_result iterate(padded_string_view json) & noexcept + The string instance might be have its capacity extended. Note that this can still + result in AddressSanitizer: container-overflow in some cases. */ simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; @@ -79986,6 +89359,11 @@ class parser { * Setting batch_size to excessively large or excessively small values may impact negatively the * performance. * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what @@ -79993,10 +89371,10 @@ class parser { * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * - * ### Threads + * This is checked automatically with all iterate_many function calls, except for the two + * that take pointers (const char* or const uint8_t*). * - * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the - * hood to do some lookahead. + * ### Threads * * ### Parser Capacity * @@ -80022,14 +89400,16 @@ class parser { */ inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(padded_string_view json, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) + the string might be automatically padded with up to SIMDJSON_PADDING whitespace characters */ + inline simdjson_result iterate_many(std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe - /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; @@ -80131,13 +89511,39 @@ class parser { bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; #endif + /** + * Get a unique parser instance corresponding to the current thread. + * This instance can be safely used within the current thread, but it should + * not be passed to other threads. + * + * A parser should only be used for one document at a time. + * + * Our simdjson::from functions use this parser instance. + * + * You can free the related parser by calling release_parser(). + */ + static simdjson_inline simdjson_warn_unused ondemand::parser& get_parser(); + /** + * Release the parser instance initialized by get_parser() and all the + * associated resources (memory). Returns true if a parser instance + * was released. + */ + static simdjson_inline bool release_parser(); + private: + friend bool release_parser(); + friend ondemand::parser& get_parser(); + /** Get the thread-local parser instance, allocates it if needed */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_parser_instance(); + /** Get the thread-local parser instance, it might be null */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_threadlocal_parser_if_exists(); /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; std::unique_ptr string_buf{}; + #if SIMDJSON_DEVELOPMENT_CHECKS std::unique_ptr start_positions{}; #endif @@ -80301,6 +89707,37 @@ class array { * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; + +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this array as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON array is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the array, no matter where you are right now. @@ -80379,7 +89816,28 @@ struct simdjson_result : public ppc64::implementation_si simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -80424,7 +89882,8 @@ class array_iterator { * * Part of the std::iterator interface. */ - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result + operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. /** * Check if we are at the end of the JSON. * @@ -80448,6 +89907,11 @@ class array_iterator { */ simdjson_inline array_iterator &operator++() noexcept; + /** + * Check if the array is at the end. + */ + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; + private: value_iterator iter{}; @@ -80466,7 +89930,6 @@ namespace simdjson { template<> struct simdjson_result : public ppc64::implementation_simdjson_result_base { -public: simdjson_inline simdjson_result(ppc64::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -80479,6 +89942,8 @@ struct simdjson_result : public ppc64::implemen simdjson_inline bool operator==(const simdjson_result &) const noexcept; simdjson_inline bool operator!=(const simdjson_result &) const noexcept; simdjson_inline simdjson_result &operator++() noexcept; + + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; }; } // namespace simdjson @@ -80672,7 +90137,7 @@ class document { */ template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -80695,7 +90160,7 @@ class document { */ template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -80713,35 +90178,42 @@ class document { * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. - * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns INCORRECT_TYPE If the JSON value is of the given type. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_CONCEPTS + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_CONCEPTS } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; @@ -80804,7 +90276,7 @@ class document { * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a raw_json_string. * @@ -80813,7 +90285,7 @@ class document { * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a bool. * @@ -80963,11 +90435,27 @@ class document { * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * + * The answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. + * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * - * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + * Prior to simdjson 4.0, this function would return an error given a bad token. + * Starting with simdjson 4.0, it will return simdjson::ondemand::json_type::unknown. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. */ simdjson_inline simdjson_result type() noexcept; @@ -81257,7 +90745,7 @@ class document_reference { simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -81270,7 +90758,7 @@ class document_reference { } template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -81293,30 +90781,37 @@ class document_reference { */ template simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_CONCEPTS + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_CONCEPTS } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; @@ -81401,6 +90896,9 @@ struct simdjson_result : public ppc64::implementation template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS + + using ppc64::implementation_simdjson_result_base::operator*; + using ppc64::implementation_simdjson_result_base::operator->; template ::value == false>::type> explicit simdjson_inline operator T() noexcept(false); simdjson_inline operator ppc64::ondemand::array() & noexcept(false); @@ -81659,6 +91157,7 @@ class document_stream { * Default constructor. */ simdjson_inline iterator() noexcept; + simdjson_inline iterator(const iterator &other) noexcept = default; /** * Get the current document (or error). */ @@ -81672,6 +91171,7 @@ class document_stream { * @param other the end iterator to compare to. */ simdjson_inline bool operator!=(const iterator &other) const noexcept; + simdjson_inline bool operator==(const iterator &other) const noexcept; /** * @private * @@ -81715,6 +91215,11 @@ class document_stream { */ inline error_code error() const noexcept; + /** + * Returns whether the iterator is at the end. + */ + inline bool at_end() const noexcept; + private: simdjson_inline iterator(document_stream *s, bool finished) noexcept; /** The document_stream we're iterating through. */ @@ -81726,6 +91231,7 @@ class document_stream { friend class document_stream; friend class json_iterator; }; + using iterator = document_stream::iterator; /** * Start iterating the documents in the stream. @@ -82186,6 +91692,36 @@ class object { */ simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this object as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON object is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the object, no matter where you are right now. @@ -82229,12 +91765,32 @@ struct simdjson_result : public ppc64::implementation_s simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; - inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; inline simdjson_result raw_json() noexcept; + #if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -82363,6 +91919,20 @@ inline simdjson_result to_json_string(simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); + +#if SIMDJSON_STATIC_REFLECTION +/** + * Create a JSON string from any user-defined type using static reflection. + * Only available when SIMDJSON_STATIC_REFLECTION is enabled. + */ +template + requires(!std::same_as && + !std::same_as && + !std::same_as && + !std::same_as) +inline std::string to_json_string(const T& obj); +#endif + } // namespace simdjson /** @@ -82434,17 +92004,22 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result #include +#if SIMDJSON_STATIC_REFLECTION +#include +// #include // for std::define_static_string - header not available yet +#endif namespace simdjson { template @@ -82491,6 +92066,32 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { return SUCCESS; } +////////////////////////////// +// String deserialization +////////////////////////////// + +// just a character! +error_code tag_invoke(deserialize_tag, auto &val, char &out) noexcept { + std::string_view x; + SIMDJSON_TRY(val.get_string().get(x)); + if(x.size() != 1) { + return INCORRECT_TYPE; + } + out = x[0]; + return SUCCESS; +} + +// any string-like type (can be constructed from std::string_view) +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + out = T{str}; + return SUCCESS; +} + + /** * STL containers have several constructors including one that takes a single * size argument. Thus, some compilers (Visual Studio) will not be able to @@ -82508,9 +92109,13 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { static_assert( std::is_default_constructible_v, "The specified type inside the container must default constructible."); - ppc64::ondemand::array arr; - SIMDJSON_TRY(val.get_array().get(arr)); + if constexpr (std::is_same_v, ppc64::ondemand::array>) { + arr = val; + } else { + SIMDJSON_TRY(val.get_array().get(arr)); + } + for (auto v : arr) { if constexpr (concepts::returns_reference) { if (auto const err = v.get().get(concepts::emplace_one(out)); @@ -82536,6 +92141,77 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { } +/** + * We want to support std::map and std::unordered_map but only for + * string-keyed types. + */ + template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::mapped_type; + static_assert( + deserializable, + "The specified value type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified value type inside the container must default constructible."); + ppc64::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + value_type this_value; + SIMDJSON_TRY(field.value().get().get(this_value)); + [[maybe_unused]] std::pair result = out.emplace(key, this_value); + // unclear what to do if the key already exists + // if (result.second == false) { + // // key already exists + // } + } + (void)out; + return SUCCESS; +} + +template +error_code tag_invoke(deserialize_tag, ppc64::ondemand::object &obj, T &out) noexcept { + using value_type = typename std::remove_cvref_t::mapped_type; + + out.clear(); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + + ppc64::ondemand::value value_obj; + SIMDJSON_TRY(field.value().get(value_obj)); + + value_type this_value; + SIMDJSON_TRY(value_obj.get(this_value)); + out.emplace(typename T::key_type(key), std::move(this_value)); + } + return SUCCESS; +} + +template +error_code tag_invoke(deserialize_tag, ppc64::ondemand::value &val, T &out) noexcept { + ppc64::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + +template +error_code tag_invoke(deserialize_tag, ppc64::ondemand::document &doc, T &out) noexcept { + ppc64::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + +template +error_code tag_invoke(deserialize_tag, ppc64::ondemand::document_reference &doc, T &out) noexcept { + ppc64::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + /** * This CPO (Customization Point Object) will help deserialize into @@ -82577,17 +92253,16 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser /** * This CPO (Customization Point Object) will help deserialize into optional types. */ -template +template requires(!require_custom_serialization) -error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::value_type, ValT>) { +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept(nothrow_deserializable::value_type, decltype(val)>) { using value_type = typename std::remove_cvref_t::value_type; - static_assert( - deserializable, - "The specified type inside the unique_ptr must itself be deserializable"); - static_assert( - std::is_default_constructible_v, - "The specified type inside the unique_ptr must default constructible."); + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullopt + return SUCCESS; + } if (!out) { out.emplace(); @@ -82596,10 +92271,295 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser return SUCCESS; } + +#if SIMDJSON_STATIC_REFLECTION + + +template +constexpr bool user_defined_type = (std::is_class_v +&& !std::is_same_v && !std::is_same_v && !concepts::optional_type && +!concepts::appendable_containers && !require_custom_serialization); + + +template + requires(user_defined_type && std::is_class_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { + ppc64::ondemand::object obj; + if constexpr (std::is_same_v, ppc64::ondemand::object>) { + obj = val; + } else { + SIMDJSON_TRY(val.get_object().get(obj)); + } + error_code e = simdjson::SUCCESS; + template for (constexpr auto mem : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + // Note: removed static assert as optional types are now handled generically + // as long we are succesful or the field is not found, we continue + if(e == simdjson::SUCCESS || e == simdjson::NO_SUCH_FIELD) { + e = obj[key].get(out.[:mem:]); + } + } + }; + return e; +} + +// Support for enum deserialization - deserialize from string representation using expand approach from P2996R12 +template + requires(std::is_enum_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { +#if SIMDJSON_STATIC_REFLECTION + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + if (str == std::meta::identifier_of(enum_val)) { + out = [:enum_val:]; + return SUCCESS; + } + }; + + return INCORRECT_TYPE; +#else + // Fallback: deserialize as integer if reflection not available + std::underlying_type_t int_val; + SIMDJSON_TRY(val.get(int_val)); + out = static_cast(int_val); + return SUCCESS; +#endif +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::unique_ptr &out) noexcept { + if (!out) { + out = std::make_unique(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::shared_ptr &out) noexcept { + if (!out) { + out = std::make_shared(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +#endif // SIMDJSON_STATIC_REFLECTION + +//////////////////////////////////////// +// Unique pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Shared pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Explicit optional specializations +//////////////////////////////////////// + +//////////////////////////////////////// +// Explicit smart pointer specializations for string and int types +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_shared(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + int64_t temp; + SIMDJSON_TRY(val.get_int64().get(temp)); + *out = static_cast(temp); + return SUCCESS; +} + } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/std_deserialize.h for ppc64 */ // Inline definitions @@ -82883,6 +92843,9 @@ simdjson_inline array_iterator &array_iterator::operator++() noexcept { return *this; } +simdjson_inline bool array_iterator::at_end() const noexcept { + return iter.at_end(); +} } // namespace ondemand } // namespace ppc64 } // namespace simdjson @@ -82919,7 +92882,9 @@ simdjson_inline simdjson_result &simdjson_resul ++(first); return *this; } - +simdjson_inline bool simdjson_result::at_end() const noexcept { + return !first.iter.is_valid() || first.at_end(); +} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H @@ -83677,8 +93642,8 @@ simdjson_inline document::operator object() & noexcept(false) { return get_objec simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document::operator double() noexcept(false) { return get_double(); } -simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } -simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator std::string_view() noexcept(false) simdjson_lifetime_bound { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) simdjson_lifetime_bound { return get_raw_json_string(); } simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document::operator value() noexcept(false) { return get_value(); } @@ -84632,10 +94597,19 @@ simdjson_inline document_stream::iterator& document_stream::iterator::operator++ return *this; } +simdjson_inline bool document_stream::iterator::at_end() const noexcept { + return finished; +} + + simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { return finished != other.finished; } +simdjson_inline bool document_stream::iterator::operator==(const document_stream::iterator &other) const noexcept { + return finished == other.finished; +} + simdjson_inline document_stream::iterator document_stream::begin() noexcept { start(); // If there are no documents, we're finished. @@ -85235,6 +95209,8 @@ simdjson_inline void json_iterator::assert_valid_position(token_position positio #ifndef SIMDJSON_CLANG_VISUAL_STUDIO SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#else + (void)position; // Suppress unused parameter warning #endif } @@ -86083,6 +96059,7 @@ simdjson_inline simdjson_result simdjson_result parser::iterate(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -86297,7 +96274,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(p #ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -86329,10 +96306,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(s } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { - if(json.capacity() - json.size() < SIMDJSON_PADDING) { - json.reserve(json.size() + SIMDJSON_PADDING); - } - return iterate(padded_string_view(json)); + return iterate(pad_with_reserve(json)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { @@ -86354,7 +96328,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(c } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -86369,6 +96343,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iter } inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { buf += 3; @@ -86377,16 +96352,24 @@ inline simdjson_result parser::iterate_many(const uint8_t *buf, if(allow_comma_separated && batch_size < len) { batch_size = len; } return document_stream(*this, buf, len, batch_size, allow_comma_separated); } + inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); } -inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { +inline simdjson_result parser::iterate_many(padded_string_view s, size_t batch_size, bool allow_comma_separated) noexcept { + if (!s.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(pad(s), batch_size, allow_comma_separated); } - simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } @@ -86421,6 +96404,34 @@ simdjson_inline simdjson_warn_unused simdjson_result parser::u return result; } +simdjson_inline simdjson_warn_unused ondemand::parser& parser::get_parser() { + return *parser::get_parser_instance(); +} + +simdjson_inline bool release_parser() { + auto &parser_instance = parser::get_threadlocal_parser_if_exists(); + if (parser_instance) { + parser_instance.reset(); + return true; + } + return false; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_parser_instance() { + std::unique_ptr& parser_instance = get_threadlocal_parser_if_exists(); + if (!parser_instance) { + parser_instance.reset(new ondemand::parser()); + } + return parser_instance; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_threadlocal_parser_if_exists() { + // @the-moisrex points out that this could be implemented with std::optional (C++17). + thread_local std::unique_ptr parser_instance = nullptr; + return parser_instance; +} + + } // namespace ondemand } // namespace ppc64 } // namespace simdjson @@ -86455,8 +96466,13 @@ namespace ondemand { simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} -simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } +simdjson_inline const char * raw_json_string::raw() const noexcept { + return reinterpret_cast(buf); +} +simdjson_inline char raw_json_string::operator[](size_t i) const noexcept { + return reinterpret_cast(buf)[i]; +} simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; @@ -86633,6 +96649,10 @@ simdjson_inline simdjson_result simdjson_result::operator[](size_t i) const noexcept { + if (error()) { return error(); } + return first[i]; +} simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(ppc64::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { if (error()) { return error(); } return first.unescape(iter, allow_replacement); @@ -86658,6 +96678,9 @@ simdjson_inline simdjson_warn_unused simdjson_result simdjson_ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_builder.h" */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -87760,19 +97783,23 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } return result; } + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("bool"); - uint8_t tmpbuf[5+1+1]; // +1 for null termination - tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } - auto result = parse_bool(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - } - return result; + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } + simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("null"); @@ -88035,7 +98062,7 @@ simdjson_inline simdjson_result value_iterator::type() const noexcept case '5': case '6': case '7': case '8': case '9': return json_type::number; default: - return TAPE_ERROR; + return json_type::unknown; } } @@ -88075,6 +98102,1221 @@ simdjson_inline simdjson_result::simdjson_resul #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* end file simdjson/generic/ondemand/value_iterator-inl.h for ppc64 */ +// JSON builder, ideally they should not be part of the ondemand directory +// but it is convenient for now to have them here. +/* including simdjson/generic/ondemand/json_string_builder.h for ppc64: #include "simdjson/generic/ondemand/json_string_builder.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder.h for ppc64 */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace ppc64 { +namespace builder { + +/** + * A builder for JSON strings representing documents. This is a low-level + * builder that is not meant to be used directly by end-users. Though it + * supports atomic types (Booleans, strings), it does not support composed + * types (arrays and objects). + * + * Ultimately, this class can support kernel-specific optimizations. E.g., + * it may make use of SIMD instructions to escape strings faster. + */ +class string_builder { +public: + simdjson_inline string_builder(size_t initial_capacity = 1024); + + /** + * Append number (includes Booleans). Booleans are mapped to the strings + * false and true. Numbers are converted to strings abiding by the JSON standard. + * Floating-point numbers are converted to the shortest string that 'correctly' + * represents the number. + */ + template::value>::type> + simdjson_inline void append(number_type v) noexcept; + + /** + * Append character c. + */ + simdjson_inline void append(char c) noexcept; + + /** + * Append the string 'null'. + */ + simdjson_inline void append_null() noexcept; + + /** + * Clear the content. + */ + simdjson_inline void clear() noexcept; + + /** + * Append the std::string_view, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append(std::string_view input) noexcept; + + /** + * Append the std::string_view surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(std::string_view input) noexcept; + + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(char input) noexcept; + + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(const char* input) noexcept; + + /** + * Append the C string directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *c) noexcept; + + /** + * Append "{" to the buffer. + */ + simdjson_inline void start_object() noexcept; + + /** + * Append "}" to the buffer. + */ + simdjson_inline void end_object() noexcept; + + /** + * Append "[" to the buffer. + */ + simdjson_inline void start_array() noexcept; + + /** + * Append "]" to the buffer. + */ + simdjson_inline void end_array() noexcept; + + /** + * Append "," to the buffer. + */ + simdjson_inline void append_comma() noexcept; + + /** + * Append ":" to the buffer. + */ + simdjson_inline void append_colon() noexcept; + + /** + * Append a key-value pair to the buffer. + * The key is escaped and surrounded by double quotes. + * The value is escaped if it is a string. + */ + template + simdjson_inline void append_key_value(key_type key, value_type value) noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + // Support for optional types (std::optional, etc.) + template + simdjson_inline void append(const T &opt); + + // Support for string-like types + template + requires(std::is_convertible::value || + std::is_same::value ) + simdjson_inline void append(const T &value); +#endif +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS + // Support for range-based appending (std::ranges::view, etc.) + template +requires (!std::is_convertible::value) + simdjson_inline void append(const R &range) noexcept; +#endif + /** + * Append the std::string_view directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(std::string_view input) noexcept; + + /** + * Append len characters from str. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *str, size_t len) noexcept; +#if SIMDJSON_EXCEPTIONS + /** + * Creates an std::string from the written JSON buffer. + * Throws if memory allocation failed + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string() const noexcept(false); + + /** + * Creates an std::string_view from the written JSON buffer. + * Throws if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string_view() const noexcept(false) simdjson_lifetime_bound; +#endif + + /** + * Returns a view on the written JSON buffer. Returns an error + * if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result view() const noexcept; + + /** + * Appends the null character to the buffer and returns + * a pointer to the beginning of the written JSON buffer. + * Returns an error if memory allocation failed. + * The result is null-terminated. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result c_str() noexcept; + + /** + * Return true if the content is valid UTF-8. + */ + simdjson_inline bool validate_unicode() const noexcept; + + /** + * Returns the current size of the written JSON buffer. + * If an error occurred, returns 0. + */ + simdjson_inline size_t size() const noexcept; + +private: + /** + * Returns true if we can write at least upcoming_bytes bytes. + * The underlying buffer is reallocated if needed. It is designed + * to be called before writing to the buffer. It should be fast. + */ + simdjson_inline bool capacity_check(size_t upcoming_bytes); + + /** + * Grow the buffer to at least desired_capacity bytes. + * If the allocation fails, is_valid is set to false. We expect + * that this function would not be repeatedly called. + */ + simdjson_inline void grow_buffer(size_t desired_capacity); + + /** + * We use this helper function to make sure that is_valid is kept consistent. + */ + simdjson_inline void set_valid(bool valid) noexcept; + + std::unique_ptr buffer{}; + size_t position{0}; + size_t capacity{0}; + bool is_valid{true}; +}; + + + +} +} + + +#if !SIMDJSON_STATIC_REFLECTION +// fallback implementation until we have static reflection +template +simdjson_result to_json(const Z &z, size_t initial_capacity = 1024) { + simdjson::ppc64::builder::string_builder b(initial_capacity); + b.append(z); + std::string_view s; + auto e = b.view().get(s); + if(e) { return e; } + return std::string(s); +} +#endif + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_H +/* end file simdjson/generic/ondemand/json_string_builder.h for ppc64 */ +/* including simdjson/generic/ondemand/json_string_builder-inl.h for ppc64: #include "simdjson/generic/ondemand/json_string_builder-inl.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder-inl.h for ppc64 */ +/** + * This file is part of the builder API. It is temporarily in the ondemand + * directory but we will move it to a builder directory later. + */ +#include +#include +#include +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* + * Empirically, we have found that an inlined optimization is important for + * performance. The following macros are not ideal. We should find a better + * way to inline the code. + */ + +#if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \ + (defined(_M_AMD64) || defined(_M_X64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP == 2)) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#define SIMDJSON_EXPERIMENTAL_HAS_SSE2 1 +#endif +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_NEON +#define SIMDJSON_EXPERIMENTAL_HAS_NEON 1 +#endif +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +#include +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#include +#endif + +namespace simdjson { +namespace ppc64 { +namespace builder { + +static SIMDJSON_CONSTEXPR_LAMBDA std::array json_quotable_character = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + +/** + +A possible SWAR implementation of has_json_escapable_byte. It is not used because +it is slower than the current implementation. It is kept here for reference (to show +that we tried it). + +inline bool has_json_escapable_byte(uint64_t x) { + uint64_t is_ascii = 0x8080808080808080ULL & ~x; + uint64_t xor2 = x ^ 0x0202020202020202ULL; + uint64_t lt32_or_eq34 = xor2 - 0x2121212121212121ULL; + uint64_t sub92 = x ^ 0x5C5C5C5C5C5C5C5CULL; + uint64_t eq92 = (sub92 - 0x0101010101010101ULL); + return ((lt32_or_eq34 | eq92) & is_ascii) != 0; +} + +**/ + +SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline bool +simple_needs_escaping(std::string_view v) { + for (char c : v) { + // a table lookup is faster than a series of comparisons + if(json_quotable_character[static_cast(c)]) { + return true; + } + } + return false; +} + +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + uint8x16_t running = vdupq_n_u8(0); + uint8x16_t v34 = vdupq_n_u8(34); + uint8x16_t v92 = vdupq_n_u8(92); + + for (; i + 15 < view.size(); i += 16) { + uint8x16_t word = vld1q_u8((const uint8_t *)view.data() + i); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + if (i < view.size()) { + uint8x16_t word = + vld1q_u8((const uint8_t *)view.data() + view.length() - 16); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + return vmaxvq_u32(vreinterpretq_u32_u8(running)) != 0; +} +#elif SIMDJSON_EXPERIMENTAL_HAS_SSE2 +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + __m128i running = _mm_setzero_si128(); + for (; i + 15 < view.size(); i += 16) { + + __m128i word = _mm_loadu_si128(reinterpret_cast(view.data() + i)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + if (i < view.size()) { + __m128i word = + _mm_loadu_si128(reinterpret_cast(view.data() + view.length() - 16)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + return _mm_movemask_epi8(running) != 0; +} +#else +simdjson_inline bool fast_needs_escaping(std::string_view view) { + return simple_needs_escaping(view); +} +#endif + + +SIMDJSON_CONSTEXPR_LAMBDA inline size_t +find_next_json_quotable_character(const std::string_view view, + size_t location) noexcept { + + for (auto pos = view.begin() + location; pos != view.end(); ++pos) { + if (json_quotable_character[static_cast(*pos)]) { + return pos - view.begin(); + } + } + return size_t(view.size()); +} + +SIMDJSON_CONSTEXPR_LAMBDA static std::string_view control_chars[] = { + "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", + "\\u0007", "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", + "\\u000e", "\\u000f", "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", + "\\u0015", "\\u0016", "\\u0017", "\\u0018", "\\u0019", "\\u001a", "\\u001b", + "\\u001c", "\\u001d", "\\u001e", "\\u001f"}; + +// All Unicode characters may be placed within the quotation marks, except for the +// characters that MUST be escaped: quotation mark, reverse solidus, and the control +// characters (U+0000 through U+001F). +// There are two-character sequence escape representations of some popular characters: +// \", \\, \b, \f, \n, \r, \t. +SIMDJSON_CONSTEXPR_LAMBDA void escape_json_char(char c, char *&out) { + if (c == '"') { + memcpy(out, "\\\"", 2); + out += 2; + } else if (c == '\\') { + memcpy(out, "\\\\", 2); + out += 2; + } else { + std::string_view v = control_chars[uint8_t(c)]; + memcpy(out, v.data(), v.size()); + out += v.size(); + } +} + +inline size_t write_string_escaped(const std::string_view input, char *out) { + size_t mysize = input.size(); + if (!fast_needs_escaping(input)) { // fast path! + memcpy(out, input.data(), input.size()); + return input.size(); + } + const char *const initout = out; + size_t location = find_next_json_quotable_character(input, 0); + memcpy(out, input.data(), location); + out += location; + escape_json_char(input[location], out); + location += 1; + while (location < mysize) { + size_t newlocation = find_next_json_quotable_character(input, location); + memcpy(out, input.data() + location, newlocation - location); + out += newlocation - location; + location = newlocation; + if (location == mysize) { + break; + } + escape_json_char(input[location], out); + location += 1; + } + return out - initout; +} + +simdjson_inline string_builder::string_builder(size_t initial_capacity) + : buffer(new(std::nothrow) char[initial_capacity]), position(0), + capacity(buffer.get() != nullptr ? initial_capacity : 0), + is_valid(buffer.get() != nullptr) {} + +simdjson_inline bool string_builder::capacity_check(size_t upcoming_bytes) { + // We use the convention that when is_valid is false, then the capacity and + // the position are 0. + // Most of the time, this function will return true. + if (simdjson_likely(upcoming_bytes <= capacity - position)) { + return true; + } + // check for overflow, most of the time there is no overflow + if (simdjson_likely(position + upcoming_bytes < position)) { + return false; + } + // We will rarely get here. + grow_buffer((std::max)(capacity * 2, position + upcoming_bytes)); + // If the buffer allocation failed, we set is_valid to false. + return is_valid; +} + +simdjson_inline void string_builder::grow_buffer(size_t desired_capacity) { + if (!is_valid) { + return; + } + std::unique_ptr new_buffer(new (std::nothrow) char[desired_capacity]); + if (new_buffer.get() == nullptr) { + set_valid(false); + return; + } + std::memcpy(new_buffer.get(), buffer.get(), position); + buffer.swap(new_buffer); + capacity = desired_capacity; +} + +simdjson_inline void string_builder::set_valid(bool valid) noexcept { + if (!valid) { + is_valid = false; + capacity = 0; + position = 0; + buffer.reset(); + } else { + is_valid = true; + } +} + +simdjson_inline size_t string_builder::size() const noexcept { + return position; +} + +simdjson_inline void string_builder::append(char c) noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = c; + } +} + +simdjson_inline void string_builder::append_null() noexcept { + constexpr char null_literal[] = "null"; + constexpr size_t null_len = sizeof(null_literal) - 1; + if (capacity_check(null_len)) { + std::memcpy(buffer.get() + position, null_literal, null_len); + position += null_len; + } +} + +simdjson_inline void string_builder::clear() noexcept { + position = 0; + // if it was invalid, we should try to repair it + if (!is_valid) { + capacity = 0; + buffer.reset(); + is_valid = true; + } +} + +namespace internal { + +// We could specialize further for 32-bit integers. +simdjson_really_inline int int_log2(uint32_t x) { return (63 - leading_zeroes(x | 1)); } + +simdjson_really_inline int fast_digit_count(uint32_t x) { + static uint64_t table[] = { + 4294967296, 8589934582, 8589934582, 8589934582, 12884901788, + 12884901788, 12884901788, 17179868184, 17179868184, 17179868184, + 21474826480, 21474826480, 21474826480, 21474826480, 25769703776, + 25769703776, 25769703776, 30063771072, 30063771072, 30063771072, + 34349738368, 34349738368, 34349738368, 34349738368, 38554705664, + 38554705664, 38554705664, 41949672960, 41949672960, 41949672960, + 42949672960, 42949672960}; + return uint32_t((x + table[int_log2(x)]) >> 32); +} + +simdjson_really_inline int int_log2(uint64_t x) { return 63 - leading_zeroes(x | 1); } + +simdjson_really_inline int fast_digit_count(uint64_t x) { + static uint64_t table[] = {9, + 99, + 999, + 9999, + 99999, + 999999, + 9999999, + 99999999, + 999999999, + 9999999999, + 99999999999, + 999999999999, + 9999999999999, + 99999999999999, + 999999999999999ULL, + 9999999999999999ULL, + 99999999999999999ULL, + 999999999999999999ULL, + 9999999999999999999ULL}; + int y = (19 * int_log2(x) >> 6); + y += x > table[y]; + return y + 1; +} + +template ::value>::type> +simdjson_really_inline size_t digit_count(number_type v) noexcept { + static_assert(sizeof(number_type) == 8 || sizeof(number_type) == 4 || + sizeof(number_type) == 2 || sizeof(number_type) == 1, + "We only support 8-bit, 16-bit, 32-bit and 64-bit numbers"); + return fast_digit_count(v); +} +static const char decimal_table[200] = { + 0x30, 0x30, 0x30, 0x31, 0x30, 0x32, 0x30, 0x33, 0x30, 0x34, 0x30, 0x35, + 0x30, 0x36, 0x30, 0x37, 0x30, 0x38, 0x30, 0x39, 0x31, 0x30, 0x31, 0x31, + 0x31, 0x32, 0x31, 0x33, 0x31, 0x34, 0x31, 0x35, 0x31, 0x36, 0x31, 0x37, + 0x31, 0x38, 0x31, 0x39, 0x32, 0x30, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, + 0x32, 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37, 0x32, 0x38, 0x32, 0x39, + 0x33, 0x30, 0x33, 0x31, 0x33, 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35, + 0x33, 0x36, 0x33, 0x37, 0x33, 0x38, 0x33, 0x39, 0x34, 0x30, 0x34, 0x31, + 0x34, 0x32, 0x34, 0x33, 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, 0x34, 0x37, + 0x34, 0x38, 0x34, 0x39, 0x35, 0x30, 0x35, 0x31, 0x35, 0x32, 0x35, 0x33, + 0x35, 0x34, 0x35, 0x35, 0x35, 0x36, 0x35, 0x37, 0x35, 0x38, 0x35, 0x39, + 0x36, 0x30, 0x36, 0x31, 0x36, 0x32, 0x36, 0x33, 0x36, 0x34, 0x36, 0x35, + 0x36, 0x36, 0x36, 0x37, 0x36, 0x38, 0x36, 0x39, 0x37, 0x30, 0x37, 0x31, + 0x37, 0x32, 0x37, 0x33, 0x37, 0x34, 0x37, 0x35, 0x37, 0x36, 0x37, 0x37, + 0x37, 0x38, 0x37, 0x39, 0x38, 0x30, 0x38, 0x31, 0x38, 0x32, 0x38, 0x33, + 0x38, 0x34, 0x38, 0x35, 0x38, 0x36, 0x38, 0x37, 0x38, 0x38, 0x38, 0x39, + 0x39, 0x30, 0x39, 0x31, 0x39, 0x32, 0x39, 0x33, 0x39, 0x34, 0x39, 0x35, + 0x39, 0x36, 0x39, 0x37, 0x39, 0x38, 0x39, 0x39, +}; +} // namespace internal + +template +simdjson_inline void string_builder::append(number_type v) noexcept { + static_assert(std::is_same::value || + std::is_integral::value || + std::is_floating_point::value, + "Unsupported number type"); + // If C++17 is available, we can 'if constexpr' here. + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + if (v) { + constexpr char true_literal[] = "true"; + constexpr size_t true_len = sizeof(true_literal) - 1; + if (capacity_check(true_len)) { + std::memcpy(buffer.get() + position, true_literal, true_len); + position += true_len; + } + } else { + constexpr char false_literal[] = "false"; + constexpr size_t false_len = sizeof(false_literal) - 1; + if (capacity_check(false_len)) { + std::memcpy(buffer.get() + position, false_literal, false_len); + position += false_len; + } + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_unsigned::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + unsigned_type pv = static_cast(v); + size_t dc = internal::digit_count(pv); + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100)*2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_integral::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + bool negative = v < 0; + unsigned_type pv = static_cast(v); + if (negative) { + pv = 0 - pv; // the 0 is for Microsoft + } + size_t dc = internal::digit_count(pv); + if (negative) { + buffer.get()[position++] = '-'; + } + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100)*2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_floating_point::value) { + constexpr size_t max_number_size = 24; + if (capacity_check(max_number_size)) { + // We could specialize for float. + char *end = simdjson::internal::to_chars(buffer.get() + position, nullptr, + double(v)); + position = end - buffer.get(); + } + } +} + +simdjson_inline void +string_builder::escape_and_append(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(6 * input.size())) { + position += write_string_escaped(input, buffer.get() + position); + } +} + +simdjson_inline void +string_builder::escape_and_append_with_quotes(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * input.size())) { + buffer.get()[position++] = '"'; + position += write_string_escaped(input, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} + +simdjson_inline void +string_builder::escape_and_append_with_quotes(char input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * 1)) { + buffer.get()[position++] = '"'; + std::string_view cinput(&input, 1); + position += write_string_escaped(cinput, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} + +simdjson_inline void string_builder::escape_and_append_with_quotes(const char* input) noexcept { + std::string_view cinput(input); + escape_and_append_with_quotes(cinput); +} + +simdjson_inline void string_builder::append_raw(const char *c) noexcept { + size_t len = std::strlen(c); + append_raw(c, len); +} + +simdjson_inline void +string_builder::append_raw(std::string_view input) noexcept { + if (capacity_check(input.size())) { + std::memcpy(buffer.get() + position, input.data(), input.size()); + position += input.size(); + } +} + +simdjson_inline void string_builder::append_raw(const char *str, + size_t len) noexcept { + if (capacity_check(len)) { + std::memcpy(buffer.get() + position, str, len); + position += len; + } +} +#if SIMDJSON_SUPPORTS_CONCEPTS +// Support for optional types (std::optional, etc.) +template +simdjson_inline void string_builder::append(const T &opt) { + if (opt) { + append(*opt); + } else { + append_null(); + } +} +template +requires(std::is_convertible::value || +std::is_same::value ) +simdjson_inline void string_builder::append(const T &value) { + escape_and_append_with_quotes(value); +} +#endif + +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS + // Support for range-based appending (std::ranges::view, etc.) +template +requires (!std::is_convertible::value) +simdjson_inline void string_builder::append(const R &range) noexcept { + auto it = std::ranges::begin(range); + auto end = std::ranges::end(range); + if constexpr (concepts::is_pair) { + start_object(); + + if (it == end) { + end_object(); + return; // Handle empty range + } + // Append first item without leading comma + append_key_value(it->first, it->second); + ++it; + + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append_key_value(it->first, it->second); + } + end_object(); + } else { + start_array(); + if (it == end) { + end_array(); + return; // Handle empty range + } + + // Append first item without leading comma + append(*it); + ++it; + + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append(*it); + } + end_array(); + + } +} + +#endif + +#if SIMDJSON_EXCEPTIONS +simdjson_inline string_builder::operator std::string() const noexcept(false) { + return std::string(std::string_view()); +} + +simdjson_inline string_builder::operator std::string_view() const + noexcept(false) simdjson_lifetime_bound { + return view(); +} +#endif + +simdjson_inline simdjson_result +string_builder::view() const noexcept { + if (!is_valid) { + return simdjson::OUT_OF_CAPACITY; + } + return std::string_view(buffer.get(), position); +} + +simdjson_inline simdjson_result string_builder::c_str() noexcept { + if (capacity_check(1)) { + buffer.get()[position] = '\0'; + return buffer.get(); + } + return simdjson::OUT_OF_CAPACITY; +} + +simdjson_inline bool string_builder::validate_unicode() const noexcept { + return simdjson::validate_utf8(buffer.get(), position); +} + +simdjson_inline void string_builder::start_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '{'; + } +} + + +simdjson_inline void string_builder::end_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '}'; + } +} + + +simdjson_inline void string_builder::start_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '['; + } +} + + +simdjson_inline void string_builder::end_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ']'; + } +} + + +simdjson_inline void string_builder::append_comma() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ','; + } +} + + +simdjson_inline void string_builder::append_colon() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ':'; + } +} + +template +simdjson_inline void string_builder::append_key_value(key_type key, value_type value) noexcept { + static_assert( + std::is_same::value || + std::is_convertible::value, + "Unsupported key type"); + escape_and_append_with_quotes(key); + append_colon(); + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + append_null(); + } else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } else SIMDJSON_IF_CONSTEXPR(std::is_convertible::value) { + escape_and_append_with_quotes(value); + } else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } else { + append(value); + } +} + +} // namespace builder +} // namespace ppc64 +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_INL_H +/* end file simdjson/generic/ondemand/json_string_builder-inl.h for ppc64 */ +/* including simdjson/generic/ondemand/json_builder.h for ppc64: #include "simdjson/generic/ondemand/json_builder.h" */ +/* begin file simdjson/generic/ondemand/json_builder.h for ppc64 */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/concepts.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#if SIMDJSON_STATIC_REFLECTION + +#include +#include +#include +#include +#include +#include +#include +#include +// #include // for std::define_static_string - header not available yet + +namespace simdjson { +namespace ppc64 { +namespace builder { + +// Concept that checks if a type is a container but not a string (because +// strings handling must be handled differently) +template +concept container_but_not_string = + requires(T a) { + { a.size() } -> std::convertible_to; + { + a[std::declval()] + }; // check if elements are accessible for the subscript operator + } && !std::is_same_v && + !std::is_same_v && !std::is_same_v; + +template + requires(container_but_not_string) +constexpr void atom(string_builder &b, const T &t) { + if (t.size() == 0) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, t[0]); + for (size_t i = 1; i < t.size(); ++i) { + b.append(','); + atom(b, t[i]); + } + b.append(']'); +} + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +constexpr void atom(string_builder &b, const T &t) { + b.escape_and_append_with_quotes(t); +} + +template +constexpr void atom(string_builder &b, const T &m) { + if (m.empty()) { + b.append_raw("{}"); + return; + } + b.append('{'); + bool first = true; + for (const auto& [key, value] : m) { + if (!first) { + b.append(','); + } + first = false; + // Keys must be convertible to string_view per the concept + b.escape_and_append_with_quotes(key); + b.append(':'); + atom(b, value); + } + b.append('}'); +} + + +template::value && !std::is_same_v>::type> +constexpr void atom(string_builder &b, const number_type t) { + b.append(t); +} + +template + requires(std::is_class_v && !container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v) +constexpr void atom(string_builder &b, const T &t) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, t.[:dm:]); + i++; + }; + b.append('}'); +} + +// Support for optional types (std::optional, etc.) +template +constexpr void atom(string_builder &b, const T &opt) { + if (opt) { + atom(b, opt.value()); + } else { + b.append_raw("null"); + } +} + +// Support for smart pointers (std::unique_ptr, std::shared_ptr, etc.) +template +constexpr void atom(string_builder &b, const T &ptr) { + if (ptr) { + atom(b, *ptr); + } else { + b.append_raw("null"); + } +} + +// Support for enums - serialize as string representation using expand approach from P2996R12 +template + requires(std::is_enum_v) +void atom(string_builder &b, const T &e) { +#if SIMDJSON_STATIC_REFLECTION + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + constexpr auto enum_str = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(enum_val))); + if (e == [:enum_val:]) { + b.append_raw(enum_str); + return; + } + }; + // Fallback to integer if enum value not found + atom(b, static_cast>(e)); +#else + // Fallback: serialize as integer if reflection not available + atom(b, static_cast>(e)); +#endif +} + +// Support for appendable containers that don't have operator[] (sets, etc.) +template + requires(!container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v) +constexpr void atom(string_builder &b, const T &container) { + if (container.empty()) { + b.append_raw("[]"); + return; + } + b.append('['); + bool first = true; + for (const auto& item : container) { + if (!first) { + b.append(','); + } + first = false; + atom(b, item); + } + b.append(']'); +} + +// append functions that delegate to atom functions for primitive types +template + requires(std::is_arithmetic_v && !std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template + requires(!container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template +void append(string_builder &b, const T &t) { + atom(b, t); +} + +// works for struct +template + requires(std::is_class_v && !container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v) +void append(string_builder &b, const Z &z) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^Z, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, z.[:dm:]); + i++; + }; + b.append('}'); +} + +// works for container +template + requires(container_but_not_string) +void append(string_builder &b, const Z &z) { + if (z.size() == 0) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, z[0]); + for (size_t i = 1; i < z.size(); ++i) { + b.append(','); + atom(b, z[i]); + } + b.append(']'); +} + +template +simdjson_result to_json_string(const Z &z, size_t initial_capacity = 1024) { + string_builder b(initial_capacity); + append(b, z); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); +} + +template +simdjson_error to_json(const Z &z, std::string &s) { + string_builder b; + append(b, z); + std::string_view view; + if(auto e = b.view().get(view); e) { return e; } + s.assign(view); + return SUCCESS; +} + +template +string_builder& operator<<(string_builder& b, const Z& z) { + append(b, z); + return b; +} +} // namespace builder +} // namespace ppc64 +// Alias the function template to 'to' in the global namespace +template +simdjson_result to_json(const Z &z, size_t initial_capacity = 1024) { + return ppc64::builder::to_json_string(z, initial_capacity); +} +} // namespace simdjson + +#endif // SIMDJSON_STATIC_REFLECTION + +#endif +/* end file simdjson/generic/ondemand/json_builder.h for ppc64 */ /* end file simdjson/generic/ondemand/amalgamated.h for ppc64 */ /* including simdjson/ppc64/end.h: #include "simdjson/ppc64/end.h" */ @@ -89218,6 +100460,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + uint64_t((is_backslash | is_quote | is_control).to_bitmask()) + }; +} + } // unnamed namespace } // namespace westmere } // namespace simdjson @@ -89284,7 +100551,7 @@ class value_iterator; /* end file simdjson/generic/ondemand/base.h for westmere */ /* including simdjson/generic/ondemand/deserialize.h for westmere: #include "simdjson/generic/ondemand/deserialize.h" */ /* begin file simdjson/generic/ondemand/deserialize.h for westmere */ -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS #ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -89363,7 +100630,7 @@ template concept custom_deserializable = tag_invocable; template -concept deserializable = custom_deserializable || is_builtin_deserializable_v; +concept deserializable = custom_deserializable || is_builtin_deserializable_v || concepts::optional_type; template concept nothrow_custom_deserializable = nothrow_tag_invocable; @@ -89374,28 +100641,44 @@ concept nothrow_deserializable = nothrow_custom_deserializable || is_bu /// Deserialize Tag inline constexpr struct deserialize_tag { + using array_type = westmere::ondemand::array; + using object_type = westmere::ondemand::object; using value_type = westmere::ondemand::value; using document_type = westmere::ondemand::document; using document_reference_type = westmere::ondemand::document_reference; + // Customization Point for array + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(array_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for object + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(object_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + // Customization Point for value template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document reference template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } @@ -89405,7 +100688,7 @@ inline constexpr struct deserialize_tag { } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/deserialize.h for westmere */ /* including simdjson/generic/ondemand/value_iterator.h for westmere: #include "simdjson/generic/ondemand/value_iterator.h" */ @@ -89537,7 +100820,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** @@ -89547,7 +100831,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; @@ -89669,7 +100954,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** @@ -89679,7 +100965,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; @@ -89941,13 +101228,14 @@ class value { * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * When SIMDJSON_SUPPORTS_CONCEPTS is set, custom types are also supported. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -89964,6 +101252,7 @@ class value { * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * If the macro SIMDJSON_SUPPORTS_CONCEPTS is set, then custom types are also supported. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. @@ -89971,28 +101260,47 @@ class value { */ template simdjson_inline error_code get(T &out) -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION - if constexpr (custom_deserializable) { + #if SIMDJSON_SUPPORTS_CONCEPTS + if constexpr (custom_deserializable) { return deserialize(*this, out); - } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); - static_cast(out); // to get rid of unused errors - return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION - } + } else if constexpr (concepts::optional_type) { + using value_type = typename std::remove_cvref_t::value_type; + + // Check if the value is null + if (is_null()) { + out.reset(); // Set to nullopt + return SUCCESS; + } + + if (!out) { + out.emplace(); + } + return get(out.value()); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_CONCEPTS + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; #endif } @@ -90703,7 +102011,22 @@ struct simdjson_result : public westmere::implementat simdjson_result operator[](int) noexcept = delete; /** - * Get the type of this JSON value. + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * Given a valid JSON document, the answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just @@ -91315,6 +102638,7 @@ namespace ondemand { * The type of a JSON value. */ enum class json_type { + unknown=0, // Start at 1 to catch uninitialized / default values more easily array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) @@ -91521,6 +102845,12 @@ class raw_json_string { */ simdjson_inline const char * raw() const noexcept; + /** + * Get the character at index i. This is unchecked. + * [0] when the string is of length 0 returns the final quote ("). + */ + simdjson_inline char operator[](size_t i) const noexcept; + /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done) on target.size() characters, @@ -91660,10 +102990,10 @@ struct simdjson_result : public westmere::i simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline char operator[](size_t) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape(westmere::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(westmere::ondemand::json_iterator &iter) const noexcept; }; - } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H @@ -91679,6 +103009,7 @@ struct simdjson_result : public westmere::i /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include +#include namespace simdjson { namespace westmere { @@ -91797,7 +103128,9 @@ class parser { simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; - /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + /** @overload simdjson_result iterate(padded_string_view json) & noexcept + The string instance might be have its capacity extended. Note that this can still + result in AddressSanitizer: container-overflow in some cases. */ simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; @@ -91885,6 +103218,11 @@ class parser { * Setting batch_size to excessively large or excessively small values may impact negatively the * performance. * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what @@ -91892,10 +103230,10 @@ class parser { * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * - * ### Threads + * This is checked automatically with all iterate_many function calls, except for the two + * that take pointers (const char* or const uint8_t*). * - * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the - * hood to do some lookahead. + * ### Threads * * ### Parser Capacity * @@ -91921,14 +103259,16 @@ class parser { */ inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(padded_string_view json, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) + the string might be automatically padded with up to SIMDJSON_PADDING whitespace characters */ + inline simdjson_result iterate_many(std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe - /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; @@ -92030,13 +103370,39 @@ class parser { bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; #endif + /** + * Get a unique parser instance corresponding to the current thread. + * This instance can be safely used within the current thread, but it should + * not be passed to other threads. + * + * A parser should only be used for one document at a time. + * + * Our simdjson::from functions use this parser instance. + * + * You can free the related parser by calling release_parser(). + */ + static simdjson_inline simdjson_warn_unused ondemand::parser& get_parser(); + /** + * Release the parser instance initialized by get_parser() and all the + * associated resources (memory). Returns true if a parser instance + * was released. + */ + static simdjson_inline bool release_parser(); + private: + friend bool release_parser(); + friend ondemand::parser& get_parser(); + /** Get the thread-local parser instance, allocates it if needed */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_parser_instance(); + /** Get the thread-local parser instance, it might be null */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_threadlocal_parser_if_exists(); /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; std::unique_ptr string_buf{}; + #if SIMDJSON_DEVELOPMENT_CHECKS std::unique_ptr start_positions{}; #endif @@ -92200,6 +103566,37 @@ class array { * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; + +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this array as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON array is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the array, no matter where you are right now. @@ -92278,7 +103675,28 @@ struct simdjson_result : public westmere::implementat simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -92323,7 +103741,8 @@ class array_iterator { * * Part of the std::iterator interface. */ - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result + operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. /** * Check if we are at the end of the JSON. * @@ -92347,6 +103766,11 @@ class array_iterator { */ simdjson_inline array_iterator &operator++() noexcept; + /** + * Check if the array is at the end. + */ + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; + private: value_iterator iter{}; @@ -92365,7 +103789,6 @@ namespace simdjson { template<> struct simdjson_result : public westmere::implementation_simdjson_result_base { -public: simdjson_inline simdjson_result(westmere::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -92378,6 +103801,8 @@ struct simdjson_result : public westmere::im simdjson_inline bool operator==(const simdjson_result &) const noexcept; simdjson_inline bool operator!=(const simdjson_result &) const noexcept; simdjson_inline simdjson_result &operator++() noexcept; + + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; }; } // namespace simdjson @@ -92571,7 +103996,7 @@ class document { */ template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -92594,7 +104019,7 @@ class document { */ template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -92612,35 +104037,42 @@ class document { * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. - * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns INCORRECT_TYPE If the JSON value is of the given type. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_CONCEPTS + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_CONCEPTS } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; @@ -92703,7 +104135,7 @@ class document { * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a raw_json_string. * @@ -92712,7 +104144,7 @@ class document { * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a bool. * @@ -92862,11 +104294,27 @@ class document { * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * + * The answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. + * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * - * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + * Prior to simdjson 4.0, this function would return an error given a bad token. + * Starting with simdjson 4.0, it will return simdjson::ondemand::json_type::unknown. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. */ simdjson_inline simdjson_result type() noexcept; @@ -93156,7 +104604,7 @@ class document_reference { simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -93169,7 +104617,7 @@ class document_reference { } template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -93192,30 +104640,37 @@ class document_reference { */ template simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_CONCEPTS + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_CONCEPTS } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; @@ -93300,6 +104755,9 @@ struct simdjson_result : public westmere::implemen template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS + + using westmere::implementation_simdjson_result_base::operator*; + using westmere::implementation_simdjson_result_base::operator->; template ::value == false>::type> explicit simdjson_inline operator T() noexcept(false); simdjson_inline operator westmere::ondemand::array() & noexcept(false); @@ -93558,6 +105016,7 @@ class document_stream { * Default constructor. */ simdjson_inline iterator() noexcept; + simdjson_inline iterator(const iterator &other) noexcept = default; /** * Get the current document (or error). */ @@ -93571,6 +105030,7 @@ class document_stream { * @param other the end iterator to compare to. */ simdjson_inline bool operator!=(const iterator &other) const noexcept; + simdjson_inline bool operator==(const iterator &other) const noexcept; /** * @private * @@ -93614,6 +105074,11 @@ class document_stream { */ inline error_code error() const noexcept; + /** + * Returns whether the iterator is at the end. + */ + inline bool at_end() const noexcept; + private: simdjson_inline iterator(document_stream *s, bool finished) noexcept; /** The document_stream we're iterating through. */ @@ -93625,6 +105090,7 @@ class document_stream { friend class document_stream; friend class json_iterator; }; + using iterator = document_stream::iterator; /** * Start iterating the documents in the stream. @@ -94085,6 +105551,36 @@ class object { */ simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this object as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON object is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the object, no matter where you are right now. @@ -94128,12 +105624,32 @@ struct simdjson_result : public westmere::implementa simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; - inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; inline simdjson_result raw_json() noexcept; + #if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -94262,6 +105778,20 @@ inline simdjson_result to_json_string(simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); + +#if SIMDJSON_STATIC_REFLECTION +/** + * Create a JSON string from any user-defined type using static reflection. + * Only available when SIMDJSON_STATIC_REFLECTION is enabled. + */ +template + requires(!std::same_as && + !std::same_as && + !std::same_as && + !std::same_as) +inline std::string to_json_string(const T& obj); +#endif + } // namespace simdjson /** @@ -94333,17 +105863,22 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result #include +#if SIMDJSON_STATIC_REFLECTION +#include +// #include // for std::define_static_string - header not available yet +#endif namespace simdjson { template @@ -94390,6 +105925,32 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { return SUCCESS; } +////////////////////////////// +// String deserialization +////////////////////////////// + +// just a character! +error_code tag_invoke(deserialize_tag, auto &val, char &out) noexcept { + std::string_view x; + SIMDJSON_TRY(val.get_string().get(x)); + if(x.size() != 1) { + return INCORRECT_TYPE; + } + out = x[0]; + return SUCCESS; +} + +// any string-like type (can be constructed from std::string_view) +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + out = T{str}; + return SUCCESS; +} + + /** * STL containers have several constructors including one that takes a single * size argument. Thus, some compilers (Visual Studio) will not be able to @@ -94407,9 +105968,13 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { static_assert( std::is_default_constructible_v, "The specified type inside the container must default constructible."); - westmere::ondemand::array arr; - SIMDJSON_TRY(val.get_array().get(arr)); + if constexpr (std::is_same_v, westmere::ondemand::array>) { + arr = val; + } else { + SIMDJSON_TRY(val.get_array().get(arr)); + } + for (auto v : arr) { if constexpr (concepts::returns_reference) { if (auto const err = v.get().get(concepts::emplace_one(out)); @@ -94435,6 +106000,77 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { } +/** + * We want to support std::map and std::unordered_map but only for + * string-keyed types. + */ + template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::mapped_type; + static_assert( + deserializable, + "The specified value type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified value type inside the container must default constructible."); + westmere::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + value_type this_value; + SIMDJSON_TRY(field.value().get().get(this_value)); + [[maybe_unused]] std::pair result = out.emplace(key, this_value); + // unclear what to do if the key already exists + // if (result.second == false) { + // // key already exists + // } + } + (void)out; + return SUCCESS; +} + +template +error_code tag_invoke(deserialize_tag, westmere::ondemand::object &obj, T &out) noexcept { + using value_type = typename std::remove_cvref_t::mapped_type; + + out.clear(); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + + westmere::ondemand::value value_obj; + SIMDJSON_TRY(field.value().get(value_obj)); + + value_type this_value; + SIMDJSON_TRY(value_obj.get(this_value)); + out.emplace(typename T::key_type(key), std::move(this_value)); + } + return SUCCESS; +} + +template +error_code tag_invoke(deserialize_tag, westmere::ondemand::value &val, T &out) noexcept { + westmere::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + +template +error_code tag_invoke(deserialize_tag, westmere::ondemand::document &doc, T &out) noexcept { + westmere::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + +template +error_code tag_invoke(deserialize_tag, westmere::ondemand::document_reference &doc, T &out) noexcept { + westmere::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + /** * This CPO (Customization Point Object) will help deserialize into @@ -94476,17 +106112,16 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser /** * This CPO (Customization Point Object) will help deserialize into optional types. */ -template +template requires(!require_custom_serialization) -error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::value_type, ValT>) { +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept(nothrow_deserializable::value_type, decltype(val)>) { using value_type = typename std::remove_cvref_t::value_type; - static_assert( - deserializable, - "The specified type inside the unique_ptr must itself be deserializable"); - static_assert( - std::is_default_constructible_v, - "The specified type inside the unique_ptr must default constructible."); + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullopt + return SUCCESS; + } if (!out) { out.emplace(); @@ -94495,10 +106130,295 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser return SUCCESS; } + +#if SIMDJSON_STATIC_REFLECTION + + +template +constexpr bool user_defined_type = (std::is_class_v +&& !std::is_same_v && !std::is_same_v && !concepts::optional_type && +!concepts::appendable_containers && !require_custom_serialization); + + +template + requires(user_defined_type && std::is_class_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { + westmere::ondemand::object obj; + if constexpr (std::is_same_v, westmere::ondemand::object>) { + obj = val; + } else { + SIMDJSON_TRY(val.get_object().get(obj)); + } + error_code e = simdjson::SUCCESS; + template for (constexpr auto mem : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + // Note: removed static assert as optional types are now handled generically + // as long we are succesful or the field is not found, we continue + if(e == simdjson::SUCCESS || e == simdjson::NO_SUCH_FIELD) { + e = obj[key].get(out.[:mem:]); + } + } + }; + return e; +} + +// Support for enum deserialization - deserialize from string representation using expand approach from P2996R12 +template + requires(std::is_enum_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { +#if SIMDJSON_STATIC_REFLECTION + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + if (str == std::meta::identifier_of(enum_val)) { + out = [:enum_val:]; + return SUCCESS; + } + }; + + return INCORRECT_TYPE; +#else + // Fallback: deserialize as integer if reflection not available + std::underlying_type_t int_val; + SIMDJSON_TRY(val.get(int_val)); + out = static_cast(int_val); + return SUCCESS; +#endif +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::unique_ptr &out) noexcept { + if (!out) { + out = std::make_unique(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::shared_ptr &out) noexcept { + if (!out) { + out = std::make_shared(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +#endif // SIMDJSON_STATIC_REFLECTION + +//////////////////////////////////////// +// Unique pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Shared pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Explicit optional specializations +//////////////////////////////////////// + +//////////////////////////////////////// +// Explicit smart pointer specializations for string and int types +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_shared(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + int64_t temp; + SIMDJSON_TRY(val.get_int64().get(temp)); + *out = static_cast(temp); + return SUCCESS; +} + } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/std_deserialize.h for westmere */ // Inline definitions @@ -94782,6 +106702,9 @@ simdjson_inline array_iterator &array_iterator::operator++() noexcept { return *this; } +simdjson_inline bool array_iterator::at_end() const noexcept { + return iter.at_end(); +} } // namespace ondemand } // namespace westmere } // namespace simdjson @@ -94818,7 +106741,9 @@ simdjson_inline simdjson_result &simdjson_re ++(first); return *this; } - +simdjson_inline bool simdjson_result::at_end() const noexcept { + return !first.iter.is_valid() || first.at_end(); +} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H @@ -95576,8 +107501,8 @@ simdjson_inline document::operator object() & noexcept(false) { return get_objec simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document::operator double() noexcept(false) { return get_double(); } -simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } -simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator std::string_view() noexcept(false) simdjson_lifetime_bound { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) simdjson_lifetime_bound { return get_raw_json_string(); } simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document::operator value() noexcept(false) { return get_value(); } @@ -96531,10 +108456,19 @@ simdjson_inline document_stream::iterator& document_stream::iterator::operator++ return *this; } +simdjson_inline bool document_stream::iterator::at_end() const noexcept { + return finished; +} + + simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { return finished != other.finished; } +simdjson_inline bool document_stream::iterator::operator==(const document_stream::iterator &other) const noexcept { + return finished == other.finished; +} + simdjson_inline document_stream::iterator document_stream::begin() noexcept { start(); // If there are no documents, we're finished. @@ -97134,6 +109068,8 @@ simdjson_inline void json_iterator::assert_valid_position(token_position positio #ifndef SIMDJSON_CLANG_VISUAL_STUDIO SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#else + (void)position; // Suppress unused parameter warning #endif } @@ -97982,6 +109918,7 @@ simdjson_inline simdjson_result simdjson_result parser::iterate(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -98196,7 +110133,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(p #ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -98228,10 +110165,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(s } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { - if(json.capacity() - json.size() < SIMDJSON_PADDING) { - json.reserve(json.size() + SIMDJSON_PADDING); - } - return iterate(padded_string_view(json)); + return iterate(pad_with_reserve(json)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { @@ -98253,7 +110187,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(c } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -98268,6 +110202,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iter } inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { buf += 3; @@ -98276,16 +110211,24 @@ inline simdjson_result parser::iterate_many(const uint8_t *buf, if(allow_comma_separated && batch_size < len) { batch_size = len; } return document_stream(*this, buf, len, batch_size, allow_comma_separated); } + inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); } -inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { +inline simdjson_result parser::iterate_many(padded_string_view s, size_t batch_size, bool allow_comma_separated) noexcept { + if (!s.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(pad(s), batch_size, allow_comma_separated); } - simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } @@ -98320,6 +110263,34 @@ simdjson_inline simdjson_warn_unused simdjson_result parser::u return result; } +simdjson_inline simdjson_warn_unused ondemand::parser& parser::get_parser() { + return *parser::get_parser_instance(); +} + +simdjson_inline bool release_parser() { + auto &parser_instance = parser::get_threadlocal_parser_if_exists(); + if (parser_instance) { + parser_instance.reset(); + return true; + } + return false; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_parser_instance() { + std::unique_ptr& parser_instance = get_threadlocal_parser_if_exists(); + if (!parser_instance) { + parser_instance.reset(new ondemand::parser()); + } + return parser_instance; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_threadlocal_parser_if_exists() { + // @the-moisrex points out that this could be implemented with std::optional (C++17). + thread_local std::unique_ptr parser_instance = nullptr; + return parser_instance; +} + + } // namespace ondemand } // namespace westmere } // namespace simdjson @@ -98354,8 +110325,13 @@ namespace ondemand { simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} -simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } +simdjson_inline const char * raw_json_string::raw() const noexcept { + return reinterpret_cast(buf); +} +simdjson_inline char raw_json_string::operator[](size_t i) const noexcept { + return reinterpret_cast(buf)[i]; +} simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; @@ -98532,6 +110508,10 @@ simdjson_inline simdjson_result simdjson_result::operator[](size_t i) const noexcept { + if (error()) { return error(); } + return first[i]; +} simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(westmere::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { if (error()) { return error(); } return first.unescape(iter, allow_replacement); @@ -98557,6 +110537,9 @@ simdjson_inline simdjson_warn_unused simdjson_result simdjson_ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_builder.h" */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -99659,19 +111642,23 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } return result; } + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("bool"); - uint8_t tmpbuf[5+1+1]; // +1 for null termination - tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } - auto result = parse_bool(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - } - return result; + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } + simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("null"); @@ -99934,7 +111921,7 @@ simdjson_inline simdjson_result value_iterator::type() const noexcept case '5': case '6': case '7': case '8': case '9': return json_type::number; default: - return TAPE_ERROR; + return json_type::unknown; } } @@ -99974,6 +111961,1221 @@ simdjson_inline simdjson_result::simdjson_re #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* end file simdjson/generic/ondemand/value_iterator-inl.h for westmere */ +// JSON builder, ideally they should not be part of the ondemand directory +// but it is convenient for now to have them here. +/* including simdjson/generic/ondemand/json_string_builder.h for westmere: #include "simdjson/generic/ondemand/json_string_builder.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder.h for westmere */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace westmere { +namespace builder { + +/** + * A builder for JSON strings representing documents. This is a low-level + * builder that is not meant to be used directly by end-users. Though it + * supports atomic types (Booleans, strings), it does not support composed + * types (arrays and objects). + * + * Ultimately, this class can support kernel-specific optimizations. E.g., + * it may make use of SIMD instructions to escape strings faster. + */ +class string_builder { +public: + simdjson_inline string_builder(size_t initial_capacity = 1024); + + /** + * Append number (includes Booleans). Booleans are mapped to the strings + * false and true. Numbers are converted to strings abiding by the JSON standard. + * Floating-point numbers are converted to the shortest string that 'correctly' + * represents the number. + */ + template::value>::type> + simdjson_inline void append(number_type v) noexcept; + + /** + * Append character c. + */ + simdjson_inline void append(char c) noexcept; + + /** + * Append the string 'null'. + */ + simdjson_inline void append_null() noexcept; + + /** + * Clear the content. + */ + simdjson_inline void clear() noexcept; + + /** + * Append the std::string_view, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append(std::string_view input) noexcept; + + /** + * Append the std::string_view surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(std::string_view input) noexcept; + + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(char input) noexcept; + + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(const char* input) noexcept; + + /** + * Append the C string directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *c) noexcept; + + /** + * Append "{" to the buffer. + */ + simdjson_inline void start_object() noexcept; + + /** + * Append "}" to the buffer. + */ + simdjson_inline void end_object() noexcept; + + /** + * Append "[" to the buffer. + */ + simdjson_inline void start_array() noexcept; + + /** + * Append "]" to the buffer. + */ + simdjson_inline void end_array() noexcept; + + /** + * Append "," to the buffer. + */ + simdjson_inline void append_comma() noexcept; + + /** + * Append ":" to the buffer. + */ + simdjson_inline void append_colon() noexcept; + + /** + * Append a key-value pair to the buffer. + * The key is escaped and surrounded by double quotes. + * The value is escaped if it is a string. + */ + template + simdjson_inline void append_key_value(key_type key, value_type value) noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + // Support for optional types (std::optional, etc.) + template + simdjson_inline void append(const T &opt); + + // Support for string-like types + template + requires(std::is_convertible::value || + std::is_same::value ) + simdjson_inline void append(const T &value); +#endif +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS + // Support for range-based appending (std::ranges::view, etc.) + template +requires (!std::is_convertible::value) + simdjson_inline void append(const R &range) noexcept; +#endif + /** + * Append the std::string_view directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(std::string_view input) noexcept; + + /** + * Append len characters from str. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *str, size_t len) noexcept; +#if SIMDJSON_EXCEPTIONS + /** + * Creates an std::string from the written JSON buffer. + * Throws if memory allocation failed + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string() const noexcept(false); + + /** + * Creates an std::string_view from the written JSON buffer. + * Throws if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string_view() const noexcept(false) simdjson_lifetime_bound; +#endif + + /** + * Returns a view on the written JSON buffer. Returns an error + * if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result view() const noexcept; + + /** + * Appends the null character to the buffer and returns + * a pointer to the beginning of the written JSON buffer. + * Returns an error if memory allocation failed. + * The result is null-terminated. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result c_str() noexcept; + + /** + * Return true if the content is valid UTF-8. + */ + simdjson_inline bool validate_unicode() const noexcept; + + /** + * Returns the current size of the written JSON buffer. + * If an error occurred, returns 0. + */ + simdjson_inline size_t size() const noexcept; + +private: + /** + * Returns true if we can write at least upcoming_bytes bytes. + * The underlying buffer is reallocated if needed. It is designed + * to be called before writing to the buffer. It should be fast. + */ + simdjson_inline bool capacity_check(size_t upcoming_bytes); + + /** + * Grow the buffer to at least desired_capacity bytes. + * If the allocation fails, is_valid is set to false. We expect + * that this function would not be repeatedly called. + */ + simdjson_inline void grow_buffer(size_t desired_capacity); + + /** + * We use this helper function to make sure that is_valid is kept consistent. + */ + simdjson_inline void set_valid(bool valid) noexcept; + + std::unique_ptr buffer{}; + size_t position{0}; + size_t capacity{0}; + bool is_valid{true}; +}; + + + +} +} + + +#if !SIMDJSON_STATIC_REFLECTION +// fallback implementation until we have static reflection +template +simdjson_result to_json(const Z &z, size_t initial_capacity = 1024) { + simdjson::westmere::builder::string_builder b(initial_capacity); + b.append(z); + std::string_view s; + auto e = b.view().get(s); + if(e) { return e; } + return std::string(s); +} +#endif + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_H +/* end file simdjson/generic/ondemand/json_string_builder.h for westmere */ +/* including simdjson/generic/ondemand/json_string_builder-inl.h for westmere: #include "simdjson/generic/ondemand/json_string_builder-inl.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder-inl.h for westmere */ +/** + * This file is part of the builder API. It is temporarily in the ondemand + * directory but we will move it to a builder directory later. + */ +#include +#include +#include +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* + * Empirically, we have found that an inlined optimization is important for + * performance. The following macros are not ideal. We should find a better + * way to inline the code. + */ + +#if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \ + (defined(_M_AMD64) || defined(_M_X64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP == 2)) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#define SIMDJSON_EXPERIMENTAL_HAS_SSE2 1 +#endif +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_NEON +#define SIMDJSON_EXPERIMENTAL_HAS_NEON 1 +#endif +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +#include +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#include +#endif + +namespace simdjson { +namespace westmere { +namespace builder { + +static SIMDJSON_CONSTEXPR_LAMBDA std::array json_quotable_character = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + +/** + +A possible SWAR implementation of has_json_escapable_byte. It is not used because +it is slower than the current implementation. It is kept here for reference (to show +that we tried it). + +inline bool has_json_escapable_byte(uint64_t x) { + uint64_t is_ascii = 0x8080808080808080ULL & ~x; + uint64_t xor2 = x ^ 0x0202020202020202ULL; + uint64_t lt32_or_eq34 = xor2 - 0x2121212121212121ULL; + uint64_t sub92 = x ^ 0x5C5C5C5C5C5C5C5CULL; + uint64_t eq92 = (sub92 - 0x0101010101010101ULL); + return ((lt32_or_eq34 | eq92) & is_ascii) != 0; +} + +**/ + +SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline bool +simple_needs_escaping(std::string_view v) { + for (char c : v) { + // a table lookup is faster than a series of comparisons + if(json_quotable_character[static_cast(c)]) { + return true; + } + } + return false; +} + +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + uint8x16_t running = vdupq_n_u8(0); + uint8x16_t v34 = vdupq_n_u8(34); + uint8x16_t v92 = vdupq_n_u8(92); + + for (; i + 15 < view.size(); i += 16) { + uint8x16_t word = vld1q_u8((const uint8_t *)view.data() + i); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + if (i < view.size()) { + uint8x16_t word = + vld1q_u8((const uint8_t *)view.data() + view.length() - 16); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + return vmaxvq_u32(vreinterpretq_u32_u8(running)) != 0; +} +#elif SIMDJSON_EXPERIMENTAL_HAS_SSE2 +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + __m128i running = _mm_setzero_si128(); + for (; i + 15 < view.size(); i += 16) { + + __m128i word = _mm_loadu_si128(reinterpret_cast(view.data() + i)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + if (i < view.size()) { + __m128i word = + _mm_loadu_si128(reinterpret_cast(view.data() + view.length() - 16)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + return _mm_movemask_epi8(running) != 0; +} +#else +simdjson_inline bool fast_needs_escaping(std::string_view view) { + return simple_needs_escaping(view); +} +#endif + + +SIMDJSON_CONSTEXPR_LAMBDA inline size_t +find_next_json_quotable_character(const std::string_view view, + size_t location) noexcept { + + for (auto pos = view.begin() + location; pos != view.end(); ++pos) { + if (json_quotable_character[static_cast(*pos)]) { + return pos - view.begin(); + } + } + return size_t(view.size()); +} + +SIMDJSON_CONSTEXPR_LAMBDA static std::string_view control_chars[] = { + "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", + "\\u0007", "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", + "\\u000e", "\\u000f", "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", + "\\u0015", "\\u0016", "\\u0017", "\\u0018", "\\u0019", "\\u001a", "\\u001b", + "\\u001c", "\\u001d", "\\u001e", "\\u001f"}; + +// All Unicode characters may be placed within the quotation marks, except for the +// characters that MUST be escaped: quotation mark, reverse solidus, and the control +// characters (U+0000 through U+001F). +// There are two-character sequence escape representations of some popular characters: +// \", \\, \b, \f, \n, \r, \t. +SIMDJSON_CONSTEXPR_LAMBDA void escape_json_char(char c, char *&out) { + if (c == '"') { + memcpy(out, "\\\"", 2); + out += 2; + } else if (c == '\\') { + memcpy(out, "\\\\", 2); + out += 2; + } else { + std::string_view v = control_chars[uint8_t(c)]; + memcpy(out, v.data(), v.size()); + out += v.size(); + } +} + +inline size_t write_string_escaped(const std::string_view input, char *out) { + size_t mysize = input.size(); + if (!fast_needs_escaping(input)) { // fast path! + memcpy(out, input.data(), input.size()); + return input.size(); + } + const char *const initout = out; + size_t location = find_next_json_quotable_character(input, 0); + memcpy(out, input.data(), location); + out += location; + escape_json_char(input[location], out); + location += 1; + while (location < mysize) { + size_t newlocation = find_next_json_quotable_character(input, location); + memcpy(out, input.data() + location, newlocation - location); + out += newlocation - location; + location = newlocation; + if (location == mysize) { + break; + } + escape_json_char(input[location], out); + location += 1; + } + return out - initout; +} + +simdjson_inline string_builder::string_builder(size_t initial_capacity) + : buffer(new(std::nothrow) char[initial_capacity]), position(0), + capacity(buffer.get() != nullptr ? initial_capacity : 0), + is_valid(buffer.get() != nullptr) {} + +simdjson_inline bool string_builder::capacity_check(size_t upcoming_bytes) { + // We use the convention that when is_valid is false, then the capacity and + // the position are 0. + // Most of the time, this function will return true. + if (simdjson_likely(upcoming_bytes <= capacity - position)) { + return true; + } + // check for overflow, most of the time there is no overflow + if (simdjson_likely(position + upcoming_bytes < position)) { + return false; + } + // We will rarely get here. + grow_buffer((std::max)(capacity * 2, position + upcoming_bytes)); + // If the buffer allocation failed, we set is_valid to false. + return is_valid; +} + +simdjson_inline void string_builder::grow_buffer(size_t desired_capacity) { + if (!is_valid) { + return; + } + std::unique_ptr new_buffer(new (std::nothrow) char[desired_capacity]); + if (new_buffer.get() == nullptr) { + set_valid(false); + return; + } + std::memcpy(new_buffer.get(), buffer.get(), position); + buffer.swap(new_buffer); + capacity = desired_capacity; +} + +simdjson_inline void string_builder::set_valid(bool valid) noexcept { + if (!valid) { + is_valid = false; + capacity = 0; + position = 0; + buffer.reset(); + } else { + is_valid = true; + } +} + +simdjson_inline size_t string_builder::size() const noexcept { + return position; +} + +simdjson_inline void string_builder::append(char c) noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = c; + } +} + +simdjson_inline void string_builder::append_null() noexcept { + constexpr char null_literal[] = "null"; + constexpr size_t null_len = sizeof(null_literal) - 1; + if (capacity_check(null_len)) { + std::memcpy(buffer.get() + position, null_literal, null_len); + position += null_len; + } +} + +simdjson_inline void string_builder::clear() noexcept { + position = 0; + // if it was invalid, we should try to repair it + if (!is_valid) { + capacity = 0; + buffer.reset(); + is_valid = true; + } +} + +namespace internal { + +// We could specialize further for 32-bit integers. +simdjson_really_inline int int_log2(uint32_t x) { return (63 - leading_zeroes(x | 1)); } + +simdjson_really_inline int fast_digit_count(uint32_t x) { + static uint64_t table[] = { + 4294967296, 8589934582, 8589934582, 8589934582, 12884901788, + 12884901788, 12884901788, 17179868184, 17179868184, 17179868184, + 21474826480, 21474826480, 21474826480, 21474826480, 25769703776, + 25769703776, 25769703776, 30063771072, 30063771072, 30063771072, + 34349738368, 34349738368, 34349738368, 34349738368, 38554705664, + 38554705664, 38554705664, 41949672960, 41949672960, 41949672960, + 42949672960, 42949672960}; + return uint32_t((x + table[int_log2(x)]) >> 32); +} + +simdjson_really_inline int int_log2(uint64_t x) { return 63 - leading_zeroes(x | 1); } + +simdjson_really_inline int fast_digit_count(uint64_t x) { + static uint64_t table[] = {9, + 99, + 999, + 9999, + 99999, + 999999, + 9999999, + 99999999, + 999999999, + 9999999999, + 99999999999, + 999999999999, + 9999999999999, + 99999999999999, + 999999999999999ULL, + 9999999999999999ULL, + 99999999999999999ULL, + 999999999999999999ULL, + 9999999999999999999ULL}; + int y = (19 * int_log2(x) >> 6); + y += x > table[y]; + return y + 1; +} + +template ::value>::type> +simdjson_really_inline size_t digit_count(number_type v) noexcept { + static_assert(sizeof(number_type) == 8 || sizeof(number_type) == 4 || + sizeof(number_type) == 2 || sizeof(number_type) == 1, + "We only support 8-bit, 16-bit, 32-bit and 64-bit numbers"); + return fast_digit_count(v); +} +static const char decimal_table[200] = { + 0x30, 0x30, 0x30, 0x31, 0x30, 0x32, 0x30, 0x33, 0x30, 0x34, 0x30, 0x35, + 0x30, 0x36, 0x30, 0x37, 0x30, 0x38, 0x30, 0x39, 0x31, 0x30, 0x31, 0x31, + 0x31, 0x32, 0x31, 0x33, 0x31, 0x34, 0x31, 0x35, 0x31, 0x36, 0x31, 0x37, + 0x31, 0x38, 0x31, 0x39, 0x32, 0x30, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, + 0x32, 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37, 0x32, 0x38, 0x32, 0x39, + 0x33, 0x30, 0x33, 0x31, 0x33, 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35, + 0x33, 0x36, 0x33, 0x37, 0x33, 0x38, 0x33, 0x39, 0x34, 0x30, 0x34, 0x31, + 0x34, 0x32, 0x34, 0x33, 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, 0x34, 0x37, + 0x34, 0x38, 0x34, 0x39, 0x35, 0x30, 0x35, 0x31, 0x35, 0x32, 0x35, 0x33, + 0x35, 0x34, 0x35, 0x35, 0x35, 0x36, 0x35, 0x37, 0x35, 0x38, 0x35, 0x39, + 0x36, 0x30, 0x36, 0x31, 0x36, 0x32, 0x36, 0x33, 0x36, 0x34, 0x36, 0x35, + 0x36, 0x36, 0x36, 0x37, 0x36, 0x38, 0x36, 0x39, 0x37, 0x30, 0x37, 0x31, + 0x37, 0x32, 0x37, 0x33, 0x37, 0x34, 0x37, 0x35, 0x37, 0x36, 0x37, 0x37, + 0x37, 0x38, 0x37, 0x39, 0x38, 0x30, 0x38, 0x31, 0x38, 0x32, 0x38, 0x33, + 0x38, 0x34, 0x38, 0x35, 0x38, 0x36, 0x38, 0x37, 0x38, 0x38, 0x38, 0x39, + 0x39, 0x30, 0x39, 0x31, 0x39, 0x32, 0x39, 0x33, 0x39, 0x34, 0x39, 0x35, + 0x39, 0x36, 0x39, 0x37, 0x39, 0x38, 0x39, 0x39, +}; +} // namespace internal + +template +simdjson_inline void string_builder::append(number_type v) noexcept { + static_assert(std::is_same::value || + std::is_integral::value || + std::is_floating_point::value, + "Unsupported number type"); + // If C++17 is available, we can 'if constexpr' here. + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + if (v) { + constexpr char true_literal[] = "true"; + constexpr size_t true_len = sizeof(true_literal) - 1; + if (capacity_check(true_len)) { + std::memcpy(buffer.get() + position, true_literal, true_len); + position += true_len; + } + } else { + constexpr char false_literal[] = "false"; + constexpr size_t false_len = sizeof(false_literal) - 1; + if (capacity_check(false_len)) { + std::memcpy(buffer.get() + position, false_literal, false_len); + position += false_len; + } + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_unsigned::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + unsigned_type pv = static_cast(v); + size_t dc = internal::digit_count(pv); + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100)*2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_integral::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + bool negative = v < 0; + unsigned_type pv = static_cast(v); + if (negative) { + pv = 0 - pv; // the 0 is for Microsoft + } + size_t dc = internal::digit_count(pv); + if (negative) { + buffer.get()[position++] = '-'; + } + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100)*2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_floating_point::value) { + constexpr size_t max_number_size = 24; + if (capacity_check(max_number_size)) { + // We could specialize for float. + char *end = simdjson::internal::to_chars(buffer.get() + position, nullptr, + double(v)); + position = end - buffer.get(); + } + } +} + +simdjson_inline void +string_builder::escape_and_append(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(6 * input.size())) { + position += write_string_escaped(input, buffer.get() + position); + } +} + +simdjson_inline void +string_builder::escape_and_append_with_quotes(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * input.size())) { + buffer.get()[position++] = '"'; + position += write_string_escaped(input, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} + +simdjson_inline void +string_builder::escape_and_append_with_quotes(char input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * 1)) { + buffer.get()[position++] = '"'; + std::string_view cinput(&input, 1); + position += write_string_escaped(cinput, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} + +simdjson_inline void string_builder::escape_and_append_with_quotes(const char* input) noexcept { + std::string_view cinput(input); + escape_and_append_with_quotes(cinput); +} + +simdjson_inline void string_builder::append_raw(const char *c) noexcept { + size_t len = std::strlen(c); + append_raw(c, len); +} + +simdjson_inline void +string_builder::append_raw(std::string_view input) noexcept { + if (capacity_check(input.size())) { + std::memcpy(buffer.get() + position, input.data(), input.size()); + position += input.size(); + } +} + +simdjson_inline void string_builder::append_raw(const char *str, + size_t len) noexcept { + if (capacity_check(len)) { + std::memcpy(buffer.get() + position, str, len); + position += len; + } +} +#if SIMDJSON_SUPPORTS_CONCEPTS +// Support for optional types (std::optional, etc.) +template +simdjson_inline void string_builder::append(const T &opt) { + if (opt) { + append(*opt); + } else { + append_null(); + } +} +template +requires(std::is_convertible::value || +std::is_same::value ) +simdjson_inline void string_builder::append(const T &value) { + escape_and_append_with_quotes(value); +} +#endif + +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS + // Support for range-based appending (std::ranges::view, etc.) +template +requires (!std::is_convertible::value) +simdjson_inline void string_builder::append(const R &range) noexcept { + auto it = std::ranges::begin(range); + auto end = std::ranges::end(range); + if constexpr (concepts::is_pair) { + start_object(); + + if (it == end) { + end_object(); + return; // Handle empty range + } + // Append first item without leading comma + append_key_value(it->first, it->second); + ++it; + + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append_key_value(it->first, it->second); + } + end_object(); + } else { + start_array(); + if (it == end) { + end_array(); + return; // Handle empty range + } + + // Append first item without leading comma + append(*it); + ++it; + + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append(*it); + } + end_array(); + + } +} + +#endif + +#if SIMDJSON_EXCEPTIONS +simdjson_inline string_builder::operator std::string() const noexcept(false) { + return std::string(std::string_view()); +} + +simdjson_inline string_builder::operator std::string_view() const + noexcept(false) simdjson_lifetime_bound { + return view(); +} +#endif + +simdjson_inline simdjson_result +string_builder::view() const noexcept { + if (!is_valid) { + return simdjson::OUT_OF_CAPACITY; + } + return std::string_view(buffer.get(), position); +} + +simdjson_inline simdjson_result string_builder::c_str() noexcept { + if (capacity_check(1)) { + buffer.get()[position] = '\0'; + return buffer.get(); + } + return simdjson::OUT_OF_CAPACITY; +} + +simdjson_inline bool string_builder::validate_unicode() const noexcept { + return simdjson::validate_utf8(buffer.get(), position); +} + +simdjson_inline void string_builder::start_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '{'; + } +} + + +simdjson_inline void string_builder::end_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '}'; + } +} + + +simdjson_inline void string_builder::start_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '['; + } +} + + +simdjson_inline void string_builder::end_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ']'; + } +} + + +simdjson_inline void string_builder::append_comma() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ','; + } +} + + +simdjson_inline void string_builder::append_colon() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ':'; + } +} + +template +simdjson_inline void string_builder::append_key_value(key_type key, value_type value) noexcept { + static_assert( + std::is_same::value || + std::is_convertible::value, + "Unsupported key type"); + escape_and_append_with_quotes(key); + append_colon(); + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + append_null(); + } else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } else SIMDJSON_IF_CONSTEXPR(std::is_convertible::value) { + escape_and_append_with_quotes(value); + } else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } else { + append(value); + } +} + +} // namespace builder +} // namespace westmere +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_INL_H +/* end file simdjson/generic/ondemand/json_string_builder-inl.h for westmere */ +/* including simdjson/generic/ondemand/json_builder.h for westmere: #include "simdjson/generic/ondemand/json_builder.h" */ +/* begin file simdjson/generic/ondemand/json_builder.h for westmere */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/concepts.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#if SIMDJSON_STATIC_REFLECTION + +#include +#include +#include +#include +#include +#include +#include +#include +// #include // for std::define_static_string - header not available yet + +namespace simdjson { +namespace westmere { +namespace builder { + +// Concept that checks if a type is a container but not a string (because +// strings handling must be handled differently) +template +concept container_but_not_string = + requires(T a) { + { a.size() } -> std::convertible_to; + { + a[std::declval()] + }; // check if elements are accessible for the subscript operator + } && !std::is_same_v && + !std::is_same_v && !std::is_same_v; + +template + requires(container_but_not_string) +constexpr void atom(string_builder &b, const T &t) { + if (t.size() == 0) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, t[0]); + for (size_t i = 1; i < t.size(); ++i) { + b.append(','); + atom(b, t[i]); + } + b.append(']'); +} + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +constexpr void atom(string_builder &b, const T &t) { + b.escape_and_append_with_quotes(t); +} + +template +constexpr void atom(string_builder &b, const T &m) { + if (m.empty()) { + b.append_raw("{}"); + return; + } + b.append('{'); + bool first = true; + for (const auto& [key, value] : m) { + if (!first) { + b.append(','); + } + first = false; + // Keys must be convertible to string_view per the concept + b.escape_and_append_with_quotes(key); + b.append(':'); + atom(b, value); + } + b.append('}'); +} + + +template::value && !std::is_same_v>::type> +constexpr void atom(string_builder &b, const number_type t) { + b.append(t); +} + +template + requires(std::is_class_v && !container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v) +constexpr void atom(string_builder &b, const T &t) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, t.[:dm:]); + i++; + }; + b.append('}'); +} + +// Support for optional types (std::optional, etc.) +template +constexpr void atom(string_builder &b, const T &opt) { + if (opt) { + atom(b, opt.value()); + } else { + b.append_raw("null"); + } +} + +// Support for smart pointers (std::unique_ptr, std::shared_ptr, etc.) +template +constexpr void atom(string_builder &b, const T &ptr) { + if (ptr) { + atom(b, *ptr); + } else { + b.append_raw("null"); + } +} + +// Support for enums - serialize as string representation using expand approach from P2996R12 +template + requires(std::is_enum_v) +void atom(string_builder &b, const T &e) { +#if SIMDJSON_STATIC_REFLECTION + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + constexpr auto enum_str = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(enum_val))); + if (e == [:enum_val:]) { + b.append_raw(enum_str); + return; + } + }; + // Fallback to integer if enum value not found + atom(b, static_cast>(e)); +#else + // Fallback: serialize as integer if reflection not available + atom(b, static_cast>(e)); +#endif +} + +// Support for appendable containers that don't have operator[] (sets, etc.) +template + requires(!container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v) +constexpr void atom(string_builder &b, const T &container) { + if (container.empty()) { + b.append_raw("[]"); + return; + } + b.append('['); + bool first = true; + for (const auto& item : container) { + if (!first) { + b.append(','); + } + first = false; + atom(b, item); + } + b.append(']'); +} + +// append functions that delegate to atom functions for primitive types +template + requires(std::is_arithmetic_v && !std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template + requires(!container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template +void append(string_builder &b, const T &t) { + atom(b, t); +} + +// works for struct +template + requires(std::is_class_v && !container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v) +void append(string_builder &b, const Z &z) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^Z, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, z.[:dm:]); + i++; + }; + b.append('}'); +} + +// works for container +template + requires(container_but_not_string) +void append(string_builder &b, const Z &z) { + if (z.size() == 0) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, z[0]); + for (size_t i = 1; i < z.size(); ++i) { + b.append(','); + atom(b, z[i]); + } + b.append(']'); +} + +template +simdjson_result to_json_string(const Z &z, size_t initial_capacity = 1024) { + string_builder b(initial_capacity); + append(b, z); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); +} + +template +simdjson_error to_json(const Z &z, std::string &s) { + string_builder b; + append(b, z); + std::string_view view; + if(auto e = b.view().get(view); e) { return e; } + s.assign(view); + return SUCCESS; +} + +template +string_builder& operator<<(string_builder& b, const Z& z) { + append(b, z); + return b; +} +} // namespace builder +} // namespace westmere +// Alias the function template to 'to' in the global namespace +template +simdjson_result to_json(const Z &z, size_t initial_capacity = 1024) { + return westmere::builder::to_json_string(z, initial_capacity); +} +} // namespace simdjson + +#endif // SIMDJSON_STATIC_REFLECTION + +#endif +/* end file simdjson/generic/ondemand/json_builder.h for westmere */ /* end file simdjson/generic/ondemand/amalgamated.h for westmere */ /* including simdjson/westmere/end.h: #include "simdjson/westmere/end.h" */ @@ -100592,6 +113794,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + (is_backslash | is_quote | is_control).to_bitmask() + }; +} + } // unnamed namespace } // namespace lsx } // namespace simdjson @@ -100660,7 +113887,7 @@ class value_iterator; /* end file simdjson/generic/ondemand/base.h for lsx */ /* including simdjson/generic/ondemand/deserialize.h for lsx: #include "simdjson/generic/ondemand/deserialize.h" */ /* begin file simdjson/generic/ondemand/deserialize.h for lsx */ -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS #ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -100739,7 +113966,7 @@ template concept custom_deserializable = tag_invocable; template -concept deserializable = custom_deserializable || is_builtin_deserializable_v; +concept deserializable = custom_deserializable || is_builtin_deserializable_v || concepts::optional_type; template concept nothrow_custom_deserializable = nothrow_tag_invocable; @@ -100750,28 +113977,44 @@ concept nothrow_deserializable = nothrow_custom_deserializable || is_bu /// Deserialize Tag inline constexpr struct deserialize_tag { + using array_type = lsx::ondemand::array; + using object_type = lsx::ondemand::object; using value_type = lsx::ondemand::value; using document_type = lsx::ondemand::document; using document_reference_type = lsx::ondemand::document_reference; + // Customization Point for array + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(array_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for object + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(object_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + // Customization Point for value template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document reference template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } @@ -100781,7 +114024,7 @@ inline constexpr struct deserialize_tag { } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/deserialize.h for lsx */ /* including simdjson/generic/ondemand/value_iterator.h for lsx: #include "simdjson/generic/ondemand/value_iterator.h" */ @@ -100913,7 +114156,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** @@ -100923,7 +114167,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; @@ -101045,7 +114290,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** @@ -101055,7 +114301,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; @@ -101317,13 +114564,14 @@ class value { * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * When SIMDJSON_SUPPORTS_CONCEPTS is set, custom types are also supported. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -101340,6 +114588,7 @@ class value { * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * If the macro SIMDJSON_SUPPORTS_CONCEPTS is set, then custom types are also supported. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. @@ -101347,28 +114596,47 @@ class value { */ template simdjson_inline error_code get(T &out) -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION - if constexpr (custom_deserializable) { + #if SIMDJSON_SUPPORTS_CONCEPTS + if constexpr (custom_deserializable) { return deserialize(*this, out); - } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); - static_cast(out); // to get rid of unused errors - return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION - } + } else if constexpr (concepts::optional_type) { + using value_type = typename std::remove_cvref_t::value_type; + + // Check if the value is null + if (is_null()) { + out.reset(); // Set to nullopt + return SUCCESS; + } + + if (!out) { + out.emplace(); + } + return get(out.value()); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_CONCEPTS + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; #endif } @@ -102079,7 +115347,22 @@ struct simdjson_result : public lsx::implementation_simdjs simdjson_result operator[](int) noexcept = delete; /** - * Get the type of this JSON value. + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * Given a valid JSON document, the answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just @@ -102691,6 +115974,7 @@ namespace ondemand { * The type of a JSON value. */ enum class json_type { + unknown=0, // Start at 1 to catch uninitialized / default values more easily array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) @@ -102897,6 +116181,12 @@ class raw_json_string { */ simdjson_inline const char * raw() const noexcept; + /** + * Get the character at index i. This is unchecked. + * [0] when the string is of length 0 returns the final quote ("). + */ + simdjson_inline char operator[](size_t i) const noexcept; + /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done) on target.size() characters, @@ -103036,10 +116326,10 @@ struct simdjson_result : public lsx::implementat simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline char operator[](size_t) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape(lsx::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(lsx::ondemand::json_iterator &iter) const noexcept; }; - } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H @@ -103055,6 +116345,7 @@ struct simdjson_result : public lsx::implementat /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include +#include namespace simdjson { namespace lsx { @@ -103173,7 +116464,9 @@ class parser { simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; - /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + /** @overload simdjson_result iterate(padded_string_view json) & noexcept + The string instance might be have its capacity extended. Note that this can still + result in AddressSanitizer: container-overflow in some cases. */ simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; @@ -103261,6 +116554,11 @@ class parser { * Setting batch_size to excessively large or excessively small values may impact negatively the * performance. * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what @@ -103268,10 +116566,10 @@ class parser { * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * - * ### Threads + * This is checked automatically with all iterate_many function calls, except for the two + * that take pointers (const char* or const uint8_t*). * - * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the - * hood to do some lookahead. + * ### Threads * * ### Parser Capacity * @@ -103297,14 +116595,16 @@ class parser { */ inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(padded_string_view json, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) + the string might be automatically padded with up to SIMDJSON_PADDING whitespace characters */ + inline simdjson_result iterate_many(std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe - /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; @@ -103406,13 +116706,39 @@ class parser { bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; #endif + /** + * Get a unique parser instance corresponding to the current thread. + * This instance can be safely used within the current thread, but it should + * not be passed to other threads. + * + * A parser should only be used for one document at a time. + * + * Our simdjson::from functions use this parser instance. + * + * You can free the related parser by calling release_parser(). + */ + static simdjson_inline simdjson_warn_unused ondemand::parser& get_parser(); + /** + * Release the parser instance initialized by get_parser() and all the + * associated resources (memory). Returns true if a parser instance + * was released. + */ + static simdjson_inline bool release_parser(); + private: + friend bool release_parser(); + friend ondemand::parser& get_parser(); + /** Get the thread-local parser instance, allocates it if needed */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_parser_instance(); + /** Get the thread-local parser instance, it might be null */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_threadlocal_parser_if_exists(); /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; std::unique_ptr string_buf{}; + #if SIMDJSON_DEVELOPMENT_CHECKS std::unique_ptr start_positions{}; #endif @@ -103576,6 +116902,37 @@ class array { * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; + +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this array as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON array is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the array, no matter where you are right now. @@ -103654,7 +117011,28 @@ struct simdjson_result : public lsx::implementation_simdjs simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -103699,7 +117077,8 @@ class array_iterator { * * Part of the std::iterator interface. */ - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result + operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. /** * Check if we are at the end of the JSON. * @@ -103723,6 +117102,11 @@ class array_iterator { */ simdjson_inline array_iterator &operator++() noexcept; + /** + * Check if the array is at the end. + */ + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; + private: value_iterator iter{}; @@ -103741,7 +117125,6 @@ namespace simdjson { template<> struct simdjson_result : public lsx::implementation_simdjson_result_base { -public: simdjson_inline simdjson_result(lsx::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -103754,6 +117137,8 @@ struct simdjson_result : public lsx::implementati simdjson_inline bool operator==(const simdjson_result &) const noexcept; simdjson_inline bool operator!=(const simdjson_result &) const noexcept; simdjson_inline simdjson_result &operator++() noexcept; + + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; }; } // namespace simdjson @@ -103947,7 +117332,7 @@ class document { */ template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -103970,7 +117355,7 @@ class document { */ template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -103988,35 +117373,42 @@ class document { * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. - * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns INCORRECT_TYPE If the JSON value is of the given type. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_CONCEPTS + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_CONCEPTS } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; @@ -104079,7 +117471,7 @@ class document { * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a raw_json_string. * @@ -104088,7 +117480,7 @@ class document { * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a bool. * @@ -104238,11 +117630,27 @@ class document { * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * + * The answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. + * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * - * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + * Prior to simdjson 4.0, this function would return an error given a bad token. + * Starting with simdjson 4.0, it will return simdjson::ondemand::json_type::unknown. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. */ simdjson_inline simdjson_result type() noexcept; @@ -104532,7 +117940,7 @@ class document_reference { simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -104545,7 +117953,7 @@ class document_reference { } template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -104568,30 +117976,37 @@ class document_reference { */ template simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_CONCEPTS + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_CONCEPTS } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; @@ -104676,6 +118091,9 @@ struct simdjson_result : public lsx::implementation_sim template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS + + using lsx::implementation_simdjson_result_base::operator*; + using lsx::implementation_simdjson_result_base::operator->; template ::value == false>::type> explicit simdjson_inline operator T() noexcept(false); simdjson_inline operator lsx::ondemand::array() & noexcept(false); @@ -104934,6 +118352,7 @@ class document_stream { * Default constructor. */ simdjson_inline iterator() noexcept; + simdjson_inline iterator(const iterator &other) noexcept = default; /** * Get the current document (or error). */ @@ -104947,6 +118366,7 @@ class document_stream { * @param other the end iterator to compare to. */ simdjson_inline bool operator!=(const iterator &other) const noexcept; + simdjson_inline bool operator==(const iterator &other) const noexcept; /** * @private * @@ -104990,6 +118410,11 @@ class document_stream { */ inline error_code error() const noexcept; + /** + * Returns whether the iterator is at the end. + */ + inline bool at_end() const noexcept; + private: simdjson_inline iterator(document_stream *s, bool finished) noexcept; /** The document_stream we're iterating through. */ @@ -105001,6 +118426,7 @@ class document_stream { friend class document_stream; friend class json_iterator; }; + using iterator = document_stream::iterator; /** * Start iterating the documents in the stream. @@ -105461,6 +118887,36 @@ class object { */ simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this object as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON object is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the object, no matter where you are right now. @@ -105504,12 +118960,32 @@ struct simdjson_result : public lsx::implementation_simdj simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; - inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; inline simdjson_result raw_json() noexcept; + #if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -105638,6 +119114,20 @@ inline simdjson_result to_json_string(simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); + +#if SIMDJSON_STATIC_REFLECTION +/** + * Create a JSON string from any user-defined type using static reflection. + * Only available when SIMDJSON_STATIC_REFLECTION is enabled. + */ +template + requires(!std::same_as && + !std::same_as && + !std::same_as && + !std::same_as) +inline std::string to_json_string(const T& obj); +#endif + } // namespace simdjson /** @@ -105709,17 +119199,22 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result #include +#if SIMDJSON_STATIC_REFLECTION +#include +// #include // for std::define_static_string - header not available yet +#endif namespace simdjson { template @@ -105766,6 +119261,32 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { return SUCCESS; } +////////////////////////////// +// String deserialization +////////////////////////////// + +// just a character! +error_code tag_invoke(deserialize_tag, auto &val, char &out) noexcept { + std::string_view x; + SIMDJSON_TRY(val.get_string().get(x)); + if(x.size() != 1) { + return INCORRECT_TYPE; + } + out = x[0]; + return SUCCESS; +} + +// any string-like type (can be constructed from std::string_view) +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + out = T{str}; + return SUCCESS; +} + + /** * STL containers have several constructors including one that takes a single * size argument. Thus, some compilers (Visual Studio) will not be able to @@ -105783,9 +119304,13 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { static_assert( std::is_default_constructible_v, "The specified type inside the container must default constructible."); - lsx::ondemand::array arr; - SIMDJSON_TRY(val.get_array().get(arr)); + if constexpr (std::is_same_v, lsx::ondemand::array>) { + arr = val; + } else { + SIMDJSON_TRY(val.get_array().get(arr)); + } + for (auto v : arr) { if constexpr (concepts::returns_reference) { if (auto const err = v.get().get(concepts::emplace_one(out)); @@ -105811,6 +119336,77 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { } +/** + * We want to support std::map and std::unordered_map but only for + * string-keyed types. + */ + template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::mapped_type; + static_assert( + deserializable, + "The specified value type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified value type inside the container must default constructible."); + lsx::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + value_type this_value; + SIMDJSON_TRY(field.value().get().get(this_value)); + [[maybe_unused]] std::pair result = out.emplace(key, this_value); + // unclear what to do if the key already exists + // if (result.second == false) { + // // key already exists + // } + } + (void)out; + return SUCCESS; +} + +template +error_code tag_invoke(deserialize_tag, lsx::ondemand::object &obj, T &out) noexcept { + using value_type = typename std::remove_cvref_t::mapped_type; + + out.clear(); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + + lsx::ondemand::value value_obj; + SIMDJSON_TRY(field.value().get(value_obj)); + + value_type this_value; + SIMDJSON_TRY(value_obj.get(this_value)); + out.emplace(typename T::key_type(key), std::move(this_value)); + } + return SUCCESS; +} + +template +error_code tag_invoke(deserialize_tag, lsx::ondemand::value &val, T &out) noexcept { + lsx::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + +template +error_code tag_invoke(deserialize_tag, lsx::ondemand::document &doc, T &out) noexcept { + lsx::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + +template +error_code tag_invoke(deserialize_tag, lsx::ondemand::document_reference &doc, T &out) noexcept { + lsx::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + /** * This CPO (Customization Point Object) will help deserialize into @@ -105852,17 +119448,16 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser /** * This CPO (Customization Point Object) will help deserialize into optional types. */ -template +template requires(!require_custom_serialization) -error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::value_type, ValT>) { +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept(nothrow_deserializable::value_type, decltype(val)>) { using value_type = typename std::remove_cvref_t::value_type; - static_assert( - deserializable, - "The specified type inside the unique_ptr must itself be deserializable"); - static_assert( - std::is_default_constructible_v, - "The specified type inside the unique_ptr must default constructible."); + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullopt + return SUCCESS; + } if (!out) { out.emplace(); @@ -105871,10 +119466,295 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser return SUCCESS; } + +#if SIMDJSON_STATIC_REFLECTION + + +template +constexpr bool user_defined_type = (std::is_class_v +&& !std::is_same_v && !std::is_same_v && !concepts::optional_type && +!concepts::appendable_containers && !require_custom_serialization); + + +template + requires(user_defined_type && std::is_class_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { + lsx::ondemand::object obj; + if constexpr (std::is_same_v, lsx::ondemand::object>) { + obj = val; + } else { + SIMDJSON_TRY(val.get_object().get(obj)); + } + error_code e = simdjson::SUCCESS; + template for (constexpr auto mem : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + // Note: removed static assert as optional types are now handled generically + // as long we are succesful or the field is not found, we continue + if(e == simdjson::SUCCESS || e == simdjson::NO_SUCH_FIELD) { + e = obj[key].get(out.[:mem:]); + } + } + }; + return e; +} + +// Support for enum deserialization - deserialize from string representation using expand approach from P2996R12 +template + requires(std::is_enum_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { +#if SIMDJSON_STATIC_REFLECTION + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + if (str == std::meta::identifier_of(enum_val)) { + out = [:enum_val:]; + return SUCCESS; + } + }; + + return INCORRECT_TYPE; +#else + // Fallback: deserialize as integer if reflection not available + std::underlying_type_t int_val; + SIMDJSON_TRY(val.get(int_val)); + out = static_cast(int_val); + return SUCCESS; +#endif +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::unique_ptr &out) noexcept { + if (!out) { + out = std::make_unique(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::shared_ptr &out) noexcept { + if (!out) { + out = std::make_shared(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +#endif // SIMDJSON_STATIC_REFLECTION + +//////////////////////////////////////// +// Unique pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Shared pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Explicit optional specializations +//////////////////////////////////////// + +//////////////////////////////////////// +// Explicit smart pointer specializations for string and int types +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_shared(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + int64_t temp; + SIMDJSON_TRY(val.get_int64().get(temp)); + *out = static_cast(temp); + return SUCCESS; +} + } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/std_deserialize.h for lsx */ // Inline definitions @@ -106158,6 +120038,9 @@ simdjson_inline array_iterator &array_iterator::operator++() noexcept { return *this; } +simdjson_inline bool array_iterator::at_end() const noexcept { + return iter.at_end(); +} } // namespace ondemand } // namespace lsx } // namespace simdjson @@ -106194,7 +120077,9 @@ simdjson_inline simdjson_result &simdjson_result< ++(first); return *this; } - +simdjson_inline bool simdjson_result::at_end() const noexcept { + return !first.iter.is_valid() || first.at_end(); +} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H @@ -106952,8 +120837,8 @@ simdjson_inline document::operator object() & noexcept(false) { return get_objec simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document::operator double() noexcept(false) { return get_double(); } -simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } -simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator std::string_view() noexcept(false) simdjson_lifetime_bound { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) simdjson_lifetime_bound { return get_raw_json_string(); } simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document::operator value() noexcept(false) { return get_value(); } @@ -107907,10 +121792,19 @@ simdjson_inline document_stream::iterator& document_stream::iterator::operator++ return *this; } +simdjson_inline bool document_stream::iterator::at_end() const noexcept { + return finished; +} + + simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { return finished != other.finished; } +simdjson_inline bool document_stream::iterator::operator==(const document_stream::iterator &other) const noexcept { + return finished == other.finished; +} + simdjson_inline document_stream::iterator document_stream::begin() noexcept { start(); // If there are no documents, we're finished. @@ -108510,6 +122404,8 @@ simdjson_inline void json_iterator::assert_valid_position(token_position positio #ifndef SIMDJSON_CLANG_VISUAL_STUDIO SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#else + (void)position; // Suppress unused parameter warning #endif } @@ -109358,6 +123254,7 @@ simdjson_inline simdjson_result simdjson_result parser::iterate(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -109572,7 +123469,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(p #ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -109604,10 +123501,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(s } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { - if(json.capacity() - json.size() < SIMDJSON_PADDING) { - json.reserve(json.size() + SIMDJSON_PADDING); - } - return iterate(padded_string_view(json)); + return iterate(pad_with_reserve(json)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { @@ -109629,7 +123523,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(c } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -109644,6 +123538,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iter } inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { buf += 3; @@ -109652,16 +123547,24 @@ inline simdjson_result parser::iterate_many(const uint8_t *buf, if(allow_comma_separated && batch_size < len) { batch_size = len; } return document_stream(*this, buf, len, batch_size, allow_comma_separated); } + inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); } -inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { +inline simdjson_result parser::iterate_many(padded_string_view s, size_t batch_size, bool allow_comma_separated) noexcept { + if (!s.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(pad(s), batch_size, allow_comma_separated); } - simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } @@ -109696,6 +123599,34 @@ simdjson_inline simdjson_warn_unused simdjson_result parser::u return result; } +simdjson_inline simdjson_warn_unused ondemand::parser& parser::get_parser() { + return *parser::get_parser_instance(); +} + +simdjson_inline bool release_parser() { + auto &parser_instance = parser::get_threadlocal_parser_if_exists(); + if (parser_instance) { + parser_instance.reset(); + return true; + } + return false; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_parser_instance() { + std::unique_ptr& parser_instance = get_threadlocal_parser_if_exists(); + if (!parser_instance) { + parser_instance.reset(new ondemand::parser()); + } + return parser_instance; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_threadlocal_parser_if_exists() { + // @the-moisrex points out that this could be implemented with std::optional (C++17). + thread_local std::unique_ptr parser_instance = nullptr; + return parser_instance; +} + + } // namespace ondemand } // namespace lsx } // namespace simdjson @@ -109730,8 +123661,13 @@ namespace ondemand { simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} -simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } +simdjson_inline const char * raw_json_string::raw() const noexcept { + return reinterpret_cast(buf); +} +simdjson_inline char raw_json_string::operator[](size_t i) const noexcept { + return reinterpret_cast(buf)[i]; +} simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; @@ -109908,6 +123844,10 @@ simdjson_inline simdjson_result simdjson_result::operator[](size_t i) const noexcept { + if (error()) { return error(); } + return first[i]; +} simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(lsx::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { if (error()) { return error(); } return first.unescape(iter, allow_replacement); @@ -109933,6 +123873,9 @@ simdjson_inline simdjson_warn_unused simdjson_result simdjson_ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_builder.h" */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -111035,19 +124978,23 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } return result; } + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("bool"); - uint8_t tmpbuf[5+1+1]; // +1 for null termination - tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } - auto result = parse_bool(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - } - return result; + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } + simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("null"); @@ -111310,7 +125257,7 @@ simdjson_inline simdjson_result value_iterator::type() const noexcept case '5': case '6': case '7': case '8': case '9': return json_type::number; default: - return TAPE_ERROR; + return json_type::unknown; } } @@ -111350,6 +125297,1221 @@ simdjson_inline simdjson_result::simdjson_result( #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* end file simdjson/generic/ondemand/value_iterator-inl.h for lsx */ +// JSON builder, ideally they should not be part of the ondemand directory +// but it is convenient for now to have them here. +/* including simdjson/generic/ondemand/json_string_builder.h for lsx: #include "simdjson/generic/ondemand/json_string_builder.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder.h for lsx */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lsx { +namespace builder { + +/** + * A builder for JSON strings representing documents. This is a low-level + * builder that is not meant to be used directly by end-users. Though it + * supports atomic types (Booleans, strings), it does not support composed + * types (arrays and objects). + * + * Ultimately, this class can support kernel-specific optimizations. E.g., + * it may make use of SIMD instructions to escape strings faster. + */ +class string_builder { +public: + simdjson_inline string_builder(size_t initial_capacity = 1024); + + /** + * Append number (includes Booleans). Booleans are mapped to the strings + * false and true. Numbers are converted to strings abiding by the JSON standard. + * Floating-point numbers are converted to the shortest string that 'correctly' + * represents the number. + */ + template::value>::type> + simdjson_inline void append(number_type v) noexcept; + + /** + * Append character c. + */ + simdjson_inline void append(char c) noexcept; + + /** + * Append the string 'null'. + */ + simdjson_inline void append_null() noexcept; + + /** + * Clear the content. + */ + simdjson_inline void clear() noexcept; + + /** + * Append the std::string_view, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append(std::string_view input) noexcept; + + /** + * Append the std::string_view surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(std::string_view input) noexcept; + + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(char input) noexcept; + + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(const char* input) noexcept; + + /** + * Append the C string directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *c) noexcept; + + /** + * Append "{" to the buffer. + */ + simdjson_inline void start_object() noexcept; + + /** + * Append "}" to the buffer. + */ + simdjson_inline void end_object() noexcept; + + /** + * Append "[" to the buffer. + */ + simdjson_inline void start_array() noexcept; + + /** + * Append "]" to the buffer. + */ + simdjson_inline void end_array() noexcept; + + /** + * Append "," to the buffer. + */ + simdjson_inline void append_comma() noexcept; + + /** + * Append ":" to the buffer. + */ + simdjson_inline void append_colon() noexcept; + + /** + * Append a key-value pair to the buffer. + * The key is escaped and surrounded by double quotes. + * The value is escaped if it is a string. + */ + template + simdjson_inline void append_key_value(key_type key, value_type value) noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + // Support for optional types (std::optional, etc.) + template + simdjson_inline void append(const T &opt); + + // Support for string-like types + template + requires(std::is_convertible::value || + std::is_same::value ) + simdjson_inline void append(const T &value); +#endif +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS + // Support for range-based appending (std::ranges::view, etc.) + template +requires (!std::is_convertible::value) + simdjson_inline void append(const R &range) noexcept; +#endif + /** + * Append the std::string_view directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(std::string_view input) noexcept; + + /** + * Append len characters from str. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *str, size_t len) noexcept; +#if SIMDJSON_EXCEPTIONS + /** + * Creates an std::string from the written JSON buffer. + * Throws if memory allocation failed + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string() const noexcept(false); + + /** + * Creates an std::string_view from the written JSON buffer. + * Throws if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string_view() const noexcept(false) simdjson_lifetime_bound; +#endif + + /** + * Returns a view on the written JSON buffer. Returns an error + * if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result view() const noexcept; + + /** + * Appends the null character to the buffer and returns + * a pointer to the beginning of the written JSON buffer. + * Returns an error if memory allocation failed. + * The result is null-terminated. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result c_str() noexcept; + + /** + * Return true if the content is valid UTF-8. + */ + simdjson_inline bool validate_unicode() const noexcept; + + /** + * Returns the current size of the written JSON buffer. + * If an error occurred, returns 0. + */ + simdjson_inline size_t size() const noexcept; + +private: + /** + * Returns true if we can write at least upcoming_bytes bytes. + * The underlying buffer is reallocated if needed. It is designed + * to be called before writing to the buffer. It should be fast. + */ + simdjson_inline bool capacity_check(size_t upcoming_bytes); + + /** + * Grow the buffer to at least desired_capacity bytes. + * If the allocation fails, is_valid is set to false. We expect + * that this function would not be repeatedly called. + */ + simdjson_inline void grow_buffer(size_t desired_capacity); + + /** + * We use this helper function to make sure that is_valid is kept consistent. + */ + simdjson_inline void set_valid(bool valid) noexcept; + + std::unique_ptr buffer{}; + size_t position{0}; + size_t capacity{0}; + bool is_valid{true}; +}; + + + +} +} + + +#if !SIMDJSON_STATIC_REFLECTION +// fallback implementation until we have static reflection +template +simdjson_result to_json(const Z &z, size_t initial_capacity = 1024) { + simdjson::lsx::builder::string_builder b(initial_capacity); + b.append(z); + std::string_view s; + auto e = b.view().get(s); + if(e) { return e; } + return std::string(s); +} +#endif + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_H +/* end file simdjson/generic/ondemand/json_string_builder.h for lsx */ +/* including simdjson/generic/ondemand/json_string_builder-inl.h for lsx: #include "simdjson/generic/ondemand/json_string_builder-inl.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder-inl.h for lsx */ +/** + * This file is part of the builder API. It is temporarily in the ondemand + * directory but we will move it to a builder directory later. + */ +#include +#include +#include +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* + * Empirically, we have found that an inlined optimization is important for + * performance. The following macros are not ideal. We should find a better + * way to inline the code. + */ + +#if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \ + (defined(_M_AMD64) || defined(_M_X64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP == 2)) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#define SIMDJSON_EXPERIMENTAL_HAS_SSE2 1 +#endif +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_NEON +#define SIMDJSON_EXPERIMENTAL_HAS_NEON 1 +#endif +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +#include +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#include +#endif + +namespace simdjson { +namespace lsx { +namespace builder { + +static SIMDJSON_CONSTEXPR_LAMBDA std::array json_quotable_character = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + +/** + +A possible SWAR implementation of has_json_escapable_byte. It is not used because +it is slower than the current implementation. It is kept here for reference (to show +that we tried it). + +inline bool has_json_escapable_byte(uint64_t x) { + uint64_t is_ascii = 0x8080808080808080ULL & ~x; + uint64_t xor2 = x ^ 0x0202020202020202ULL; + uint64_t lt32_or_eq34 = xor2 - 0x2121212121212121ULL; + uint64_t sub92 = x ^ 0x5C5C5C5C5C5C5C5CULL; + uint64_t eq92 = (sub92 - 0x0101010101010101ULL); + return ((lt32_or_eq34 | eq92) & is_ascii) != 0; +} + +**/ + +SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline bool +simple_needs_escaping(std::string_view v) { + for (char c : v) { + // a table lookup is faster than a series of comparisons + if(json_quotable_character[static_cast(c)]) { + return true; + } + } + return false; +} + +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + uint8x16_t running = vdupq_n_u8(0); + uint8x16_t v34 = vdupq_n_u8(34); + uint8x16_t v92 = vdupq_n_u8(92); + + for (; i + 15 < view.size(); i += 16) { + uint8x16_t word = vld1q_u8((const uint8_t *)view.data() + i); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + if (i < view.size()) { + uint8x16_t word = + vld1q_u8((const uint8_t *)view.data() + view.length() - 16); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + return vmaxvq_u32(vreinterpretq_u32_u8(running)) != 0; +} +#elif SIMDJSON_EXPERIMENTAL_HAS_SSE2 +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + __m128i running = _mm_setzero_si128(); + for (; i + 15 < view.size(); i += 16) { + + __m128i word = _mm_loadu_si128(reinterpret_cast(view.data() + i)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + if (i < view.size()) { + __m128i word = + _mm_loadu_si128(reinterpret_cast(view.data() + view.length() - 16)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + return _mm_movemask_epi8(running) != 0; +} +#else +simdjson_inline bool fast_needs_escaping(std::string_view view) { + return simple_needs_escaping(view); +} +#endif + + +SIMDJSON_CONSTEXPR_LAMBDA inline size_t +find_next_json_quotable_character(const std::string_view view, + size_t location) noexcept { + + for (auto pos = view.begin() + location; pos != view.end(); ++pos) { + if (json_quotable_character[static_cast(*pos)]) { + return pos - view.begin(); + } + } + return size_t(view.size()); +} + +SIMDJSON_CONSTEXPR_LAMBDA static std::string_view control_chars[] = { + "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", + "\\u0007", "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", + "\\u000e", "\\u000f", "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", + "\\u0015", "\\u0016", "\\u0017", "\\u0018", "\\u0019", "\\u001a", "\\u001b", + "\\u001c", "\\u001d", "\\u001e", "\\u001f"}; + +// All Unicode characters may be placed within the quotation marks, except for the +// characters that MUST be escaped: quotation mark, reverse solidus, and the control +// characters (U+0000 through U+001F). +// There are two-character sequence escape representations of some popular characters: +// \", \\, \b, \f, \n, \r, \t. +SIMDJSON_CONSTEXPR_LAMBDA void escape_json_char(char c, char *&out) { + if (c == '"') { + memcpy(out, "\\\"", 2); + out += 2; + } else if (c == '\\') { + memcpy(out, "\\\\", 2); + out += 2; + } else { + std::string_view v = control_chars[uint8_t(c)]; + memcpy(out, v.data(), v.size()); + out += v.size(); + } +} + +inline size_t write_string_escaped(const std::string_view input, char *out) { + size_t mysize = input.size(); + if (!fast_needs_escaping(input)) { // fast path! + memcpy(out, input.data(), input.size()); + return input.size(); + } + const char *const initout = out; + size_t location = find_next_json_quotable_character(input, 0); + memcpy(out, input.data(), location); + out += location; + escape_json_char(input[location], out); + location += 1; + while (location < mysize) { + size_t newlocation = find_next_json_quotable_character(input, location); + memcpy(out, input.data() + location, newlocation - location); + out += newlocation - location; + location = newlocation; + if (location == mysize) { + break; + } + escape_json_char(input[location], out); + location += 1; + } + return out - initout; +} + +simdjson_inline string_builder::string_builder(size_t initial_capacity) + : buffer(new(std::nothrow) char[initial_capacity]), position(0), + capacity(buffer.get() != nullptr ? initial_capacity : 0), + is_valid(buffer.get() != nullptr) {} + +simdjson_inline bool string_builder::capacity_check(size_t upcoming_bytes) { + // We use the convention that when is_valid is false, then the capacity and + // the position are 0. + // Most of the time, this function will return true. + if (simdjson_likely(upcoming_bytes <= capacity - position)) { + return true; + } + // check for overflow, most of the time there is no overflow + if (simdjson_likely(position + upcoming_bytes < position)) { + return false; + } + // We will rarely get here. + grow_buffer((std::max)(capacity * 2, position + upcoming_bytes)); + // If the buffer allocation failed, we set is_valid to false. + return is_valid; +} + +simdjson_inline void string_builder::grow_buffer(size_t desired_capacity) { + if (!is_valid) { + return; + } + std::unique_ptr new_buffer(new (std::nothrow) char[desired_capacity]); + if (new_buffer.get() == nullptr) { + set_valid(false); + return; + } + std::memcpy(new_buffer.get(), buffer.get(), position); + buffer.swap(new_buffer); + capacity = desired_capacity; +} + +simdjson_inline void string_builder::set_valid(bool valid) noexcept { + if (!valid) { + is_valid = false; + capacity = 0; + position = 0; + buffer.reset(); + } else { + is_valid = true; + } +} + +simdjson_inline size_t string_builder::size() const noexcept { + return position; +} + +simdjson_inline void string_builder::append(char c) noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = c; + } +} + +simdjson_inline void string_builder::append_null() noexcept { + constexpr char null_literal[] = "null"; + constexpr size_t null_len = sizeof(null_literal) - 1; + if (capacity_check(null_len)) { + std::memcpy(buffer.get() + position, null_literal, null_len); + position += null_len; + } +} + +simdjson_inline void string_builder::clear() noexcept { + position = 0; + // if it was invalid, we should try to repair it + if (!is_valid) { + capacity = 0; + buffer.reset(); + is_valid = true; + } +} + +namespace internal { + +// We could specialize further for 32-bit integers. +simdjson_really_inline int int_log2(uint32_t x) { return (63 - leading_zeroes(x | 1)); } + +simdjson_really_inline int fast_digit_count(uint32_t x) { + static uint64_t table[] = { + 4294967296, 8589934582, 8589934582, 8589934582, 12884901788, + 12884901788, 12884901788, 17179868184, 17179868184, 17179868184, + 21474826480, 21474826480, 21474826480, 21474826480, 25769703776, + 25769703776, 25769703776, 30063771072, 30063771072, 30063771072, + 34349738368, 34349738368, 34349738368, 34349738368, 38554705664, + 38554705664, 38554705664, 41949672960, 41949672960, 41949672960, + 42949672960, 42949672960}; + return uint32_t((x + table[int_log2(x)]) >> 32); +} + +simdjson_really_inline int int_log2(uint64_t x) { return 63 - leading_zeroes(x | 1); } + +simdjson_really_inline int fast_digit_count(uint64_t x) { + static uint64_t table[] = {9, + 99, + 999, + 9999, + 99999, + 999999, + 9999999, + 99999999, + 999999999, + 9999999999, + 99999999999, + 999999999999, + 9999999999999, + 99999999999999, + 999999999999999ULL, + 9999999999999999ULL, + 99999999999999999ULL, + 999999999999999999ULL, + 9999999999999999999ULL}; + int y = (19 * int_log2(x) >> 6); + y += x > table[y]; + return y + 1; +} + +template ::value>::type> +simdjson_really_inline size_t digit_count(number_type v) noexcept { + static_assert(sizeof(number_type) == 8 || sizeof(number_type) == 4 || + sizeof(number_type) == 2 || sizeof(number_type) == 1, + "We only support 8-bit, 16-bit, 32-bit and 64-bit numbers"); + return fast_digit_count(v); +} +static const char decimal_table[200] = { + 0x30, 0x30, 0x30, 0x31, 0x30, 0x32, 0x30, 0x33, 0x30, 0x34, 0x30, 0x35, + 0x30, 0x36, 0x30, 0x37, 0x30, 0x38, 0x30, 0x39, 0x31, 0x30, 0x31, 0x31, + 0x31, 0x32, 0x31, 0x33, 0x31, 0x34, 0x31, 0x35, 0x31, 0x36, 0x31, 0x37, + 0x31, 0x38, 0x31, 0x39, 0x32, 0x30, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, + 0x32, 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37, 0x32, 0x38, 0x32, 0x39, + 0x33, 0x30, 0x33, 0x31, 0x33, 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35, + 0x33, 0x36, 0x33, 0x37, 0x33, 0x38, 0x33, 0x39, 0x34, 0x30, 0x34, 0x31, + 0x34, 0x32, 0x34, 0x33, 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, 0x34, 0x37, + 0x34, 0x38, 0x34, 0x39, 0x35, 0x30, 0x35, 0x31, 0x35, 0x32, 0x35, 0x33, + 0x35, 0x34, 0x35, 0x35, 0x35, 0x36, 0x35, 0x37, 0x35, 0x38, 0x35, 0x39, + 0x36, 0x30, 0x36, 0x31, 0x36, 0x32, 0x36, 0x33, 0x36, 0x34, 0x36, 0x35, + 0x36, 0x36, 0x36, 0x37, 0x36, 0x38, 0x36, 0x39, 0x37, 0x30, 0x37, 0x31, + 0x37, 0x32, 0x37, 0x33, 0x37, 0x34, 0x37, 0x35, 0x37, 0x36, 0x37, 0x37, + 0x37, 0x38, 0x37, 0x39, 0x38, 0x30, 0x38, 0x31, 0x38, 0x32, 0x38, 0x33, + 0x38, 0x34, 0x38, 0x35, 0x38, 0x36, 0x38, 0x37, 0x38, 0x38, 0x38, 0x39, + 0x39, 0x30, 0x39, 0x31, 0x39, 0x32, 0x39, 0x33, 0x39, 0x34, 0x39, 0x35, + 0x39, 0x36, 0x39, 0x37, 0x39, 0x38, 0x39, 0x39, +}; +} // namespace internal + +template +simdjson_inline void string_builder::append(number_type v) noexcept { + static_assert(std::is_same::value || + std::is_integral::value || + std::is_floating_point::value, + "Unsupported number type"); + // If C++17 is available, we can 'if constexpr' here. + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + if (v) { + constexpr char true_literal[] = "true"; + constexpr size_t true_len = sizeof(true_literal) - 1; + if (capacity_check(true_len)) { + std::memcpy(buffer.get() + position, true_literal, true_len); + position += true_len; + } + } else { + constexpr char false_literal[] = "false"; + constexpr size_t false_len = sizeof(false_literal) - 1; + if (capacity_check(false_len)) { + std::memcpy(buffer.get() + position, false_literal, false_len); + position += false_len; + } + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_unsigned::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + unsigned_type pv = static_cast(v); + size_t dc = internal::digit_count(pv); + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100)*2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_integral::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + bool negative = v < 0; + unsigned_type pv = static_cast(v); + if (negative) { + pv = 0 - pv; // the 0 is for Microsoft + } + size_t dc = internal::digit_count(pv); + if (negative) { + buffer.get()[position++] = '-'; + } + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100)*2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_floating_point::value) { + constexpr size_t max_number_size = 24; + if (capacity_check(max_number_size)) { + // We could specialize for float. + char *end = simdjson::internal::to_chars(buffer.get() + position, nullptr, + double(v)); + position = end - buffer.get(); + } + } +} + +simdjson_inline void +string_builder::escape_and_append(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(6 * input.size())) { + position += write_string_escaped(input, buffer.get() + position); + } +} + +simdjson_inline void +string_builder::escape_and_append_with_quotes(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * input.size())) { + buffer.get()[position++] = '"'; + position += write_string_escaped(input, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} + +simdjson_inline void +string_builder::escape_and_append_with_quotes(char input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * 1)) { + buffer.get()[position++] = '"'; + std::string_view cinput(&input, 1); + position += write_string_escaped(cinput, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} + +simdjson_inline void string_builder::escape_and_append_with_quotes(const char* input) noexcept { + std::string_view cinput(input); + escape_and_append_with_quotes(cinput); +} + +simdjson_inline void string_builder::append_raw(const char *c) noexcept { + size_t len = std::strlen(c); + append_raw(c, len); +} + +simdjson_inline void +string_builder::append_raw(std::string_view input) noexcept { + if (capacity_check(input.size())) { + std::memcpy(buffer.get() + position, input.data(), input.size()); + position += input.size(); + } +} + +simdjson_inline void string_builder::append_raw(const char *str, + size_t len) noexcept { + if (capacity_check(len)) { + std::memcpy(buffer.get() + position, str, len); + position += len; + } +} +#if SIMDJSON_SUPPORTS_CONCEPTS +// Support for optional types (std::optional, etc.) +template +simdjson_inline void string_builder::append(const T &opt) { + if (opt) { + append(*opt); + } else { + append_null(); + } +} +template +requires(std::is_convertible::value || +std::is_same::value ) +simdjson_inline void string_builder::append(const T &value) { + escape_and_append_with_quotes(value); +} +#endif + +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS + // Support for range-based appending (std::ranges::view, etc.) +template +requires (!std::is_convertible::value) +simdjson_inline void string_builder::append(const R &range) noexcept { + auto it = std::ranges::begin(range); + auto end = std::ranges::end(range); + if constexpr (concepts::is_pair) { + start_object(); + + if (it == end) { + end_object(); + return; // Handle empty range + } + // Append first item without leading comma + append_key_value(it->first, it->second); + ++it; + + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append_key_value(it->first, it->second); + } + end_object(); + } else { + start_array(); + if (it == end) { + end_array(); + return; // Handle empty range + } + + // Append first item without leading comma + append(*it); + ++it; + + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append(*it); + } + end_array(); + + } +} + +#endif + +#if SIMDJSON_EXCEPTIONS +simdjson_inline string_builder::operator std::string() const noexcept(false) { + return std::string(std::string_view()); +} + +simdjson_inline string_builder::operator std::string_view() const + noexcept(false) simdjson_lifetime_bound { + return view(); +} +#endif + +simdjson_inline simdjson_result +string_builder::view() const noexcept { + if (!is_valid) { + return simdjson::OUT_OF_CAPACITY; + } + return std::string_view(buffer.get(), position); +} + +simdjson_inline simdjson_result string_builder::c_str() noexcept { + if (capacity_check(1)) { + buffer.get()[position] = '\0'; + return buffer.get(); + } + return simdjson::OUT_OF_CAPACITY; +} + +simdjson_inline bool string_builder::validate_unicode() const noexcept { + return simdjson::validate_utf8(buffer.get(), position); +} + +simdjson_inline void string_builder::start_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '{'; + } +} + + +simdjson_inline void string_builder::end_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '}'; + } +} + + +simdjson_inline void string_builder::start_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '['; + } +} + + +simdjson_inline void string_builder::end_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ']'; + } +} + + +simdjson_inline void string_builder::append_comma() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ','; + } +} + + +simdjson_inline void string_builder::append_colon() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ':'; + } +} + +template +simdjson_inline void string_builder::append_key_value(key_type key, value_type value) noexcept { + static_assert( + std::is_same::value || + std::is_convertible::value, + "Unsupported key type"); + escape_and_append_with_quotes(key); + append_colon(); + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + append_null(); + } else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } else SIMDJSON_IF_CONSTEXPR(std::is_convertible::value) { + escape_and_append_with_quotes(value); + } else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } else { + append(value); + } +} + +} // namespace builder +} // namespace lsx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_INL_H +/* end file simdjson/generic/ondemand/json_string_builder-inl.h for lsx */ +/* including simdjson/generic/ondemand/json_builder.h for lsx: #include "simdjson/generic/ondemand/json_builder.h" */ +/* begin file simdjson/generic/ondemand/json_builder.h for lsx */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/concepts.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#if SIMDJSON_STATIC_REFLECTION + +#include +#include +#include +#include +#include +#include +#include +#include +// #include // for std::define_static_string - header not available yet + +namespace simdjson { +namespace lsx { +namespace builder { + +// Concept that checks if a type is a container but not a string (because +// strings handling must be handled differently) +template +concept container_but_not_string = + requires(T a) { + { a.size() } -> std::convertible_to; + { + a[std::declval()] + }; // check if elements are accessible for the subscript operator + } && !std::is_same_v && + !std::is_same_v && !std::is_same_v; + +template + requires(container_but_not_string) +constexpr void atom(string_builder &b, const T &t) { + if (t.size() == 0) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, t[0]); + for (size_t i = 1; i < t.size(); ++i) { + b.append(','); + atom(b, t[i]); + } + b.append(']'); +} + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +constexpr void atom(string_builder &b, const T &t) { + b.escape_and_append_with_quotes(t); +} + +template +constexpr void atom(string_builder &b, const T &m) { + if (m.empty()) { + b.append_raw("{}"); + return; + } + b.append('{'); + bool first = true; + for (const auto& [key, value] : m) { + if (!first) { + b.append(','); + } + first = false; + // Keys must be convertible to string_view per the concept + b.escape_and_append_with_quotes(key); + b.append(':'); + atom(b, value); + } + b.append('}'); +} + + +template::value && !std::is_same_v>::type> +constexpr void atom(string_builder &b, const number_type t) { + b.append(t); +} + +template + requires(std::is_class_v && !container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v) +constexpr void atom(string_builder &b, const T &t) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, t.[:dm:]); + i++; + }; + b.append('}'); +} + +// Support for optional types (std::optional, etc.) +template +constexpr void atom(string_builder &b, const T &opt) { + if (opt) { + atom(b, opt.value()); + } else { + b.append_raw("null"); + } +} + +// Support for smart pointers (std::unique_ptr, std::shared_ptr, etc.) +template +constexpr void atom(string_builder &b, const T &ptr) { + if (ptr) { + atom(b, *ptr); + } else { + b.append_raw("null"); + } +} + +// Support for enums - serialize as string representation using expand approach from P2996R12 +template + requires(std::is_enum_v) +void atom(string_builder &b, const T &e) { +#if SIMDJSON_STATIC_REFLECTION + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + constexpr auto enum_str = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(enum_val))); + if (e == [:enum_val:]) { + b.append_raw(enum_str); + return; + } + }; + // Fallback to integer if enum value not found + atom(b, static_cast>(e)); +#else + // Fallback: serialize as integer if reflection not available + atom(b, static_cast>(e)); +#endif +} + +// Support for appendable containers that don't have operator[] (sets, etc.) +template + requires(!container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v) +constexpr void atom(string_builder &b, const T &container) { + if (container.empty()) { + b.append_raw("[]"); + return; + } + b.append('['); + bool first = true; + for (const auto& item : container) { + if (!first) { + b.append(','); + } + first = false; + atom(b, item); + } + b.append(']'); +} + +// append functions that delegate to atom functions for primitive types +template + requires(std::is_arithmetic_v && !std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template + requires(!container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template +void append(string_builder &b, const T &t) { + atom(b, t); +} + +// works for struct +template + requires(std::is_class_v && !container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v) +void append(string_builder &b, const Z &z) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^Z, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, z.[:dm:]); + i++; + }; + b.append('}'); +} + +// works for container +template + requires(container_but_not_string) +void append(string_builder &b, const Z &z) { + if (z.size() == 0) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, z[0]); + for (size_t i = 1; i < z.size(); ++i) { + b.append(','); + atom(b, z[i]); + } + b.append(']'); +} + +template +simdjson_result to_json_string(const Z &z, size_t initial_capacity = 1024) { + string_builder b(initial_capacity); + append(b, z); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); +} + +template +simdjson_error to_json(const Z &z, std::string &s) { + string_builder b; + append(b, z); + std::string_view view; + if(auto e = b.view().get(view); e) { return e; } + s.assign(view); + return SUCCESS; +} + +template +string_builder& operator<<(string_builder& b, const Z& z) { + append(b, z); + return b; +} +} // namespace builder +} // namespace lsx +// Alias the function template to 'to' in the global namespace +template +simdjson_result to_json(const Z &z, size_t initial_capacity = 1024) { + return lsx::builder::to_json_string(z, initial_capacity); +} +} // namespace simdjson + +#endif // SIMDJSON_STATIC_REFLECTION + +#endif +/* end file simdjson/generic/ondemand/json_builder.h for lsx */ /* end file simdjson/generic/ondemand/amalgamated.h for lsx */ /* including simdjson/lsx/end.h: #include "simdjson/lsx/end.h" */ @@ -111981,6 +127143,31 @@ simdjson_inline backslash_and_quote backslash_and_quote::copy_and_find(const uin }; } + +struct escaping { + static constexpr uint32_t BYTES_PROCESSED = 16; + simdjson_inline static escaping copy_and_find(const uint8_t *src, uint8_t *dst); + + simdjson_inline bool has_escape() { return escape_bits != 0; } + simdjson_inline int escape_index() { return trailing_zeroes(escape_bits); } + + uint64_t escape_bits; +}; // struct escaping + + + +simdjson_inline escaping escaping::copy_and_find(const uint8_t *src, uint8_t *dst) { + static_assert(SIMDJSON_PADDING >= (BYTES_PROCESSED - 1), "escaping finder must process fewer than SIMDJSON_PADDING bytes"); + simd8 v(src); + v.store(dst); + simd8 is_quote = (v == '"'); + simd8 is_backslash = (v == '\\'); + simd8 is_control = (v < 32); + return { + (is_backslash | is_quote | is_control).to_bitmask() + }; +} + } // unnamed namespace } // namespace lasx } // namespace simdjson @@ -112049,7 +127236,7 @@ class value_iterator; /* end file simdjson/generic/ondemand/base.h for lasx */ /* including simdjson/generic/ondemand/deserialize.h for lasx: #include "simdjson/generic/ondemand/deserialize.h" */ /* begin file simdjson/generic/ondemand/deserialize.h for lasx */ -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS #ifndef SIMDJSON_ONDEMAND_DESERIALIZE_H /* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ @@ -112128,7 +127315,7 @@ template concept custom_deserializable = tag_invocable; template -concept deserializable = custom_deserializable || is_builtin_deserializable_v; +concept deserializable = custom_deserializable || is_builtin_deserializable_v || concepts::optional_type; template concept nothrow_custom_deserializable = nothrow_tag_invocable; @@ -112139,28 +127326,44 @@ concept nothrow_deserializable = nothrow_custom_deserializable || is_bu /// Deserialize Tag inline constexpr struct deserialize_tag { + using array_type = lasx::ondemand::array; + using object_type = lasx::ondemand::object; using value_type = lasx::ondemand::value; using document_type = lasx::ondemand::document; using document_reference_type = lasx::ondemand::document_reference; + // Customization Point for array + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(array_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + + // Customization Point for object + template + requires custom_deserializable + simdjson_warn_unused constexpr /* error_code */ auto operator()(object_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + return tag_invoke(*this, object, output); + } + // Customization Point for value template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(value_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } // Customization Point for document reference template requires custom_deserializable - [[nodiscard]] constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { + simdjson_warn_unused constexpr /* error_code */ auto operator()(document_reference_type &object, T& output) const noexcept(nothrow_custom_deserializable) { return tag_invoke(*this, object, output); } @@ -112170,7 +127373,7 @@ inline constexpr struct deserialize_tag { } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/deserialize.h for lasx */ /* including simdjson/generic/ondemand/value_iterator.h for lasx: #include "simdjson/generic/ondemand/value_iterator.h" */ @@ -112302,7 +127505,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_object() noexcept; /** @@ -112312,7 +127516,8 @@ class value_iterator { * * @returns Whether the object had any fields (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_object() noexcept; @@ -112434,7 +127639,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_array() noexcept; /** @@ -112444,7 +127650,8 @@ class value_iterator { * * @returns Whether the array had any elements (returns false for empty). * @error INCOMPLETE_ARRAY_OR_OBJECT If there are no more tokens (implying the *parent* - * array or object is incomplete). + * array or object is incomplete). An INCOMPLETE_ARRAY_OR_OBJECT is an unrecoverable error that + * invalidates the document. */ simdjson_warn_unused simdjson_inline simdjson_result started_root_array() noexcept; @@ -112706,13 +127913,14 @@ class value { * * You may use get_double(), get_bool(), get_uint64(), get_int64(), * get_object(), get_array(), get_raw_json_string(), or get_string() instead. + * When SIMDJSON_SUPPORTS_CONCEPTS is set, custom types are also supported. * * @returns A value of the given type, parsed from the JSON. * @returns INCORRECT_TYPE If the JSON value is not the given type. */ template simdjson_inline simdjson_result get() -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -112729,6 +127937,7 @@ class value { * Get this value as the given type. * * Supported types: object, array, raw_json_string, string_view, uint64_t, int64_t, double, bool + * If the macro SIMDJSON_SUPPORTS_CONCEPTS is set, then custom types are also supported. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. * @returns INCORRECT_TYPE If the JSON value is not an object. @@ -112736,28 +127945,47 @@ class value { */ template simdjson_inline error_code get(T &out) -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION - if constexpr (custom_deserializable) { + #if SIMDJSON_SUPPORTS_CONCEPTS + if constexpr (custom_deserializable) { return deserialize(*this, out); - } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); - static_cast(out); // to get rid of unused errors - return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION - } + } else if constexpr (concepts::optional_type) { + using value_type = typename std::remove_cvref_t::value_type; + + // Check if the value is null + if (is_null()) { + out.reset(); // Set to nullopt + return SUCCESS; + } + + if (!out) { + out.emplace(); + } + return get(out.value()); + } else { + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; + } +#else // SIMDJSON_SUPPORTS_CONCEPTS + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; #endif } @@ -113468,7 +128696,22 @@ struct simdjson_result : public lasx::implementation_simd simdjson_result operator[](int) noexcept = delete; /** - * Get the type of this JSON value. + * Get the type of this JSON value. It does not validate or consume the value. + * E.g., you must still call "is_null()" to check that a value is null even if + * "type()" returns json_type::null. + * + * Given a valid JSON document, the answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just @@ -114080,6 +129323,7 @@ namespace ondemand { * The type of a JSON value. */ enum class json_type { + unknown=0, // Start at 1 to catch uninitialized / default values more easily array=1, ///< A JSON array ( [ 1, 2, 3 ... ] ) object, ///< A JSON object ( { "a": 1, "b" 2, ... } ) @@ -114286,6 +129530,12 @@ class raw_json_string { */ simdjson_inline const char * raw() const noexcept; + /** + * Get the character at index i. This is unchecked. + * [0] when the string is of length 0 returns the final quote ("). + */ + simdjson_inline char operator[](size_t i) const noexcept; + /** * This compares the current instance to the std::string_view target: returns true if * they are byte-by-byte equal (no escaping is done) on target.size() characters, @@ -114425,10 +129675,10 @@ struct simdjson_result : public lasx::implement simdjson_inline ~simdjson_result() noexcept = default; ///< @private simdjson_inline simdjson_result raw() const noexcept; + simdjson_inline char operator[](size_t) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape(lasx::ondemand::json_iterator &iter, bool allow_replacement) const noexcept; simdjson_inline simdjson_warn_unused simdjson_result unescape_wobbly(lasx::ondemand::json_iterator &iter) const noexcept; }; - } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_RAW_JSON_STRING_H @@ -114444,6 +129694,7 @@ struct simdjson_result : public lasx::implement /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ #include +#include namespace simdjson { namespace lasx { @@ -114562,7 +129813,9 @@ class parser { simdjson_warn_unused simdjson_result iterate(std::string_view json, size_t capacity) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const std::string &json) & noexcept; - /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ + /** @overload simdjson_result iterate(padded_string_view json) & noexcept + The string instance might be have its capacity extended. Note that this can still + result in AddressSanitizer: container-overflow in some cases. */ simdjson_warn_unused simdjson_result iterate(std::string &json) & noexcept; /** @overload simdjson_result iterate(padded_string_view json) & noexcept */ simdjson_warn_unused simdjson_result iterate(const simdjson_result &json) & noexcept; @@ -114650,6 +129903,11 @@ class parser { * Setting batch_size to excessively large or excessively small values may impact negatively the * performance. * + * ### Threads + * + * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the + * hood to do some lookahead. + * * ### REQUIRED: Buffer Padding * * The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what @@ -114657,10 +129915,10 @@ class parser { * using a sanitizer that verifies that no uninitialized byte is read, then you should initialize the * SIMDJSON_PADDING bytes to avoid runtime warnings. * - * ### Threads + * This is checked automatically with all iterate_many function calls, except for the two + * that take pointers (const char* or const uint8_t*). * - * When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the - * hood to do some lookahead. + * ### Threads * * ### Parser Capacity * @@ -114686,14 +129944,16 @@ class parser { */ inline simdjson_result iterate_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ + inline simdjson_result iterate_many(padded_string_view json, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const std::string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe + /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) + the string might be automatically padded with up to SIMDJSON_PADDING whitespace characters */ + inline simdjson_result iterate_many(std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; /** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */ inline simdjson_result iterate_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE, bool allow_comma_separated = false) noexcept; - inline simdjson_result iterate_many(const padded_string &&s, size_t batch_size, bool allow_comma_separated = false) = delete;// unsafe - /** @private We do not want to allow implicit conversion from C string to std::string. */ simdjson_result iterate_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete; @@ -114795,13 +130055,39 @@ class parser { bool string_buffer_overflow(const uint8_t *string_buf_loc) const noexcept; #endif + /** + * Get a unique parser instance corresponding to the current thread. + * This instance can be safely used within the current thread, but it should + * not be passed to other threads. + * + * A parser should only be used for one document at a time. + * + * Our simdjson::from functions use this parser instance. + * + * You can free the related parser by calling release_parser(). + */ + static simdjson_inline simdjson_warn_unused ondemand::parser& get_parser(); + /** + * Release the parser instance initialized by get_parser() and all the + * associated resources (memory). Returns true if a parser instance + * was released. + */ + static simdjson_inline bool release_parser(); + private: + friend bool release_parser(); + friend ondemand::parser& get_parser(); + /** Get the thread-local parser instance, allocates it if needed */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_parser_instance(); + /** Get the thread-local parser instance, it might be null */ + static simdjson_inline simdjson_warn_unused std::unique_ptr& get_threadlocal_parser_if_exists(); /** @private [for benchmarking access] The implementation to use */ std::unique_ptr implementation{}; size_t _capacity{0}; size_t _max_capacity; size_t _max_depth{DEFAULT_MAX_DEPTH}; std::unique_ptr string_buf{}; + #if SIMDJSON_DEVELOPMENT_CHECKS std::unique_ptr start_positions{}; #endif @@ -114965,6 +130251,37 @@ class array { * - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length */ simdjson_inline simdjson_result at(size_t index) noexcept; + +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this array as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON array is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the array, no matter where you are right now. @@ -115043,7 +130360,28 @@ struct simdjson_result : public lasx::implementation_simd simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -115088,7 +130426,8 @@ class array_iterator { * * Part of the std::iterator interface. */ - simdjson_inline simdjson_result operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. + simdjson_inline simdjson_result + operator*() noexcept; // MUST ONLY BE CALLED ONCE PER ITERATION. /** * Check if we are at the end of the JSON. * @@ -115112,6 +130451,11 @@ class array_iterator { */ simdjson_inline array_iterator &operator++() noexcept; + /** + * Check if the array is at the end. + */ + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; + private: value_iterator iter{}; @@ -115130,7 +130474,6 @@ namespace simdjson { template<> struct simdjson_result : public lasx::implementation_simdjson_result_base { -public: simdjson_inline simdjson_result(lasx::ondemand::array_iterator &&value) noexcept; ///< @private simdjson_inline simdjson_result(error_code error) noexcept; ///< @private simdjson_inline simdjson_result() noexcept = default; @@ -115143,6 +130486,8 @@ struct simdjson_result : public lasx::implementa simdjson_inline bool operator==(const simdjson_result &) const noexcept; simdjson_inline bool operator!=(const simdjson_result &) const noexcept; simdjson_inline simdjson_result &operator++() noexcept; + + simdjson_warn_unused simdjson_inline bool at_end() const noexcept; }; } // namespace simdjson @@ -115336,7 +130681,7 @@ class document { */ template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -115359,7 +130704,7 @@ class document { */ template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -115377,35 +130722,42 @@ class document { * Be mindful that the document instance must remain in scope while you are accessing object, array and value instances. * * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. - * @returns INCORRECT_TYPE If the JSON value is not an object. + * @returns INCORRECT_TYPE If the JSON value is of the given type. * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. */ template simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_CONCEPTS + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_CONCEPTS } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_deprecated simdjson_inline error_code get(T &out) && noexcept; @@ -115468,7 +130820,7 @@ class document { * time it parses a document or when it is destroyed. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator std::string_view() noexcept(false); + simdjson_inline operator std::string_view() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a raw_json_string. * @@ -115477,7 +130829,7 @@ class document { * @returns A pointer to the raw JSON for the given string. * @exception simdjson_error(INCORRECT_TYPE) if the JSON value is not a string. */ - simdjson_inline operator raw_json_string() noexcept(false); + simdjson_inline operator raw_json_string() noexcept(false) simdjson_lifetime_bound; /** * Cast this JSON value to a bool. * @@ -115627,11 +130979,27 @@ class document { * E.g., you must still call "is_null()" to check that a value is null even if * "type()" returns json_type::null. * + * The answer can be one of + * simdjson::ondemand::json_type::object, + * simdjson::ondemand::json_type::array, + * simdjson::ondemand::json_type::string, + * simdjson::ondemand::json_type::number, + * simdjson::ondemand::json_type::boolean, + * simdjson::ondemand::json_type::null. + * + * Starting with simdjson 4.0, this function will return simdjson::ondemand::json_type::unknown + * given a bad token. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. + * * NOTE: If you're only expecting a value to be one type (a typical case), it's generally * better to just call .get_double, .get_string, etc. and check for INCORRECT_TYPE (or just * let it throw an exception). * - * @error TAPE_ERROR when the JSON value is a bad token like "}" "," or "alse". + * Prior to simdjson 4.0, this function would return an error given a bad token. + * Starting with simdjson 4.0, it will return simdjson::ondemand::json_type::unknown. + * This allows you to identify a case such as {"key": NaN} and identify the NaN value. + * The simdjson::ondemand::json_type::unknown value should only happen with non-valid JSON. */ simdjson_inline simdjson_result type() noexcept; @@ -115921,7 +131289,7 @@ class document_reference { simdjson_inline simdjson_result is_null() noexcept; template simdjson_inline simdjson_result get() & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -115934,7 +131302,7 @@ class document_reference { } template simdjson_inline simdjson_result get() && -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept @@ -115957,30 +131325,37 @@ class document_reference { */ template simdjson_inline error_code get(T &out) & -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS noexcept(custom_deserializable ? nothrow_custom_deserializable : true) #else noexcept #endif { -#if SIMDJSON_SUPPORTS_DESERIALIZATION +#if SIMDJSON_SUPPORTS_CONCEPTS if constexpr (custom_deserializable) { return deserialize(*this, out); } else { -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION - // Unless the simdjson library or the user provides an inline implementation, calling this method should - // immediately fail. - static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " - "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " - "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " - " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." - " You may also add support for custom types, see our documentation."); + static_assert(!sizeof(T), "The get method with type T is not implemented by the simdjson library. " + "And you do not seem to have added support for it. Indeed, we have that " + "simdjson::custom_deserializable is false and the type T is not a default type " + "such as ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, or bool."); static_cast(out); // to get rid of unused errors return UNINITIALIZED; -#if SIMDJSON_SUPPORTS_DESERIALIZATION } -#endif +#else // SIMDJSON_SUPPORTS_CONCEPTS + // Unless the simdjson library or the user provides an inline implementation, calling this method should + // immediately fail. + static_assert(!sizeof(T), "The get method with given type is not implemented by the simdjson library. " + "The supported types are ondemand::object, ondemand::array, raw_json_string, std::string_view, uint64_t, " + "int64_t, double, and bool. We recommend you use get_double(), get_bool(), get_uint64(), get_int64(), " + " get_object(), get_array(), get_raw_json_string(), or get_string() instead of the get template." + " You may also add support for custom types, see our documentation."); + static_cast(out); // to get rid of unused errors + return UNINITIALIZED; +#endif // SIMDJSON_SUPPORTS_CONCEPTS } + /** @overload template error_code get(T &out) & noexcept */ template simdjson_inline error_code get(T &out) && noexcept; simdjson_inline simdjson_result raw_json() noexcept; @@ -116065,6 +131440,9 @@ struct simdjson_result : public lasx::implementation_s template simdjson_inline error_code get(T &out) & noexcept; template simdjson_inline error_code get(T &out) && noexcept; #if SIMDJSON_EXCEPTIONS + + using lasx::implementation_simdjson_result_base::operator*; + using lasx::implementation_simdjson_result_base::operator->; template ::value == false>::type> explicit simdjson_inline operator T() noexcept(false); simdjson_inline operator lasx::ondemand::array() & noexcept(false); @@ -116323,6 +131701,7 @@ class document_stream { * Default constructor. */ simdjson_inline iterator() noexcept; + simdjson_inline iterator(const iterator &other) noexcept = default; /** * Get the current document (or error). */ @@ -116336,6 +131715,7 @@ class document_stream { * @param other the end iterator to compare to. */ simdjson_inline bool operator!=(const iterator &other) const noexcept; + simdjson_inline bool operator==(const iterator &other) const noexcept; /** * @private * @@ -116379,6 +131759,11 @@ class document_stream { */ inline error_code error() const noexcept; + /** + * Returns whether the iterator is at the end. + */ + inline bool at_end() const noexcept; + private: simdjson_inline iterator(document_stream *s, bool finished) noexcept; /** The document_stream we're iterating through. */ @@ -116390,6 +131775,7 @@ class document_stream { friend class document_stream; friend class json_iterator; }; + using iterator = document_stream::iterator; /** * Start iterating the documents in the stream. @@ -116850,6 +132236,36 @@ class object { */ simdjson_inline simdjson_result raw_json() noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + /** + * Get this object as the given type. + * + * @param out This is set to a value of the given type, parsed from the JSON. If there is an error, this may not be initialized. + * @returns INCORRECT_TYPE If the JSON object is not of the given type. + * @returns SUCCESS If the parse succeeded and the out parameter was set to the value. + */ + template + simdjson_inline error_code get(T &out) + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) { + static_assert(custom_deserializable); + return deserialize(*this, out); + } + /** + * Get this array as the given type. + * + * @returns A value of the given type, parsed from the JSON. + * @returns INCORRECT_TYPE If the JSON value is not the given type. + */ + template + simdjson_inline simdjson_result get() + noexcept(custom_deserializable ? nothrow_custom_deserializable : true) + { + static_assert(std::is_default_constructible::value, "The specified type is not default constructible."); + T out{}; + SIMDJSON_TRY(get(out)); + return out; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS protected: /** * Go to the end of the object, no matter where you are right now. @@ -116893,12 +132309,32 @@ struct simdjson_result : public lasx::implementation_sim simdjson_inline simdjson_result operator[](std::string_view key) && noexcept; simdjson_inline simdjson_result at_pointer(std::string_view json_pointer) noexcept; simdjson_inline simdjson_result at_path(std::string_view json_path) noexcept; - inline simdjson_result reset() noexcept; inline simdjson_result is_empty() noexcept; inline simdjson_result count_fields() & noexcept; inline simdjson_result raw_json() noexcept; + #if SIMDJSON_SUPPORTS_CONCEPTS + // TODO: move this code into object-inl.h + template + simdjson_inline simdjson_result get() noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + return first; + } + return first.get(); + } + template + simdjson_inline error_code get(T& out) noexcept { + if (error()) { return error(); } + if constexpr (std::is_same_v) { + out = first; + } else { + SIMDJSON_TRY( first.get(out) ); + } + return SUCCESS; + } +#endif // SIMDJSON_SUPPORTS_CONCEPTS }; } // namespace simdjson @@ -117027,6 +132463,20 @@ inline simdjson_result to_json_string(simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); inline simdjson_result to_json_string(simdjson_result x); + +#if SIMDJSON_STATIC_REFLECTION +/** + * Create a JSON string from any user-defined type using static reflection. + * Only available when SIMDJSON_STATIC_REFLECTION is enabled. + */ +template + requires(!std::same_as && + !std::same_as && + !std::same_as && + !std::same_as) +inline std::string to_json_string(const T& obj); +#endif + } // namespace simdjson /** @@ -117098,17 +132548,22 @@ inline std::ostream& operator<<(std::ostream& out, simdjson::simdjson_result #include +#if SIMDJSON_STATIC_REFLECTION +#include +// #include // for std::define_static_string - header not available yet +#endif namespace simdjson { template @@ -117155,6 +132610,32 @@ error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept { return SUCCESS; } +////////////////////////////// +// String deserialization +////////////////////////////// + +// just a character! +error_code tag_invoke(deserialize_tag, auto &val, char &out) noexcept { + std::string_view x; + SIMDJSON_TRY(val.get_string().get(x)); + if(x.size() != 1) { + return INCORRECT_TYPE; + } + out = x[0]; + return SUCCESS; +} + +// any string-like type (can be constructed from std::string_view) +template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(std::is_nothrow_constructible_v) { + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + out = T{str}; + return SUCCESS; +} + + /** * STL containers have several constructors including one that takes a single * size argument. Thus, some compilers (Visual Studio) will not be able to @@ -117172,9 +132653,13 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { static_assert( std::is_default_constructible_v, "The specified type inside the container must default constructible."); - lasx::ondemand::array arr; - SIMDJSON_TRY(val.get_array().get(arr)); + if constexpr (std::is_same_v, lasx::ondemand::array>) { + arr = val; + } else { + SIMDJSON_TRY(val.get_array().get(arr)); + } + for (auto v : arr) { if constexpr (concepts::returns_reference) { if (auto const err = v.get().get(concepts::emplace_one(out)); @@ -117200,6 +132685,77 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { } +/** + * We want to support std::map and std::unordered_map but only for + * string-keyed types. + */ + template + requires(!require_custom_serialization) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(false) { + using value_type = typename std::remove_cvref_t::mapped_type; + static_assert( + deserializable, + "The specified value type inside the container must itself be deserializable"); + static_assert( + std::is_default_constructible_v, + "The specified value type inside the container must default constructible."); + lasx::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + value_type this_value; + SIMDJSON_TRY(field.value().get().get(this_value)); + [[maybe_unused]] std::pair result = out.emplace(key, this_value); + // unclear what to do if the key already exists + // if (result.second == false) { + // // key already exists + // } + } + (void)out; + return SUCCESS; +} + +template +error_code tag_invoke(deserialize_tag, lasx::ondemand::object &obj, T &out) noexcept { + using value_type = typename std::remove_cvref_t::mapped_type; + + out.clear(); + for (auto field : obj) { + std::string_view key; + SIMDJSON_TRY(field.unescaped_key().get(key)); + + lasx::ondemand::value value_obj; + SIMDJSON_TRY(field.value().get(value_obj)); + + value_type this_value; + SIMDJSON_TRY(value_obj.get(this_value)); + out.emplace(typename T::key_type(key), std::move(this_value)); + } + return SUCCESS; +} + +template +error_code tag_invoke(deserialize_tag, lasx::ondemand::value &val, T &out) noexcept { + lasx::ondemand::object obj; + SIMDJSON_TRY(val.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + +template +error_code tag_invoke(deserialize_tag, lasx::ondemand::document &doc, T &out) noexcept { + lasx::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + +template +error_code tag_invoke(deserialize_tag, lasx::ondemand::document_reference &doc, T &out) noexcept { + lasx::ondemand::object obj; + SIMDJSON_TRY(doc.get_object().get(obj)); + return simdjson::deserialize(obj, out); +} + /** * This CPO (Customization Point Object) will help deserialize into @@ -117241,17 +132797,16 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser /** * This CPO (Customization Point Object) will help deserialize into optional types. */ -template +template requires(!require_custom_serialization) -error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deserializable::value_type, ValT>) { +error_code tag_invoke(deserialize_tag, auto &val, T &out) noexcept(nothrow_deserializable::value_type, decltype(val)>) { using value_type = typename std::remove_cvref_t::value_type; - static_assert( - deserializable, - "The specified type inside the unique_ptr must itself be deserializable"); - static_assert( - std::is_default_constructible_v, - "The specified type inside the unique_ptr must default constructible."); + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullopt + return SUCCESS; + } if (!out) { out.emplace(); @@ -117260,10 +132815,295 @@ error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept(nothrow_deser return SUCCESS; } + +#if SIMDJSON_STATIC_REFLECTION + + +template +constexpr bool user_defined_type = (std::is_class_v +&& !std::is_same_v && !std::is_same_v && !concepts::optional_type && +!concepts::appendable_containers && !require_custom_serialization); + + +template + requires(user_defined_type && std::is_class_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { + lasx::ondemand::object obj; + if constexpr (std::is_same_v, lasx::ondemand::object>) { + obj = val; + } else { + SIMDJSON_TRY(val.get_object().get(obj)); + } + error_code e = simdjson::SUCCESS; + template for (constexpr auto mem : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if constexpr (!std::meta::is_const(mem) && std::meta::is_public(mem)) { + constexpr std::string_view key = std::define_static_string(std::meta::identifier_of(mem)); + // Note: removed static assert as optional types are now handled generically + // as long we are succesful or the field is not found, we continue + if(e == simdjson::SUCCESS || e == simdjson::NO_SUCH_FIELD) { + e = obj[key].get(out.[:mem:]); + } + } + }; + return e; +} + +// Support for enum deserialization - deserialize from string representation using expand approach from P2996R12 +template + requires(std::is_enum_v) +error_code tag_invoke(deserialize_tag, ValT &val, T &out) noexcept { +#if SIMDJSON_STATIC_REFLECTION + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + if (str == std::meta::identifier_of(enum_val)) { + out = [:enum_val:]; + return SUCCESS; + } + }; + + return INCORRECT_TYPE; +#else + // Fallback: deserialize as integer if reflection not available + std::underlying_type_t int_val; + SIMDJSON_TRY(val.get(int_val)); + out = static_cast(int_val); + return SUCCESS; +#endif +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::unique_ptr &out) noexcept { + if (!out) { + out = std::make_unique(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +template + requires(user_defined_type>) +error_code tag_invoke(deserialize_tag, simdjson_value &val, std::shared_ptr &out) noexcept { + if (!out) { + out = std::make_shared(); + if (!out) { + return MEMALLOC; + } + } + if (auto err = val.get(*out)) { + out.reset(); + return err; + } + return SUCCESS; +} + +#endif // SIMDJSON_STATIC_REFLECTION + +//////////////////////////////////////// +// Unique pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_unique(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Shared pointers +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_bool().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_int64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_uint64().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_double().get(*out)); + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + if (val.is_null()) { + out.reset(); + return SUCCESS; + } + if (!out) { + out = std::make_shared(); + if (!out) { return MEMALLOC; } + } + SIMDJSON_TRY(val.get_string().get(*out)); + return SUCCESS; +} + + +//////////////////////////////////////// +// Explicit optional specializations +//////////////////////////////////////// + +//////////////////////////////////////// +// Explicit smart pointer specializations for string and int types +//////////////////////////////////////// +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::shared_ptr &out) noexcept { + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_shared(); + } + std::string_view str; + SIMDJSON_TRY(val.get_string().get(str)); + *out = std::string{str}; + return SUCCESS; +} + +error_code tag_invoke(deserialize_tag, auto &val, std::unique_ptr &out) noexcept { + // Check if the value is null + if (val.is_null()) { + out.reset(); // Set to nullptr + return SUCCESS; + } + + if (!out) { + out = std::make_unique(); + } + int64_t temp; + SIMDJSON_TRY(val.get_int64().get(temp)); + *out = static_cast(temp); + return SUCCESS; +} + } // namespace simdjson #endif // SIMDJSON_ONDEMAND_DESERIALIZE_H -#endif // SIMDJSON_SUPPORTS_DESERIALIZATION +#endif // SIMDJSON_SUPPORTS_CONCEPTS /* end file simdjson/generic/ondemand/std_deserialize.h for lasx */ // Inline definitions @@ -117547,6 +133387,9 @@ simdjson_inline array_iterator &array_iterator::operator++() noexcept { return *this; } +simdjson_inline bool array_iterator::at_end() const noexcept { + return iter.at_end(); +} } // namespace ondemand } // namespace lasx } // namespace simdjson @@ -117583,7 +133426,9 @@ simdjson_inline simdjson_result &simdjson_result ++(first); return *this; } - +simdjson_inline bool simdjson_result::at_end() const noexcept { + return !first.iter.is_valid() || first.at_end(); +} } // namespace simdjson #endif // SIMDJSON_GENERIC_ONDEMAND_ARRAY_ITERATOR_INL_H @@ -118341,8 +134186,8 @@ simdjson_inline document::operator object() & noexcept(false) { return get_objec simdjson_inline document::operator uint64_t() noexcept(false) { return get_uint64(); } simdjson_inline document::operator int64_t() noexcept(false) { return get_int64(); } simdjson_inline document::operator double() noexcept(false) { return get_double(); } -simdjson_inline document::operator std::string_view() noexcept(false) { return get_string(false); } -simdjson_inline document::operator raw_json_string() noexcept(false) { return get_raw_json_string(); } +simdjson_inline document::operator std::string_view() noexcept(false) simdjson_lifetime_bound { return get_string(false); } +simdjson_inline document::operator raw_json_string() noexcept(false) simdjson_lifetime_bound { return get_raw_json_string(); } simdjson_inline document::operator bool() noexcept(false) { return get_bool(); } simdjson_inline document::operator value() noexcept(false) { return get_value(); } @@ -119296,10 +135141,19 @@ simdjson_inline document_stream::iterator& document_stream::iterator::operator++ return *this; } +simdjson_inline bool document_stream::iterator::at_end() const noexcept { + return finished; +} + + simdjson_inline bool document_stream::iterator::operator!=(const document_stream::iterator &other) const noexcept { return finished != other.finished; } +simdjson_inline bool document_stream::iterator::operator==(const document_stream::iterator &other) const noexcept { + return finished == other.finished; +} + simdjson_inline document_stream::iterator document_stream::begin() noexcept { start(); // If there are no documents, we're finished. @@ -119899,6 +135753,8 @@ simdjson_inline void json_iterator::assert_valid_position(token_position positio #ifndef SIMDJSON_CLANG_VISUAL_STUDIO SIMDJSON_ASSUME( position >= &parser->implementation->structural_indexes[0] ); SIMDJSON_ASSUME( position < &parser->implementation->structural_indexes[parser->implementation->n_structural_indexes] ); +#else + (void)position; // Suppress unused parameter warning #endif } @@ -120747,6 +136603,7 @@ simdjson_inline simdjson_result simdjson_result parser::iterate(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -120961,7 +136818,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(p #ifdef SIMDJSON_EXPERIMENTAL_ALLOW_INCOMPLETE_JSON simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_allow_incomplete_json(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -120993,10 +136850,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(s } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(std::string &json) & noexcept { - if(json.capacity() - json.size() < SIMDJSON_PADDING) { - json.reserve(json.size() + SIMDJSON_PADDING); - } - return iterate(padded_string_view(json)); + return iterate(pad_with_reserve(json)); } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(const std::string &json) & noexcept { @@ -121018,7 +136872,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iterate(c } simdjson_warn_unused simdjson_inline simdjson_result parser::iterate_raw(padded_string_view json) & noexcept { - if (json.padding() < SIMDJSON_PADDING) { return INSUFFICIENT_PADDING; } + if (!json.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } json.remove_utf8_bom(); @@ -121033,6 +136887,7 @@ simdjson_warn_unused simdjson_inline simdjson_result parser::iter } inline simdjson_result parser::iterate_many(const uint8_t *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. if(batch_size < MINIMAL_BATCH_SIZE) { batch_size = MINIMAL_BATCH_SIZE; } if((len >= 3) && (std::memcmp(buf, "\xEF\xBB\xBF", 3) == 0)) { buf += 3; @@ -121041,16 +136896,24 @@ inline simdjson_result parser::iterate_many(const uint8_t *buf, if(allow_comma_separated && batch_size < len) { batch_size = len; } return document_stream(*this, buf, len, batch_size, allow_comma_separated); } + inline simdjson_result parser::iterate_many(const char *buf, size_t len, size_t batch_size, bool allow_comma_separated) noexcept { + // Warning: no check is done on the buffer padding. We trust the user. return iterate_many(reinterpret_cast(buf), len, batch_size, allow_comma_separated); } -inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { +inline simdjson_result parser::iterate_many(padded_string_view s, size_t batch_size, bool allow_comma_separated) noexcept { + if (!s.has_sufficient_padding()) { return INSUFFICIENT_PADDING; } return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); } inline simdjson_result parser::iterate_many(const padded_string &s, size_t batch_size, bool allow_comma_separated) noexcept { - return iterate_many(s.data(), s.length(), batch_size, allow_comma_separated); + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(const std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(padded_string_view(s), batch_size, allow_comma_separated); +} +inline simdjson_result parser::iterate_many(std::string &s, size_t batch_size, bool allow_comma_separated) noexcept { + return iterate_many(pad(s), batch_size, allow_comma_separated); } - simdjson_pure simdjson_inline size_t parser::capacity() const noexcept { return _capacity; } @@ -121085,6 +136948,34 @@ simdjson_inline simdjson_warn_unused simdjson_result parser::u return result; } +simdjson_inline simdjson_warn_unused ondemand::parser& parser::get_parser() { + return *parser::get_parser_instance(); +} + +simdjson_inline bool release_parser() { + auto &parser_instance = parser::get_threadlocal_parser_if_exists(); + if (parser_instance) { + parser_instance.reset(); + return true; + } + return false; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_parser_instance() { + std::unique_ptr& parser_instance = get_threadlocal_parser_if_exists(); + if (!parser_instance) { + parser_instance.reset(new ondemand::parser()); + } + return parser_instance; +} + +simdjson_inline simdjson_warn_unused std::unique_ptr& parser::get_threadlocal_parser_if_exists() { + // @the-moisrex points out that this could be implemented with std::optional (C++17). + thread_local std::unique_ptr parser_instance = nullptr; + return parser_instance; +} + + } // namespace ondemand } // namespace lasx } // namespace simdjson @@ -121119,8 +137010,13 @@ namespace ondemand { simdjson_inline raw_json_string::raw_json_string(const uint8_t * _buf) noexcept : buf{_buf} {} -simdjson_inline const char * raw_json_string::raw() const noexcept { return reinterpret_cast(buf); } +simdjson_inline const char * raw_json_string::raw() const noexcept { + return reinterpret_cast(buf); +} +simdjson_inline char raw_json_string::operator[](size_t i) const noexcept { + return reinterpret_cast(buf)[i]; +} simdjson_inline bool raw_json_string::is_free_from_unescaped_quote(std::string_view target) noexcept { size_t pos{0}; @@ -121297,6 +137193,10 @@ simdjson_inline simdjson_result simdjson_result::operator[](size_t i) const noexcept { + if (error()) { return error(); } + return first[i]; +} simdjson_inline simdjson_warn_unused simdjson_result simdjson_result::unescape(lasx::ondemand::json_iterator &iter, bool allow_replacement) const noexcept { if (error()) { return error(); } return first.unescape(iter, allow_replacement); @@ -121322,6 +137222,9 @@ simdjson_inline simdjson_warn_unused simdjson_result simdjson_ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/object.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/serialization.h" */ /* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/value.h" */ +/* amalgamation skipped (editor-only): #if SIMDJSON_STATIC_REFLECTION */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/ondemand/json_builder.h" */ +/* amalgamation skipped (editor-only): #endif */ /* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ namespace simdjson { @@ -122424,19 +138327,23 @@ simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get } return result; } + simdjson_warn_unused simdjson_inline simdjson_result value_iterator::get_root_bool(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("bool"); - uint8_t tmpbuf[5+1+1]; // +1 for null termination - tmpbuf[5+1] = '\0'; // make sure that buffer is always null terminated. - if (!_json_iter->copy_to_buffer(json, max_len, tmpbuf, 5+1)) { return incorrect_type_error("Not a boolean"); } - auto result = parse_bool(tmpbuf); - if(result.error() == SUCCESS) { - if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } - advance_root_scalar("bool"); - } - return result; + // We have a boolean if we have either "true" or "false" and the next character is either + // a structural character or whitespace. We also check that the length is correct: + // "true" and "false" are 4 and 5 characters long, respectively. + bool value_true = (max_len >= 4 && !atomparsing::str4ncmp(json, "true") && + (max_len == 4 || jsoncharutils::is_structural_or_whitespace(json[4]))); + bool value_false = (max_len >= 5 && !atomparsing::str4ncmp(json, "false") && + (max_len == 5 || jsoncharutils::is_structural_or_whitespace(json[5]))); + if(value_true == false && value_false == false) { return incorrect_type_error("Not a boolean"); } + if (check_trailing && !_json_iter->is_single_token()) { return TRAILING_CONTENT; } + advance_root_scalar("bool"); + return value_true; } + simdjson_inline simdjson_result value_iterator::is_root_null(bool check_trailing) noexcept { auto max_len = peek_root_length(); auto json = peek_root_scalar("null"); @@ -122699,7 +138606,7 @@ simdjson_inline simdjson_result value_iterator::type() const noexcept case '5': case '6': case '7': case '8': case '9': return json_type::number; default: - return TAPE_ERROR; + return json_type::unknown; } } @@ -122739,6 +138646,1221 @@ simdjson_inline simdjson_result::simdjson_result #endif // SIMDJSON_GENERIC_ONDEMAND_VALUE_ITERATOR_INL_H /* end file simdjson/generic/ondemand/value_iterator-inl.h for lasx */ +// JSON builder, ideally they should not be part of the ondemand directory +// but it is convenient for now to have them here. +/* including simdjson/generic/ondemand/json_string_builder.h for lasx: #include "simdjson/generic/ondemand/json_string_builder.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder.h for lasx */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/implementation_simdjson_result_base.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +namespace simdjson { +namespace lasx { +namespace builder { + +/** + * A builder for JSON strings representing documents. This is a low-level + * builder that is not meant to be used directly by end-users. Though it + * supports atomic types (Booleans, strings), it does not support composed + * types (arrays and objects). + * + * Ultimately, this class can support kernel-specific optimizations. E.g., + * it may make use of SIMD instructions to escape strings faster. + */ +class string_builder { +public: + simdjson_inline string_builder(size_t initial_capacity = 1024); + + /** + * Append number (includes Booleans). Booleans are mapped to the strings + * false and true. Numbers are converted to strings abiding by the JSON standard. + * Floating-point numbers are converted to the shortest string that 'correctly' + * represents the number. + */ + template::value>::type> + simdjson_inline void append(number_type v) noexcept; + + /** + * Append character c. + */ + simdjson_inline void append(char c) noexcept; + + /** + * Append the string 'null'. + */ + simdjson_inline void append_null() noexcept; + + /** + * Clear the content. + */ + simdjson_inline void clear() noexcept; + + /** + * Append the std::string_view, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append(std::string_view input) noexcept; + + /** + * Append the std::string_view surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(std::string_view input) noexcept; + + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(char input) noexcept; + + /** + * Append the character surrounded by double quotes, after escaping it. + * There is no UTF-8 validation. + */ + simdjson_inline void escape_and_append_with_quotes(const char* input) noexcept; + + /** + * Append the C string directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *c) noexcept; + + /** + * Append "{" to the buffer. + */ + simdjson_inline void start_object() noexcept; + + /** + * Append "}" to the buffer. + */ + simdjson_inline void end_object() noexcept; + + /** + * Append "[" to the buffer. + */ + simdjson_inline void start_array() noexcept; + + /** + * Append "]" to the buffer. + */ + simdjson_inline void end_array() noexcept; + + /** + * Append "," to the buffer. + */ + simdjson_inline void append_comma() noexcept; + + /** + * Append ":" to the buffer. + */ + simdjson_inline void append_colon() noexcept; + + /** + * Append a key-value pair to the buffer. + * The key is escaped and surrounded by double quotes. + * The value is escaped if it is a string. + */ + template + simdjson_inline void append_key_value(key_type key, value_type value) noexcept; +#if SIMDJSON_SUPPORTS_CONCEPTS + // Support for optional types (std::optional, etc.) + template + simdjson_inline void append(const T &opt); + + // Support for string-like types + template + requires(std::is_convertible::value || + std::is_same::value ) + simdjson_inline void append(const T &value); +#endif +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS + // Support for range-based appending (std::ranges::view, etc.) + template +requires (!std::is_convertible::value) + simdjson_inline void append(const R &range) noexcept; +#endif + /** + * Append the std::string_view directly, without escaping. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(std::string_view input) noexcept; + + /** + * Append len characters from str. + * There is no UTF-8 validation. + */ + simdjson_inline void append_raw(const char *str, size_t len) noexcept; +#if SIMDJSON_EXCEPTIONS + /** + * Creates an std::string from the written JSON buffer. + * Throws if memory allocation failed + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string() const noexcept(false); + + /** + * Creates an std::string_view from the written JSON buffer. + * Throws if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content if needed. + */ + simdjson_inline operator std::string_view() const noexcept(false) simdjson_lifetime_bound; +#endif + + /** + * Returns a view on the written JSON buffer. Returns an error + * if memory allocation failed. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result view() const noexcept; + + /** + * Appends the null character to the buffer and returns + * a pointer to the beginning of the written JSON buffer. + * Returns an error if memory allocation failed. + * The result is null-terminated. + * + * The result may not be valid UTF-8 if some of your content was not valid UTF-8. + * Use validate_unicode() to check the content. + */ + simdjson_inline simdjson_result c_str() noexcept; + + /** + * Return true if the content is valid UTF-8. + */ + simdjson_inline bool validate_unicode() const noexcept; + + /** + * Returns the current size of the written JSON buffer. + * If an error occurred, returns 0. + */ + simdjson_inline size_t size() const noexcept; + +private: + /** + * Returns true if we can write at least upcoming_bytes bytes. + * The underlying buffer is reallocated if needed. It is designed + * to be called before writing to the buffer. It should be fast. + */ + simdjson_inline bool capacity_check(size_t upcoming_bytes); + + /** + * Grow the buffer to at least desired_capacity bytes. + * If the allocation fails, is_valid is set to false. We expect + * that this function would not be repeatedly called. + */ + simdjson_inline void grow_buffer(size_t desired_capacity); + + /** + * We use this helper function to make sure that is_valid is kept consistent. + */ + simdjson_inline void set_valid(bool valid) noexcept; + + std::unique_ptr buffer{}; + size_t position{0}; + size_t capacity{0}; + bool is_valid{true}; +}; + + + +} +} + + +#if !SIMDJSON_STATIC_REFLECTION +// fallback implementation until we have static reflection +template +simdjson_result to_json(const Z &z, size_t initial_capacity = 1024) { + simdjson::lasx::builder::string_builder b(initial_capacity); + b.append(z); + std::string_view s; + auto e = b.view().get(s); + if(e) { return e; } + return std::string(s); +} +#endif + + +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_H +/* end file simdjson/generic/ondemand/json_string_builder.h for lasx */ +/* including simdjson/generic/ondemand/json_string_builder-inl.h for lasx: #include "simdjson/generic/ondemand/json_string_builder-inl.h" */ +/* begin file simdjson/generic/ondemand/json_string_builder-inl.h for lasx */ +/** + * This file is part of the builder API. It is temporarily in the ondemand + * directory but we will move it to a builder directory later. + */ +#include +#include +#include +#ifndef SIMDJSON_GENERIC_STRING_BUILDER_INL_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_INL_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ + +/* + * Empirically, we have found that an inlined optimization is important for + * performance. The following macros are not ideal. We should find a better + * way to inline the code. + */ + +#if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \ + (defined(_M_AMD64) || defined(_M_X64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP == 2)) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#define SIMDJSON_EXPERIMENTAL_HAS_SSE2 1 +#endif +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +#ifndef SIMDJSON_EXPERIMENTAL_HAS_NEON +#define SIMDJSON_EXPERIMENTAL_HAS_NEON 1 +#endif +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +#include +#endif +#if SIMDJSON_EXPERIMENTAL_HAS_SSE2 +#include +#endif + +namespace simdjson { +namespace lasx { +namespace builder { + +static SIMDJSON_CONSTEXPR_LAMBDA std::array json_quotable_character = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + +/** + +A possible SWAR implementation of has_json_escapable_byte. It is not used because +it is slower than the current implementation. It is kept here for reference (to show +that we tried it). + +inline bool has_json_escapable_byte(uint64_t x) { + uint64_t is_ascii = 0x8080808080808080ULL & ~x; + uint64_t xor2 = x ^ 0x0202020202020202ULL; + uint64_t lt32_or_eq34 = xor2 - 0x2121212121212121ULL; + uint64_t sub92 = x ^ 0x5C5C5C5C5C5C5C5CULL; + uint64_t eq92 = (sub92 - 0x0101010101010101ULL); + return ((lt32_or_eq34 | eq92) & is_ascii) != 0; +} + +**/ + +SIMDJSON_CONSTEXPR_LAMBDA simdjson_inline bool +simple_needs_escaping(std::string_view v) { + for (char c : v) { + // a table lookup is faster than a series of comparisons + if(json_quotable_character[static_cast(c)]) { + return true; + } + } + return false; +} + +#if SIMDJSON_EXPERIMENTAL_HAS_NEON +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + uint8x16_t running = vdupq_n_u8(0); + uint8x16_t v34 = vdupq_n_u8(34); + uint8x16_t v92 = vdupq_n_u8(92); + + for (; i + 15 < view.size(); i += 16) { + uint8x16_t word = vld1q_u8((const uint8_t *)view.data() + i); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + if (i < view.size()) { + uint8x16_t word = + vld1q_u8((const uint8_t *)view.data() + view.length() - 16); + running = vorrq_u8(running, vceqq_u8(word, v34)); + running = vorrq_u8(running, vceqq_u8(word, v92)); + running = vorrq_u8(running, vcltq_u8(word, vdupq_n_u8(32))); + } + return vmaxvq_u32(vreinterpretq_u32_u8(running)) != 0; +} +#elif SIMDJSON_EXPERIMENTAL_HAS_SSE2 +simdjson_inline bool fast_needs_escaping(std::string_view view) { + if (view.size() < 16) { + return simple_needs_escaping(view); + } + size_t i = 0; + __m128i running = _mm_setzero_si128(); + for (; i + 15 < view.size(); i += 16) { + + __m128i word = _mm_loadu_si128(reinterpret_cast(view.data() + i)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + if (i < view.size()) { + __m128i word = + _mm_loadu_si128(reinterpret_cast(view.data() + view.length() - 16)); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(34))); + running = _mm_or_si128(running, _mm_cmpeq_epi8(word, _mm_set1_epi8(92))); + running = _mm_or_si128( + running, _mm_cmpeq_epi8(_mm_subs_epu8(word, _mm_set1_epi8(31)), + _mm_setzero_si128())); + } + return _mm_movemask_epi8(running) != 0; +} +#else +simdjson_inline bool fast_needs_escaping(std::string_view view) { + return simple_needs_escaping(view); +} +#endif + + +SIMDJSON_CONSTEXPR_LAMBDA inline size_t +find_next_json_quotable_character(const std::string_view view, + size_t location) noexcept { + + for (auto pos = view.begin() + location; pos != view.end(); ++pos) { + if (json_quotable_character[static_cast(*pos)]) { + return pos - view.begin(); + } + } + return size_t(view.size()); +} + +SIMDJSON_CONSTEXPR_LAMBDA static std::string_view control_chars[] = { + "\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", + "\\u0007", "\\b", "\\t", "\\n", "\\u000b", "\\f", "\\r", + "\\u000e", "\\u000f", "\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", + "\\u0015", "\\u0016", "\\u0017", "\\u0018", "\\u0019", "\\u001a", "\\u001b", + "\\u001c", "\\u001d", "\\u001e", "\\u001f"}; + +// All Unicode characters may be placed within the quotation marks, except for the +// characters that MUST be escaped: quotation mark, reverse solidus, and the control +// characters (U+0000 through U+001F). +// There are two-character sequence escape representations of some popular characters: +// \", \\, \b, \f, \n, \r, \t. +SIMDJSON_CONSTEXPR_LAMBDA void escape_json_char(char c, char *&out) { + if (c == '"') { + memcpy(out, "\\\"", 2); + out += 2; + } else if (c == '\\') { + memcpy(out, "\\\\", 2); + out += 2; + } else { + std::string_view v = control_chars[uint8_t(c)]; + memcpy(out, v.data(), v.size()); + out += v.size(); + } +} + +inline size_t write_string_escaped(const std::string_view input, char *out) { + size_t mysize = input.size(); + if (!fast_needs_escaping(input)) { // fast path! + memcpy(out, input.data(), input.size()); + return input.size(); + } + const char *const initout = out; + size_t location = find_next_json_quotable_character(input, 0); + memcpy(out, input.data(), location); + out += location; + escape_json_char(input[location], out); + location += 1; + while (location < mysize) { + size_t newlocation = find_next_json_quotable_character(input, location); + memcpy(out, input.data() + location, newlocation - location); + out += newlocation - location; + location = newlocation; + if (location == mysize) { + break; + } + escape_json_char(input[location], out); + location += 1; + } + return out - initout; +} + +simdjson_inline string_builder::string_builder(size_t initial_capacity) + : buffer(new(std::nothrow) char[initial_capacity]), position(0), + capacity(buffer.get() != nullptr ? initial_capacity : 0), + is_valid(buffer.get() != nullptr) {} + +simdjson_inline bool string_builder::capacity_check(size_t upcoming_bytes) { + // We use the convention that when is_valid is false, then the capacity and + // the position are 0. + // Most of the time, this function will return true. + if (simdjson_likely(upcoming_bytes <= capacity - position)) { + return true; + } + // check for overflow, most of the time there is no overflow + if (simdjson_likely(position + upcoming_bytes < position)) { + return false; + } + // We will rarely get here. + grow_buffer((std::max)(capacity * 2, position + upcoming_bytes)); + // If the buffer allocation failed, we set is_valid to false. + return is_valid; +} + +simdjson_inline void string_builder::grow_buffer(size_t desired_capacity) { + if (!is_valid) { + return; + } + std::unique_ptr new_buffer(new (std::nothrow) char[desired_capacity]); + if (new_buffer.get() == nullptr) { + set_valid(false); + return; + } + std::memcpy(new_buffer.get(), buffer.get(), position); + buffer.swap(new_buffer); + capacity = desired_capacity; +} + +simdjson_inline void string_builder::set_valid(bool valid) noexcept { + if (!valid) { + is_valid = false; + capacity = 0; + position = 0; + buffer.reset(); + } else { + is_valid = true; + } +} + +simdjson_inline size_t string_builder::size() const noexcept { + return position; +} + +simdjson_inline void string_builder::append(char c) noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = c; + } +} + +simdjson_inline void string_builder::append_null() noexcept { + constexpr char null_literal[] = "null"; + constexpr size_t null_len = sizeof(null_literal) - 1; + if (capacity_check(null_len)) { + std::memcpy(buffer.get() + position, null_literal, null_len); + position += null_len; + } +} + +simdjson_inline void string_builder::clear() noexcept { + position = 0; + // if it was invalid, we should try to repair it + if (!is_valid) { + capacity = 0; + buffer.reset(); + is_valid = true; + } +} + +namespace internal { + +// We could specialize further for 32-bit integers. +simdjson_really_inline int int_log2(uint32_t x) { return (63 - leading_zeroes(x | 1)); } + +simdjson_really_inline int fast_digit_count(uint32_t x) { + static uint64_t table[] = { + 4294967296, 8589934582, 8589934582, 8589934582, 12884901788, + 12884901788, 12884901788, 17179868184, 17179868184, 17179868184, + 21474826480, 21474826480, 21474826480, 21474826480, 25769703776, + 25769703776, 25769703776, 30063771072, 30063771072, 30063771072, + 34349738368, 34349738368, 34349738368, 34349738368, 38554705664, + 38554705664, 38554705664, 41949672960, 41949672960, 41949672960, + 42949672960, 42949672960}; + return uint32_t((x + table[int_log2(x)]) >> 32); +} + +simdjson_really_inline int int_log2(uint64_t x) { return 63 - leading_zeroes(x | 1); } + +simdjson_really_inline int fast_digit_count(uint64_t x) { + static uint64_t table[] = {9, + 99, + 999, + 9999, + 99999, + 999999, + 9999999, + 99999999, + 999999999, + 9999999999, + 99999999999, + 999999999999, + 9999999999999, + 99999999999999, + 999999999999999ULL, + 9999999999999999ULL, + 99999999999999999ULL, + 999999999999999999ULL, + 9999999999999999999ULL}; + int y = (19 * int_log2(x) >> 6); + y += x > table[y]; + return y + 1; +} + +template ::value>::type> +simdjson_really_inline size_t digit_count(number_type v) noexcept { + static_assert(sizeof(number_type) == 8 || sizeof(number_type) == 4 || + sizeof(number_type) == 2 || sizeof(number_type) == 1, + "We only support 8-bit, 16-bit, 32-bit and 64-bit numbers"); + return fast_digit_count(v); +} +static const char decimal_table[200] = { + 0x30, 0x30, 0x30, 0x31, 0x30, 0x32, 0x30, 0x33, 0x30, 0x34, 0x30, 0x35, + 0x30, 0x36, 0x30, 0x37, 0x30, 0x38, 0x30, 0x39, 0x31, 0x30, 0x31, 0x31, + 0x31, 0x32, 0x31, 0x33, 0x31, 0x34, 0x31, 0x35, 0x31, 0x36, 0x31, 0x37, + 0x31, 0x38, 0x31, 0x39, 0x32, 0x30, 0x32, 0x31, 0x32, 0x32, 0x32, 0x33, + 0x32, 0x34, 0x32, 0x35, 0x32, 0x36, 0x32, 0x37, 0x32, 0x38, 0x32, 0x39, + 0x33, 0x30, 0x33, 0x31, 0x33, 0x32, 0x33, 0x33, 0x33, 0x34, 0x33, 0x35, + 0x33, 0x36, 0x33, 0x37, 0x33, 0x38, 0x33, 0x39, 0x34, 0x30, 0x34, 0x31, + 0x34, 0x32, 0x34, 0x33, 0x34, 0x34, 0x34, 0x35, 0x34, 0x36, 0x34, 0x37, + 0x34, 0x38, 0x34, 0x39, 0x35, 0x30, 0x35, 0x31, 0x35, 0x32, 0x35, 0x33, + 0x35, 0x34, 0x35, 0x35, 0x35, 0x36, 0x35, 0x37, 0x35, 0x38, 0x35, 0x39, + 0x36, 0x30, 0x36, 0x31, 0x36, 0x32, 0x36, 0x33, 0x36, 0x34, 0x36, 0x35, + 0x36, 0x36, 0x36, 0x37, 0x36, 0x38, 0x36, 0x39, 0x37, 0x30, 0x37, 0x31, + 0x37, 0x32, 0x37, 0x33, 0x37, 0x34, 0x37, 0x35, 0x37, 0x36, 0x37, 0x37, + 0x37, 0x38, 0x37, 0x39, 0x38, 0x30, 0x38, 0x31, 0x38, 0x32, 0x38, 0x33, + 0x38, 0x34, 0x38, 0x35, 0x38, 0x36, 0x38, 0x37, 0x38, 0x38, 0x38, 0x39, + 0x39, 0x30, 0x39, 0x31, 0x39, 0x32, 0x39, 0x33, 0x39, 0x34, 0x39, 0x35, + 0x39, 0x36, 0x39, 0x37, 0x39, 0x38, 0x39, 0x39, +}; +} // namespace internal + +template +simdjson_inline void string_builder::append(number_type v) noexcept { + static_assert(std::is_same::value || + std::is_integral::value || + std::is_floating_point::value, + "Unsupported number type"); + // If C++17 is available, we can 'if constexpr' here. + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + if (v) { + constexpr char true_literal[] = "true"; + constexpr size_t true_len = sizeof(true_literal) - 1; + if (capacity_check(true_len)) { + std::memcpy(buffer.get() + position, true_literal, true_len); + position += true_len; + } + } else { + constexpr char false_literal[] = "false"; + constexpr size_t false_len = sizeof(false_literal) - 1; + if (capacity_check(false_len)) { + std::memcpy(buffer.get() + position, false_literal, false_len); + position += false_len; + } + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_unsigned::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + unsigned_type pv = static_cast(v); + size_t dc = internal::digit_count(pv); + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100)*2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_integral::value) { + constexpr size_t max_number_size = 20; + if (capacity_check(max_number_size)) { + using unsigned_type = typename std::make_unsigned::type; + bool negative = v < 0; + unsigned_type pv = static_cast(v); + if (negative) { + pv = 0 - pv; // the 0 is for Microsoft + } + size_t dc = internal::digit_count(pv); + if (negative) { + buffer.get()[position++] = '-'; + } + char *write_pointer = buffer.get() + position + dc - 1; + while (pv >= 100) { + memcpy(write_pointer - 1, &internal::decimal_table[(pv % 100)*2], 2); + write_pointer -= 2; + pv /= 100; + } + if (pv >= 10) { + *write_pointer-- = char('0' + (pv % 10)); + pv /= 10; + } + *write_pointer = char('0' + pv); + position += dc; + } + } + else SIMDJSON_IF_CONSTEXPR(std::is_floating_point::value) { + constexpr size_t max_number_size = 24; + if (capacity_check(max_number_size)) { + // We could specialize for float. + char *end = simdjson::internal::to_chars(buffer.get() + position, nullptr, + double(v)); + position = end - buffer.get(); + } + } +} + +simdjson_inline void +string_builder::escape_and_append(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(6 * input.size())) { + position += write_string_escaped(input, buffer.get() + position); + } +} + +simdjson_inline void +string_builder::escape_and_append_with_quotes(std::string_view input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * input.size())) { + buffer.get()[position++] = '"'; + position += write_string_escaped(input, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} + +simdjson_inline void +string_builder::escape_and_append_with_quotes(char input) noexcept { + // escaping might turn a control character into \x00xx so 6 characters. + if (capacity_check(2 + 6 * 1)) { + buffer.get()[position++] = '"'; + std::string_view cinput(&input, 1); + position += write_string_escaped(cinput, buffer.get() + position); + buffer.get()[position++] = '"'; + } +} + +simdjson_inline void string_builder::escape_and_append_with_quotes(const char* input) noexcept { + std::string_view cinput(input); + escape_and_append_with_quotes(cinput); +} + +simdjson_inline void string_builder::append_raw(const char *c) noexcept { + size_t len = std::strlen(c); + append_raw(c, len); +} + +simdjson_inline void +string_builder::append_raw(std::string_view input) noexcept { + if (capacity_check(input.size())) { + std::memcpy(buffer.get() + position, input.data(), input.size()); + position += input.size(); + } +} + +simdjson_inline void string_builder::append_raw(const char *str, + size_t len) noexcept { + if (capacity_check(len)) { + std::memcpy(buffer.get() + position, str, len); + position += len; + } +} +#if SIMDJSON_SUPPORTS_CONCEPTS +// Support for optional types (std::optional, etc.) +template +simdjson_inline void string_builder::append(const T &opt) { + if (opt) { + append(*opt); + } else { + append_null(); + } +} +template +requires(std::is_convertible::value || +std::is_same::value ) +simdjson_inline void string_builder::append(const T &value) { + escape_and_append_with_quotes(value); +} +#endif + +#if SIMDJSON_SUPPORTS_RANGES && SIMDJSON_SUPPORTS_CONCEPTS + // Support for range-based appending (std::ranges::view, etc.) +template +requires (!std::is_convertible::value) +simdjson_inline void string_builder::append(const R &range) noexcept { + auto it = std::ranges::begin(range); + auto end = std::ranges::end(range); + if constexpr (concepts::is_pair) { + start_object(); + + if (it == end) { + end_object(); + return; // Handle empty range + } + // Append first item without leading comma + append_key_value(it->first, it->second); + ++it; + + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append_key_value(it->first, it->second); + } + end_object(); + } else { + start_array(); + if (it == end) { + end_array(); + return; // Handle empty range + } + + // Append first item without leading comma + append(*it); + ++it; + + // Append remaining items with preceding commas + for (; it != end; ++it) { + append_comma(); + append(*it); + } + end_array(); + + } +} + +#endif + +#if SIMDJSON_EXCEPTIONS +simdjson_inline string_builder::operator std::string() const noexcept(false) { + return std::string(std::string_view()); +} + +simdjson_inline string_builder::operator std::string_view() const + noexcept(false) simdjson_lifetime_bound { + return view(); +} +#endif + +simdjson_inline simdjson_result +string_builder::view() const noexcept { + if (!is_valid) { + return simdjson::OUT_OF_CAPACITY; + } + return std::string_view(buffer.get(), position); +} + +simdjson_inline simdjson_result string_builder::c_str() noexcept { + if (capacity_check(1)) { + buffer.get()[position] = '\0'; + return buffer.get(); + } + return simdjson::OUT_OF_CAPACITY; +} + +simdjson_inline bool string_builder::validate_unicode() const noexcept { + return simdjson::validate_utf8(buffer.get(), position); +} + +simdjson_inline void string_builder::start_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '{'; + } +} + + +simdjson_inline void string_builder::end_object() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '}'; + } +} + + +simdjson_inline void string_builder::start_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = '['; + } +} + + +simdjson_inline void string_builder::end_array() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ']'; + } +} + + +simdjson_inline void string_builder::append_comma() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ','; + } +} + + +simdjson_inline void string_builder::append_colon() noexcept { + if (capacity_check(1)) { + buffer.get()[position++] = ':'; + } +} + +template +simdjson_inline void string_builder::append_key_value(key_type key, value_type value) noexcept { + static_assert( + std::is_same::value || + std::is_convertible::value, + "Unsupported key type"); + escape_and_append_with_quotes(key); + append_colon(); + SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + append_null(); + } else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } else SIMDJSON_IF_CONSTEXPR(std::is_convertible::value) { + escape_and_append_with_quotes(value); + } else SIMDJSON_IF_CONSTEXPR(std::is_same::value) { + escape_and_append_with_quotes(value); + } else { + append(value); + } +} + +} // namespace builder +} // namespace lasx +} // namespace simdjson + +#endif // SIMDJSON_GENERIC_STRING_BUILDER_INL_H +/* end file simdjson/generic/ondemand/json_string_builder-inl.h for lasx */ +/* including simdjson/generic/ondemand/json_builder.h for lasx: #include "simdjson/generic/ondemand/json_builder.h" */ +/* begin file simdjson/generic/ondemand/json_builder.h for lasx */ +/** + * This file is part of the builder API. It is temporarily in the ondemand directory + * but we will move it to a builder directory later. + */ +#ifndef SIMDJSON_GENERIC_BUILDER_H + +/* amalgamation skipped (editor-only): #ifndef SIMDJSON_CONDITIONAL_INCLUDE */ +/* amalgamation skipped (editor-only): #define SIMDJSON_GENERIC_STRING_BUILDER_H */ +/* amalgamation skipped (editor-only): #include "simdjson/generic/builder/json_string_builder.h" */ +/* amalgamation skipped (editor-only): #include "simdjson/concepts.h" */ +/* amalgamation skipped (editor-only): #endif // SIMDJSON_CONDITIONAL_INCLUDE */ +#if SIMDJSON_STATIC_REFLECTION + +#include +#include +#include +#include +#include +#include +#include +#include +// #include // for std::define_static_string - header not available yet + +namespace simdjson { +namespace lasx { +namespace builder { + +// Concept that checks if a type is a container but not a string (because +// strings handling must be handled differently) +template +concept container_but_not_string = + requires(T a) { + { a.size() } -> std::convertible_to; + { + a[std::declval()] + }; // check if elements are accessible for the subscript operator + } && !std::is_same_v && + !std::is_same_v && !std::is_same_v; + +template + requires(container_but_not_string) +constexpr void atom(string_builder &b, const T &t) { + if (t.size() == 0) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, t[0]); + for (size_t i = 1; i < t.size(); ++i) { + b.append(','); + atom(b, t[i]); + } + b.append(']'); +} + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +constexpr void atom(string_builder &b, const T &t) { + b.escape_and_append_with_quotes(t); +} + +template +constexpr void atom(string_builder &b, const T &m) { + if (m.empty()) { + b.append_raw("{}"); + return; + } + b.append('{'); + bool first = true; + for (const auto& [key, value] : m) { + if (!first) { + b.append(','); + } + first = false; + // Keys must be convertible to string_view per the concept + b.escape_and_append_with_quotes(key); + b.append(':'); + atom(b, value); + } + b.append('}'); +} + + +template::value && !std::is_same_v>::type> +constexpr void atom(string_builder &b, const number_type t) { + b.append(t); +} + +template + requires(std::is_class_v && !container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v) +constexpr void atom(string_builder &b, const T &t) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^T, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, t.[:dm:]); + i++; + }; + b.append('}'); +} + +// Support for optional types (std::optional, etc.) +template +constexpr void atom(string_builder &b, const T &opt) { + if (opt) { + atom(b, opt.value()); + } else { + b.append_raw("null"); + } +} + +// Support for smart pointers (std::unique_ptr, std::shared_ptr, etc.) +template +constexpr void atom(string_builder &b, const T &ptr) { + if (ptr) { + atom(b, *ptr); + } else { + b.append_raw("null"); + } +} + +// Support for enums - serialize as string representation using expand approach from P2996R12 +template + requires(std::is_enum_v) +void atom(string_builder &b, const T &e) { +#if SIMDJSON_STATIC_REFLECTION + constexpr auto enumerators = std::define_static_array(std::meta::enumerators_of(^^T)); + template for (constexpr auto enum_val : enumerators) { + constexpr auto enum_str = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(enum_val))); + if (e == [:enum_val:]) { + b.append_raw(enum_str); + return; + } + }; + // Fallback to integer if enum value not found + atom(b, static_cast>(e)); +#else + // Fallback: serialize as integer if reflection not available + atom(b, static_cast>(e)); +#endif +} + +// Support for appendable containers that don't have operator[] (sets, etc.) +template + requires(!container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v) +constexpr void atom(string_builder &b, const T &container) { + if (container.empty()) { + b.append_raw("[]"); + return; + } + b.append('['); + bool first = true; + for (const auto& item : container) { + if (!first) { + b.append(','); + } + first = false; + atom(b, item); + } + b.append(']'); +} + +// append functions that delegate to atom functions for primitive types +template + requires(std::is_arithmetic_v && !std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template + requires(std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template + requires(!container_but_not_string && !concepts::string_view_keyed_map && + !concepts::optional_type && !concepts::smart_pointer && + !std::is_same_v && + !std::is_same_v && !std::is_same_v) +void append(string_builder &b, const T &t) { + atom(b, t); +} + +template +void append(string_builder &b, const T &t) { + atom(b, t); +} + +// works for struct +template + requires(std::is_class_v && !container_but_not_string && + !concepts::string_view_keyed_map && + !concepts::optional_type && + !concepts::smart_pointer && + !concepts::appendable_containers && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v && + !std::is_same_v) +void append(string_builder &b, const Z &z) { + int i = 0; + b.append('{'); + template for (constexpr auto dm : std::define_static_array(std::meta::nonstatic_data_members_of(^^Z, std::meta::access_context::unchecked()))) { + if (i != 0) + b.append(','); + constexpr auto key = std::define_static_string(constevalutil::consteval_to_quoted_escaped(std::meta::identifier_of(dm))); + b.append_raw(key); + b.append(':'); + atom(b, z.[:dm:]); + i++; + }; + b.append('}'); +} + +// works for container +template + requires(container_but_not_string) +void append(string_builder &b, const Z &z) { + if (z.size() == 0) { + b.append_raw("[]"); + return; + } + b.append('['); + atom(b, z[0]); + for (size_t i = 1; i < z.size(); ++i) { + b.append(','); + atom(b, z[i]); + } + b.append(']'); +} + +template +simdjson_result to_json_string(const Z &z, size_t initial_capacity = 1024) { + string_builder b(initial_capacity); + append(b, z); + std::string_view s; + if(auto e = b.view().get(s); e) { return e; } + return std::string(s); +} + +template +simdjson_error to_json(const Z &z, std::string &s) { + string_builder b; + append(b, z); + std::string_view view; + if(auto e = b.view().get(view); e) { return e; } + s.assign(view); + return SUCCESS; +} + +template +string_builder& operator<<(string_builder& b, const Z& z) { + append(b, z); + return b; +} +} // namespace builder +} // namespace lasx +// Alias the function template to 'to' in the global namespace +template +simdjson_result to_json(const Z &z, size_t initial_capacity = 1024) { + return lasx::builder::to_json_string(z, initial_capacity); +} +} // namespace simdjson + +#endif // SIMDJSON_STATIC_REFLECTION + +#endif +/* end file simdjson/generic/ondemand/json_builder.h for lasx */ /* end file simdjson/generic/ondemand/amalgamated.h for lasx */ /* including simdjson/lasx/end.h: #include "simdjson/lasx/end.h" */ @@ -122776,9 +139898,275 @@ namespace simdjson { * @copydoc simdjson::builtin::ondemand */ namespace ondemand = builtin::ondemand; + /** + * @copydoc simdjson::builtin::builder + */ + namespace builder = builtin::builder; + +#if SIMDJSON_STATIC_REFLECTION + /** + * Create a JSON string from any user-defined type using static reflection. + * Only available when SIMDJSON_STATIC_REFLECTION is enabled. + */ + template + requires(!std::same_as && + !std::same_as && + !std::same_as && + !std::same_as) + inline std::string to_json_string(const T& obj) { + builder::string_builder str_builder; + append(str_builder, obj); + std::string_view view; + if (str_builder.view().get(view) == SUCCESS) { + return std::string(view); + } + return ""; + } +#endif + } // namespace simdjson #endif // SIMDJSON_ONDEMAND_H /* end file simdjson/ondemand.h */ +/* including simdjson/convert.h: #include "simdjson/convert.h" */ +/* begin file simdjson/convert.h */ + +#ifndef SIMDJSON_CONVERT_H +#define SIMDJSON_CONVERT_H + +/* skipped duplicate #include "simdjson/ondemand.h" */ +#include + +#if SIMDJSON_SUPPORTS_CONCEPTS + + +namespace simdjson { +namespace convert { +namespace internal { + +/** + * A utility class for automatically parsing JSON documents. + * This template is NOT part of our public API. + * It is subject to changes. + * @private + */ +template +struct auto_parser { +private: + parser_type m_parser; + ondemand::document m_doc; + error_code m_error{SUCCESS}; + + template + static constexpr bool is_nothrow_gettable = requires(ondemand::document doc) { + { doc.get() } noexcept; + }; +public: + explicit auto_parser(parser_type &&parser, ondemand::document &&doc) noexcept requires(!std::is_pointer_v); + explicit auto_parser(parser_type &&parser, padded_string_view const str) noexcept requires(!std::is_pointer_v); + explicit auto_parser(std::remove_pointer_t &parser, ondemand::document &&doc) noexcept requires(std::is_pointer_v); + explicit auto_parser(std::remove_pointer_t &parser, padded_string_view const str) noexcept requires(std::is_pointer_v); + explicit auto_parser(padded_string_view const str) noexcept requires(std::is_pointer_v); + explicit auto_parser(parser_type parser, ondemand::document &&doc) noexcept requires(std::is_pointer_v); + auto_parser(auto_parser const &) = delete; + auto_parser &operator=(auto_parser const &) = delete; + auto_parser(auto_parser &&) noexcept = default; + auto_parser &operator=(auto_parser &&) noexcept = default; + ~auto_parser() = default; + + simdjson_warn_unused std::remove_pointer_t &parser() noexcept; + + template + simdjson_warn_unused simdjson_inline simdjson_result result() noexcept(is_nothrow_gettable); + + simdjson_warn_unused simdjson_inline simdjson_result array() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result object() noexcept; + simdjson_warn_unused simdjson_inline simdjson_result number() noexcept; + + template + simdjson_warn_unused simdjson_inline explicit(false) operator simdjson_result() noexcept(is_nothrow_gettable); + + template + simdjson_warn_unused simdjson_inline explicit(false) operator T() noexcept(false); + + template + simdjson_warn_unused simdjson_inline std::optional optional() noexcept(is_nothrow_gettable); +}; + + +/** + * A utility class for adapting values for the `auto_parser`. + * This template is not part of our public API. It is subject to changes. + * @private + */ +template +struct to_adaptor { + T operator()(simdjson_result &val) const noexcept; + auto operator()(padded_string_view const str) const noexcept; + auto operator()(ondemand::parser &parser, padded_string_view const str) const noexcept; +}; +} // namespace internal +} // namespace convert + +/** + * The simdjson::from instance is EXPERIMENTAL AND SUBJECT TO CHANGES. + * + * The `from` instance is a utility adaptor for parsing JSON strings into objects. + * It provides a convenient way to convert JSON data into C++ objects using the `auto_parser`. + * + * Example usage: + * + * ```cpp + * std::map obj = + * simdjson::from(R"({"key": "value"})"_padded); + * ``` + * + * This will parse the JSON string and return an object representation. By default, we + * use the simdjson::ondemand::parser::get_parser() instance. A parser instance should + * be used for just one document at a time. + * + * You can also pass you own parser instance: + * ```cpp + * simdjson::ondemand::parser parser; + * std::map obj = + * simdjson::from(parser, R"({"key": "value"})"_padded); + * ``` + * The parser instance can be reused. + * + * This functionality requires C++20 or better. + */ +static constexpr convert::internal::to_adaptor<> from{}; + +} // namespace simdjson + +#endif // SIMDJSON_SUPPORTS_CONCEPTS +#endif // SIMDJSON_CONVERT_H +/* end file simdjson/convert.h */ +/* including simdjson/convert-inl.h: #include "simdjson/convert-inl.h" */ +/* begin file simdjson/convert-inl.h */ + +#ifndef SIMDJSON_CONVERT_INL_H +#define SIMDJSON_CONVERT_INL_H + +/* skipped duplicate #include "simdjson/convert.h" */ +#if SIMDJSON_SUPPORTS_CONCEPTS +namespace simdjson { +namespace convert { +namespace internal { +// auto_parser method definitions +template +inline auto_parser::auto_parser(parser_type &&parser, ondemand::document &&doc) noexcept requires(!std::is_pointer_v) + : m_parser{std::move(parser)}, m_doc{std::move(doc)} {} + +template +inline auto_parser::auto_parser(parser_type &&parser, padded_string_view const str) noexcept requires(!std::is_pointer_v) + : m_parser{std::move(parser)}, m_doc{}, m_error{SUCCESS} { + m_error = m_parser.iterate(str).get(m_doc); +} + +template +inline auto_parser::auto_parser(std::remove_pointer_t &parser, ondemand::document &&doc) noexcept requires(std::is_pointer_v) + : m_parser{&parser}, m_doc{std::move(doc)} {} + +template +inline auto_parser::auto_parser(std::remove_pointer_t &parser, padded_string_view const str) noexcept requires(std::is_pointer_v) + : m_parser{&parser}, m_doc{}, m_error{SUCCESS} { + m_error = m_parser->iterate(str).get(m_doc); +} + +template +inline auto_parser::auto_parser(padded_string_view const str) noexcept requires(std::is_pointer_v) + : auto_parser{ondemand::parser::get_parser(), str} {} + +template +inline auto_parser::auto_parser(parser_type parser, ondemand::document &&doc) noexcept requires(std::is_pointer_v) + : auto_parser{*parser, std::move(doc)} {} + + + + + +template +inline std::remove_pointer_t &auto_parser::parser() noexcept { + if constexpr (std::is_pointer_v) { + return *m_parser; + } else { + return m_parser; + } +} + +template +template +inline simdjson_result auto_parser::result() noexcept(is_nothrow_gettable) { + if (m_error != SUCCESS) { + return m_error; + } + return m_doc.get(); +} + +template +inline simdjson_result auto_parser::array() noexcept { + return result(); +} + +template +inline simdjson_result auto_parser::object() noexcept { + return result(); +} + +template +inline simdjson_result auto_parser::number() noexcept { + return result(); +} + +template +template +inline auto_parser::operator simdjson_result() noexcept(is_nothrow_gettable) { + return result(); +} + +template +template +inline auto_parser::operator T() noexcept(false) { + if (m_error != SUCCESS) { + throw simdjson_error(m_error); + } + return m_doc.get(); +} + +template +template +inline std::optional auto_parser::optional() noexcept(is_nothrow_gettable) { + if (m_error != SUCCESS) { + return std::nullopt; + } + T value; + if (m_doc.get().get(value)) [[unlikely]] { + return std::nullopt; + } + return {std::move(value)}; +} + +// to_adaptor method definitions +template +inline T to_adaptor::operator()(simdjson_result &val) const noexcept { + return val.get(); +} + +template +inline auto to_adaptor::operator()(padded_string_view const str) const noexcept { + return auto_parser{str}; +} + +template +inline auto to_adaptor::operator()(ondemand::parser &parser, padded_string_view const str) const noexcept { + return auto_parser{parser, str}; +} +} // namespace internal +} // namespace convert +} // namespace simdjson +#endif // SIMDJSON_SUPPORTS_CONCEPTS +#endif // SIMDJSON_CONVERT_INL_H +/* end file simdjson/convert-inl.h */ #endif // SIMDJSON_H /* end file simdjson.h */