From afbd63e655df37f07445a6e1abd6b1347a97681b Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Mon, 23 Mar 2026 14:36:45 -0400 Subject: [PATCH] Upgrade Core to `28d02328c467616ecec2bffd37c07930974fedf2` Signed-off-by: Juan Cruz Viotti --- DEPENDENCIES | 2 +- src/runtime/decoder_number.cc | 13 +- vendor/core/CMakeLists.txt | 84 +- vendor/core/DEPENDENCIES | 19 + vendor/core/cmake/Findmpdecimal.cmake | 146 - vendor/core/cmake/Findyaml.cmake | 103 - .../core/cmake/common/compiler/options.cmake | 5 +- vendor/core/cmake/common/defaults.cmake | 13 +- .../cmake/common/targets/executable.cmake | 40 + vendor/core/config.cmake.in | 62 +- vendor/core/src/core/crypto/CMakeLists.txt | 13 + vendor/core/src/core/crypto/crypto_sha256.cc | 230 + vendor/core/src/core/crypto/crypto_uuid.cc | 81 + .../crypto/include/sourcemeta/core/crypto.h | 16 + .../include/sourcemeta/core/crypto_sha256.h | 30 + .../include/sourcemeta/core/crypto_uuid.h | 27 + vendor/core/src/core/html/CMakeLists.txt | 6 +- vendor/core/src/core/html/encoder.cc | 74 - vendor/core/src/core/html/escape.cc | 77 + .../core/html/include/sourcemeta/core/html.h | 3 +- .../include/sourcemeta/core/html_buffer.h | 93 + .../include/sourcemeta/core/html_elements.h | 450 - .../include/sourcemeta/core/html_encoder.h | 145 - .../include/sourcemeta/core/html_escape.h | 16 +- .../include/sourcemeta/core/html_writer.h | 466 + vendor/core/src/core/html/writer.cc | 49 + vendor/core/src/core/json/CMakeLists.txt | 2 + vendor/core/src/core/json/construct.h | 648 ++ vendor/core/src/core/json/grammar.h | 3 + .../core/json/include/sourcemeta/core/json.h | 94 +- .../json/include/sourcemeta/core/json_array.h | 5 + .../json/include/sourcemeta/core/json_auto.h | 25 +- .../json/include/sourcemeta/core/json_hash.h | 178 +- .../include/sourcemeta/core/json_object.h | 32 +- .../json/include/sourcemeta/core/json_value.h | 307 +- vendor/core/src/core/json/json.cc | 161 +- vendor/core/src/core/json/json_value.cc | 285 +- 
vendor/core/src/core/json/parser.h | 1743 ++-- .../include/sourcemeta/core/jsonpointer.h | 17 +- .../sourcemeta/core/jsonpointer_pointer.h | 47 +- .../sourcemeta/core/jsonpointer_position.h | 41 +- .../core/src/core/jsonpointer/jsonpointer.cc | 25 +- vendor/core/src/core/jsonpointer/parser.h | 94 +- vendor/core/src/core/jsonpointer/position.cc | 189 +- vendor/core/src/core/jsonschema/bundle.cc | 267 +- vendor/core/src/core/jsonschema/frame.cc | 422 +- vendor/core/src/core/jsonschema/helpers.h | 27 + .../include/sourcemeta/core/jsonschema.h | 9 +- .../sourcemeta/core/jsonschema_frame.h | 71 +- .../sourcemeta/core/jsonschema_transform.h | 7 +- .../src/core/jsonschema/known_resolver.in.cc | 412 +- .../core/md5/include/sourcemeta/core/md5.h | 39 - vendor/core/src/core/md5/md5.cc | 169 - vendor/core/src/core/punycode/CMakeLists.txt | 4 +- vendor/core/src/core/punycode/punycode.cc | 16 +- vendor/core/src/core/punycode/utf8.h | 87 - vendor/core/src/core/regex/preprocess.h | 13 +- vendor/core/src/core/semver/CMakeLists.txt | 9 + .../semver/include/sourcemeta/core/semver.h | 131 + .../include/sourcemeta/core/semver_error.h | 56 + vendor/core/src/core/semver/semver.cc | 450 + .../src/core/{uuid => unicode}/CMakeLists.txt | 5 +- .../unicode/include/sourcemeta/core/unicode.h | 103 + vendor/core/src/core/unicode/unicode.cc | 114 + .../core/uri/include/sourcemeta/core/uri.h | 34 +- vendor/core/src/core/uri/parse.cc | 434 +- vendor/core/src/core/uri/resolution.cc | 10 +- .../core/src/core/uritemplate/CMakeLists.txt | 2 - .../sourcemeta/core/uritemplate_error.h | 20 + .../sourcemeta/core/uritemplate_router.h | 8 +- .../uritemplate/uritemplate_router_view.cc | 110 +- .../core/uuid/include/sourcemeta/core/uuid.h | 36 - vendor/core/src/core/uuid/uuid.cc | 34 - vendor/core/src/core/yaml/CMakeLists.txt | 7 +- .../core/yaml/include/sourcemeta/core/yaml.h | 128 +- .../yaml/include/sourcemeta/core/yaml_error.h | 62 +- .../include/sourcemeta/core/yaml_roundtrip.h | 80 + 
vendor/core/src/core/yaml/lexer.h | 1496 +++ vendor/core/src/core/yaml/parser.h | 1961 ++++ vendor/core/src/core/yaml/stringify.h | 872 ++ vendor/core/src/core/yaml/yaml.cc | 636 +- .../src/extension/alterschema/CMakeLists.txt | 2 + .../src/extension/alterschema/alterschema.cc | 4 + .../alterschema/common/const_in_enum.h | 34 + .../alterschema/common/orphan_definitions.h | 33 +- .../required_properties_in_properties.h | 31 +- .../common/unknown_keywords_prefix.h | 20 + .../alterschema/linter/const_not_in_enum.h | 30 + .../core/src/extension/build/CMakeLists.txt | 8 - .../src/extension/build/adapter_filesystem.cc | 114 - .../build/include/sourcemeta/core/build.h | 106 - .../core/build_adapter_filesystem.h | 58 - .../include/sourcemeta/core/build_types.h | 26 - .../src/extension/schemaconfig/CMakeLists.txt | 11 - .../include/sourcemeta/core/schemaconfig.h | 78 - .../sourcemeta/core/schemaconfig_error.h | 53 - .../core/src/extension/schemaconfig/parse.cc | 173 - .../extension/schemaconfig/schemaconfig.cc | 43 - vendor/core/src/lang/io/CMakeLists.txt | 4 +- .../src/lang/io/include/sourcemeta/core/io.h | 35 + .../io/include/sourcemeta/core/io_temporary.h | 52 + vendor/core/src/lang/io/io.cc | 76 +- vendor/core/src/lang/io/io_temporary.cc | 66 + vendor/core/src/lang/numeric/CMakeLists.txt | 5 +- .../core/src/lang/numeric/big_coefficient.h | 754 ++ vendor/core/src/lang/numeric/decimal.cc | 2043 +++- .../numeric/include/sourcemeta/core/numeric.h | 1 + .../include/sourcemeta/core/numeric_decimal.h | 89 +- .../include/sourcemeta/core/numeric_parse.h | 6 + .../include/sourcemeta/core/numeric_uint128.h | 232 + vendor/core/src/lang/numeric/parse.cc | 7 +- .../options/CMakeLists.txt | 0 .../options/include/sourcemeta/core/options.h | 0 .../include/sourcemeta/core/options_error.h | 0 .../{extension => lang}/options/options.cc | 0 .../sourcemeta/core/parallel_for_each.h | 21 +- .../md5 => lang/preprocessor}/CMakeLists.txt | 4 +- .../include/sourcemeta/core/preprocessor.h | 12 + 
.../include/sourcemeta/core/process_error.h | 4 +- vendor/core/src/lang/process/spawn.cc | 15 +- vendor/core/vendor-mpdecimal.sh | 43 - vendor/core/vendor/mpdecimal/COPYRIGHT.txt | 23 - .../vendor/mpdecimal/libmpdec/basearith.c | 649 -- .../vendor/mpdecimal/libmpdec/basearith.h | 217 - vendor/core/vendor/mpdecimal/libmpdec/bits.h | 188 - .../vendor/mpdecimal/libmpdec/constants.c | 129 - .../vendor/mpdecimal/libmpdec/constants.h | 88 - .../core/vendor/mpdecimal/libmpdec/context.c | 285 - .../vendor/mpdecimal/libmpdec/convolute.c | 172 - .../vendor/mpdecimal/libmpdec/convolute.h | 48 - vendor/core/vendor/mpdecimal/libmpdec/crt.c | 178 - vendor/core/vendor/mpdecimal/libmpdec/crt.h | 45 - .../vendor/mpdecimal/libmpdec/difradix2.c | 171 - .../vendor/mpdecimal/libmpdec/difradix2.h | 46 - vendor/core/vendor/mpdecimal/libmpdec/fnt.c | 77 - vendor/core/vendor/mpdecimal/libmpdec/fnt.h | 46 - .../core/vendor/mpdecimal/libmpdec/fourstep.c | 242 - .../core/vendor/mpdecimal/libmpdec/fourstep.h | 46 - vendor/core/vendor/mpdecimal/libmpdec/io.c | 1610 --- vendor/core/vendor/mpdecimal/libmpdec/io.h | 61 - .../core/vendor/mpdecimal/libmpdec/mpalloc.c | 347 - .../core/vendor/mpdecimal/libmpdec/mpalloc.h | 53 - .../vendor/mpdecimal/libmpdec/mpdecimal.c | 9155 ----------------- .../vendor/mpdecimal/libmpdec/mpdecimal.h.in | 804 -- .../vendor/mpdecimal/libmpdec/mpdecimal32vc.h | 762 -- .../vendor/mpdecimal/libmpdec/mpdecimal64vc.h | 768 -- .../core/vendor/mpdecimal/libmpdec/mpsignal.c | 966 -- .../vendor/mpdecimal/libmpdec/numbertheory.c | 129 - .../vendor/mpdecimal/libmpdec/numbertheory.h | 75 - .../core/vendor/mpdecimal/libmpdec/sixstep.c | 212 - .../core/vendor/mpdecimal/libmpdec/sixstep.h | 46 - .../vendor/mpdecimal/libmpdec/transpose.c | 275 - .../vendor/mpdecimal/libmpdec/transpose.h | 60 - .../vendor/mpdecimal/libmpdec/typearith.h | 661 -- .../vendor/mpdecimal/libmpdec/umodarith.h | 645 -- .../vendor/mpdecimal/libmpdec/vcdiv64.asm | 47 - vendor/core/vendor/yaml/License | 20 - 
vendor/core/vendor/yaml/include/yaml.h | 1985 ---- vendor/core/vendor/yaml/src/api.c | 1393 --- vendor/core/vendor/yaml/src/dumper.c | 394 - vendor/core/vendor/yaml/src/emitter.c | 2358 ----- vendor/core/vendor/yaml/src/loader.c | 544 - vendor/core/vendor/yaml/src/parser.c | 1375 --- vendor/core/vendor/yaml/src/reader.c | 469 - vendor/core/vendor/yaml/src/scanner.c | 3598 ------- vendor/core/vendor/yaml/src/writer.c | 141 - vendor/core/vendor/yaml/src/yaml_private.h | 684 -- 167 files changed, 13429 insertions(+), 37651 deletions(-) create mode 100644 vendor/core/DEPENDENCIES delete mode 100644 vendor/core/cmake/Findmpdecimal.cmake delete mode 100644 vendor/core/cmake/Findyaml.cmake create mode 100644 vendor/core/src/core/crypto/CMakeLists.txt create mode 100644 vendor/core/src/core/crypto/crypto_sha256.cc create mode 100644 vendor/core/src/core/crypto/crypto_uuid.cc create mode 100644 vendor/core/src/core/crypto/include/sourcemeta/core/crypto.h create mode 100644 vendor/core/src/core/crypto/include/sourcemeta/core/crypto_sha256.h create mode 100644 vendor/core/src/core/crypto/include/sourcemeta/core/crypto_uuid.h delete mode 100644 vendor/core/src/core/html/encoder.cc create mode 100644 vendor/core/src/core/html/include/sourcemeta/core/html_buffer.h delete mode 100644 vendor/core/src/core/html/include/sourcemeta/core/html_elements.h delete mode 100644 vendor/core/src/core/html/include/sourcemeta/core/html_encoder.h create mode 100644 vendor/core/src/core/html/include/sourcemeta/core/html_writer.h create mode 100644 vendor/core/src/core/html/writer.cc create mode 100644 vendor/core/src/core/json/construct.h delete mode 100644 vendor/core/src/core/md5/include/sourcemeta/core/md5.h delete mode 100644 vendor/core/src/core/md5/md5.cc delete mode 100644 vendor/core/src/core/punycode/utf8.h create mode 100644 vendor/core/src/core/semver/CMakeLists.txt create mode 100644 vendor/core/src/core/semver/include/sourcemeta/core/semver.h create mode 100644 
vendor/core/src/core/semver/include/sourcemeta/core/semver_error.h create mode 100644 vendor/core/src/core/semver/semver.cc rename vendor/core/src/core/{uuid => unicode}/CMakeLists.txt (50%) create mode 100644 vendor/core/src/core/unicode/include/sourcemeta/core/unicode.h create mode 100644 vendor/core/src/core/unicode/unicode.cc delete mode 100644 vendor/core/src/core/uuid/include/sourcemeta/core/uuid.h delete mode 100644 vendor/core/src/core/uuid/uuid.cc create mode 100644 vendor/core/src/core/yaml/include/sourcemeta/core/yaml_roundtrip.h create mode 100644 vendor/core/src/core/yaml/lexer.h create mode 100644 vendor/core/src/core/yaml/parser.h create mode 100644 vendor/core/src/core/yaml/stringify.h create mode 100644 vendor/core/src/extension/alterschema/common/const_in_enum.h create mode 100644 vendor/core/src/extension/alterschema/linter/const_not_in_enum.h delete mode 100644 vendor/core/src/extension/build/CMakeLists.txt delete mode 100644 vendor/core/src/extension/build/adapter_filesystem.cc delete mode 100644 vendor/core/src/extension/build/include/sourcemeta/core/build.h delete mode 100644 vendor/core/src/extension/build/include/sourcemeta/core/build_adapter_filesystem.h delete mode 100644 vendor/core/src/extension/build/include/sourcemeta/core/build_types.h delete mode 100644 vendor/core/src/extension/schemaconfig/CMakeLists.txt delete mode 100644 vendor/core/src/extension/schemaconfig/include/sourcemeta/core/schemaconfig.h delete mode 100644 vendor/core/src/extension/schemaconfig/include/sourcemeta/core/schemaconfig_error.h delete mode 100644 vendor/core/src/extension/schemaconfig/parse.cc delete mode 100644 vendor/core/src/extension/schemaconfig/schemaconfig.cc create mode 100644 vendor/core/src/lang/io/include/sourcemeta/core/io_temporary.h create mode 100644 vendor/core/src/lang/io/io_temporary.cc create mode 100644 vendor/core/src/lang/numeric/big_coefficient.h create mode 100644 vendor/core/src/lang/numeric/include/sourcemeta/core/numeric_uint128.h 
rename vendor/core/src/{extension => lang}/options/CMakeLists.txt (100%) rename vendor/core/src/{extension => lang}/options/include/sourcemeta/core/options.h (100%) rename vendor/core/src/{extension => lang}/options/include/sourcemeta/core/options_error.h (100%) rename vendor/core/src/{extension => lang}/options/options.cc (100%) rename vendor/core/src/{core/md5 => lang/preprocessor}/CMakeLists.txt (52%) create mode 100644 vendor/core/src/lang/preprocessor/include/sourcemeta/core/preprocessor.h delete mode 100755 vendor/core/vendor-mpdecimal.sh delete mode 100644 vendor/core/vendor/mpdecimal/COPYRIGHT.txt delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/basearith.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/basearith.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/bits.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/constants.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/constants.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/context.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/convolute.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/convolute.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/crt.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/crt.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/difradix2.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/difradix2.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/fnt.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/fnt.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/fourstep.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/fourstep.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/io.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/io.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/mpalloc.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/mpalloc.h delete mode 100644 
vendor/core/vendor/mpdecimal/libmpdec/mpdecimal.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/mpdecimal.h.in delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/mpdecimal32vc.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/mpdecimal64vc.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/mpsignal.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/numbertheory.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/numbertheory.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/sixstep.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/sixstep.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/transpose.c delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/transpose.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/typearith.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/umodarith.h delete mode 100644 vendor/core/vendor/mpdecimal/libmpdec/vcdiv64.asm delete mode 100644 vendor/core/vendor/yaml/License delete mode 100644 vendor/core/vendor/yaml/include/yaml.h delete mode 100644 vendor/core/vendor/yaml/src/api.c delete mode 100644 vendor/core/vendor/yaml/src/dumper.c delete mode 100644 vendor/core/vendor/yaml/src/emitter.c delete mode 100644 vendor/core/vendor/yaml/src/loader.c delete mode 100644 vendor/core/vendor/yaml/src/parser.c delete mode 100644 vendor/core/vendor/yaml/src/reader.c delete mode 100644 vendor/core/vendor/yaml/src/scanner.c delete mode 100644 vendor/core/vendor/yaml/src/writer.c delete mode 100644 vendor/core/vendor/yaml/src/yaml_private.h diff --git a/DEPENDENCIES b/DEPENDENCIES index d03fe6d2..521cd67d 100644 --- a/DEPENDENCIES +++ b/DEPENDENCIES @@ -1,3 +1,3 @@ vendorpull https://github.com/sourcemeta/vendorpull 1dcbac42809cf87cb5b045106b863e17ad84ba02 -core https://github.com/sourcemeta/core fe450b982907f99e542a0cfc78bc60d2b600ff7a +core https://github.com/sourcemeta/core 28d02328c467616ecec2bffd37c07930974fedf2 bootstrap 
https://github.com/twbs/bootstrap 1a6fdfae6be09b09eaced8f0e442ca6f7680a61e diff --git a/src/runtime/decoder_number.cc b/src/runtime/decoder_number.cc index 865661a4..9d0a6869 100644 --- a/src/runtime/decoder_number.cc +++ b/src/runtime/decoder_number.cc @@ -1,15 +1,24 @@ #include -#include // std::pow #include // std::int64_t, std::uint64_t +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC optimize("no-reciprocal-math") +#endif + namespace sourcemeta::jsonbinpack { auto Decoder::DOUBLE_VARINT_TUPLE(const struct DOUBLE_VARINT_TUPLE &) -> sourcemeta::core::JSON { +#ifdef __clang__ +#pragma clang fp reciprocal(off) +#endif const std::int64_t digits{this->get_varint_zigzag()}; const std::uint64_t point{this->get_varint()}; - const double divisor{std::pow(10, static_cast(point))}; + double divisor{1.0}; + for (std::uint64_t i = 0; i < point; ++i) { + divisor *= 10.0; + } return sourcemeta::core::JSON{static_cast(digits) / divisor}; } diff --git a/vendor/core/CMakeLists.txt b/vendor/core/CMakeLists.txt index e6914a55..4c16e6d8 100644 --- a/vendor/core/CMakeLists.txt +++ b/vendor/core/CMakeLists.txt @@ -3,14 +3,17 @@ project(core VERSION 0.0.0 LANGUAGES C CXX ASM_MASM DESCRIPTION "Sourcemeta Core list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") # Options +option(SOURCEMETA_CORE_LANG_PREPROCESSOR "Build the Sourcemeta Core language preprocessor library" ON) option(SOURCEMETA_CORE_LANG_IO "Build the Sourcemeta Core language I/O library" ON) option(SOURCEMETA_CORE_LANG_PROCESS "Build the Sourcemeta Core language Process library" ON) option(SOURCEMETA_CORE_LANG_PARALLEL "Build the Sourcemeta Core language parallel library" ON) option(SOURCEMETA_CORE_LANG_NUMERIC "Build the Sourcemeta Core language numeric library" ON) +option(SOURCEMETA_CORE_LANG_OPTIONS "Build the Sourcemeta Core Options library" ON) +option(SOURCEMETA_CORE_UNICODE "Build the Sourcemeta Core Unicode library" ON) option(SOURCEMETA_CORE_PUNYCODE "Build the Sourcemeta Core Punycode library" 
ON) option(SOURCEMETA_CORE_TIME "Build the Sourcemeta Core time library" ON) -option(SOURCEMETA_CORE_UUID "Build the Sourcemeta Core UUID library" ON) -option(SOURCEMETA_CORE_MD5 "Build the Sourcemeta Core MD5 library" ON) +option(SOURCEMETA_CORE_CRYPTO "Build the Sourcemeta Core Crypto library" ON) +option(SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL "Use system OpenSSL for the Sourcemeta Core Crypto library" OFF) option(SOURCEMETA_CORE_REGEX "Build the Sourcemeta Core Regex library" ON) option(SOURCEMETA_CORE_URI "Build the Sourcemeta Core URI library" ON) option(SOURCEMETA_CORE_URITEMPLATE "Build the Sourcemeta Core URI Template library" ON) @@ -19,12 +22,10 @@ option(SOURCEMETA_CORE_JSONSCHEMA "Build the Sourcemeta Core JSON Schema library option(SOURCEMETA_CORE_JSONPOINTER "Build the Sourcemeta Core JSON Pointer library" ON) option(SOURCEMETA_CORE_JSONL "Build the Sourcemeta Core JSONL library" ON) option(SOURCEMETA_CORE_YAML "Build the Sourcemeta Core YAML library" ON) +option(SOURCEMETA_CORE_SEMVER "Build the Sourcemeta Core SemVer library" ON) option(SOURCEMETA_CORE_HTML "Build the Sourcemeta Core HTML library" ON) option(SOURCEMETA_CORE_EXTENSION_ALTERSCHEMA "Build the Sourcemeta Core AlterSchema library" ON) option(SOURCEMETA_CORE_EXTENSION_EDITORSCHEMA "Build the Sourcemeta Core EditorSchema library" ON) -option(SOURCEMETA_CORE_EXTENSION_SCHEMACONFIG "Build the Sourcemeta Core SchemaConfig library" ON) -option(SOURCEMETA_CORE_EXTENSION_OPTIONS "Build the Sourcemeta Core Options library" ON) -option(SOURCEMETA_CORE_EXTENSION_BUILD "Build the Sourcemeta Core Build library" ON) option(SOURCEMETA_CORE_TESTS "Build the Sourcemeta Core tests" OFF) option(SOURCEMETA_CORE_BENCHMARK "Build the Sourcemeta Core benchmarks" OFF) option(SOURCEMETA_CORE_DOCS "Build the Sourcemeta Core docs" OFF) @@ -59,6 +60,10 @@ if(SOURCEMETA_CORE_INSTALL) COMPONENT sourcemeta_${PROJECT_NAME}_dev) endif() +if(SOURCEMETA_CORE_LANG_PREPROCESSOR) + add_subdirectory(src/lang/preprocessor) 
+endif() + if(SOURCEMETA_CORE_LANG_IO) add_subdirectory(src/lang/io) endif() @@ -73,10 +78,17 @@ if(SOURCEMETA_CORE_LANG_PARALLEL) endif() if(SOURCEMETA_CORE_LANG_NUMERIC) - find_package(mpdecimal REQUIRED) add_subdirectory(src/lang/numeric) endif() +if(SOURCEMETA_CORE_LANG_OPTIONS) + add_subdirectory(src/lang/options) +endif() + +if(SOURCEMETA_CORE_UNICODE) + add_subdirectory(src/core/unicode) +endif() + if(SOURCEMETA_CORE_PUNYCODE) add_subdirectory(src/core/punycode) endif() @@ -85,12 +97,11 @@ if(SOURCEMETA_CORE_TIME) add_subdirectory(src/core/time) endif() -if(SOURCEMETA_CORE_UUID) - add_subdirectory(src/core/uuid) -endif() - -if(SOURCEMETA_CORE_MD5) - add_subdirectory(src/core/md5) +if(SOURCEMETA_CORE_CRYPTO) + if(SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL) + find_package(OpenSSL REQUIRED) + endif() + add_subdirectory(src/core/crypto) endif() if(SOURCEMETA_CORE_REGEX) @@ -123,10 +134,13 @@ if(SOURCEMETA_CORE_JSONL) endif() if(SOURCEMETA_CORE_YAML) - find_package(yaml REQUIRED) add_subdirectory(src/core/yaml) endif() +if(SOURCEMETA_CORE_SEMVER) + add_subdirectory(src/core/semver) +endif() + if(SOURCEMETA_CORE_HTML) add_subdirectory(src/core/html) endif() @@ -139,18 +153,6 @@ if(SOURCEMETA_CORE_EXTENSION_EDITORSCHEMA) add_subdirectory(src/extension/editorschema) endif() -if(SOURCEMETA_CORE_EXTENSION_SCHEMACONFIG) - add_subdirectory(src/extension/schemaconfig) -endif() - -if(SOURCEMETA_CORE_EXTENSION_OPTIONS) - add_subdirectory(src/extension/options) -endif() - -if(SOURCEMETA_CORE_EXTENSION_BUILD) - add_subdirectory(src/extension/build) -endif() - if(SOURCEMETA_CORE_ADDRESS_SANITIZER) sourcemeta_sanitizer(TYPE address) elseif(SOURCEMETA_CORE_UNDEFINED_SANITIZER) @@ -198,6 +200,14 @@ if(SOURCEMETA_CORE_TESTS) add_subdirectory(test/numeric) endif() + if(SOURCEMETA_CORE_LANG_OPTIONS) + add_subdirectory(test/options) + endif() + + if(SOURCEMETA_CORE_UNICODE) + add_subdirectory(test/unicode) + endif() + if(SOURCEMETA_CORE_PUNYCODE) add_subdirectory(test/punycode) 
endif() @@ -206,12 +216,8 @@ if(SOURCEMETA_CORE_TESTS) add_subdirectory(test/time) endif() - if(SOURCEMETA_CORE_UUID) - add_subdirectory(test/uuid) - endif() - - if(SOURCEMETA_CORE_MD5) - add_subdirectory(test/md5) + if(SOURCEMETA_CORE_CRYPTO) + add_subdirectory(test/crypto) endif() if(SOURCEMETA_CORE_REGEX) @@ -246,6 +252,10 @@ if(SOURCEMETA_CORE_TESTS) add_subdirectory(test/yaml) endif() + if(SOURCEMETA_CORE_SEMVER) + add_subdirectory(test/semver) + endif() + if(SOURCEMETA_CORE_HTML) add_subdirectory(test/html) endif() @@ -258,18 +268,6 @@ if(SOURCEMETA_CORE_TESTS) add_subdirectory(test/editorschema) endif() - if(SOURCEMETA_CORE_EXTENSION_SCHEMACONFIG) - add_subdirectory(test/schemaconfig) - endif() - - if(SOURCEMETA_CORE_EXTENSION_OPTIONS) - add_subdirectory(test/options) - endif() - - if(SOURCEMETA_CORE_EXTENSION_BUILD) - add_subdirectory(test/build) - endif() - if(PROJECT_IS_TOP_LEVEL) # Otherwise we need the child project to link # against the sanitizers too. diff --git a/vendor/core/DEPENDENCIES b/vendor/core/DEPENDENCIES new file mode 100644 index 00000000..737c7c1d --- /dev/null +++ b/vendor/core/DEPENDENCIES @@ -0,0 +1,19 @@ +vendorpull https://github.com/sourcemeta/vendorpull 1dcbac42809cf87cb5b045106b863e17ad84ba02 +jsontestsuite https://github.com/nst/JSONTestSuite d64aefb55228d9584d3e5b2433f720ea8fd00c82 +yaml-test-suite https://github.com/yaml/yaml-test-suite data-2022-01-17 +jsonschema-2020-12 https://github.com/json-schema-org/json-schema-spec 769daad75a9553562333a8937a187741cb708c72 +jsonschema-2019-09 https://github.com/json-schema-org/json-schema-spec 41014ea723120ce70b314d72f863c6929d9f3cfd +jsonschema-draft7 https://github.com/json-schema-org/json-schema-spec 567f768506aaa33a38e552c85bf0586029ef1b32 +jsonschema-draft6 https://github.com/json-schema-org/json-schema-spec 59ed5f6fc6f6386e23ca51d7f31d7fe9cf696713 +jsonschema-draft4 https://github.com/json-schema-org/json-schema-spec 955d185db846cfca84269d9d711b10f4f3353d38 +jsonschema-draft3 
https://github.com/json-schema-org/json-schema-spec 89912ad69fe15e006e8336a59e93bf7a1e46fa54 +jsonschema-draft2 https://github.com/json-schema-org/json-schema-spec 707f65070d09fe5baa1315bce4d31a66ff124171 +jsonschema-draft1 https://github.com/json-schema-org/json-schema-spec 2072feec9fc7a7ff0b2bb5b02c2d6742c554cc4a +jsonschema-draft0 https://github.com/json-schema-org/json-schema-spec 7ea575aef8d5c0183acbe6ff65b4c98ee9c236ec +openapi https://github.com/OAI/OpenAPI-Specification 74906beddddab9e555337031b2a8d8e9338c4972 +referencing-suite https://github.com/python-jsonschema/referencing-suite 61c4cc202b1e96ed5adcaf4842a595f68d659212 +uritemplate-test https://github.com/uri-templates/uritemplate-test 1eb27ab4462b9e5819dc47db99044f5fd1fa9bc7 +pyca-cryptography https://github.com/pyca/cryptography c4935a7021af37c38e0684b0546c1b4378518342 +pcre2 https://github.com/PCRE2Project/pcre2 pcre2-10.47 +googletest https://github.com/google/googletest a7f443b80b105f940225332ed3c31f2790092f47 +googlebenchmark https://github.com/google/benchmark 378fe693a1ef51500db21b11ff05a8018c5f0e55 diff --git a/vendor/core/cmake/Findmpdecimal.cmake b/vendor/core/cmake/Findmpdecimal.cmake deleted file mode 100644 index 01c31c74..00000000 --- a/vendor/core/cmake/Findmpdecimal.cmake +++ /dev/null @@ -1,146 +0,0 @@ -if(NOT mpdecimal_FOUND) - set(MPDECIMAL_DIR "${PROJECT_SOURCE_DIR}/vendor/mpdecimal") - set(MPDECIMAL_SOURCE_DIR "${MPDECIMAL_DIR}/libmpdec") - set(MPDECIMAL_BINARY_DIR "${PROJECT_BINARY_DIR}/mpdecimal") - - file(MAKE_DIRECTORY "${MPDECIMAL_BINARY_DIR}/include") - - if(MSVC) - configure_file( - "${MPDECIMAL_SOURCE_DIR}/mpdecimal64vc.h" - "${MPDECIMAL_BINARY_DIR}/include/mpdecimal.h" - COPYONLY) - set(MPD_CONFIG_LIST CONFIG_64 MASM) - else() - set(MPD_HEADER_CONFIG "/* ABI: 64-bit */") - - if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|AMD64") - set(MPD_CONFIG_LIST CONFIG_64 ASM) - elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64") - set(MPD_CONFIG_LIST CONFIG_64 ANSI 
HAVE_UINT128_T) - else() - set(MPD_CONFIG_LIST CONFIG_64 ANSI) - endif() - - configure_file( - "${MPDECIMAL_SOURCE_DIR}/mpdecimal.h.in" - "${MPDECIMAL_BINARY_DIR}/include/mpdecimal.h" - @ONLY) - endif() - - set(MPDECIMAL_PUBLIC_HEADER "${MPDECIMAL_BINARY_DIR}/include/mpdecimal.h") - - set(MPDECIMAL_SOURCES - "${MPDECIMAL_SOURCE_DIR}/basearith.c" - "${MPDECIMAL_SOURCE_DIR}/constants.c" - "${MPDECIMAL_SOURCE_DIR}/context.c" - "${MPDECIMAL_SOURCE_DIR}/convolute.c" - "${MPDECIMAL_SOURCE_DIR}/crt.c" - "${MPDECIMAL_SOURCE_DIR}/difradix2.c" - "${MPDECIMAL_SOURCE_DIR}/fnt.c" - "${MPDECIMAL_SOURCE_DIR}/fourstep.c" - "${MPDECIMAL_SOURCE_DIR}/io.c" - "${MPDECIMAL_SOURCE_DIR}/mpalloc.c" - "${MPDECIMAL_SOURCE_DIR}/mpdecimal.c" - "${MPDECIMAL_SOURCE_DIR}/mpsignal.c" - "${MPDECIMAL_SOURCE_DIR}/numbertheory.c" - "${MPDECIMAL_SOURCE_DIR}/sixstep.c" - "${MPDECIMAL_SOURCE_DIR}/transpose.c") - - if(MSVC) - list(APPEND MPDECIMAL_SOURCES "${MPDECIMAL_SOURCE_DIR}/vcdiv64.asm") - endif() - - add_library(mpdecimal ${MPDECIMAL_SOURCES}) - sourcemeta_add_default_options(PRIVATE mpdecimal) - - if(SOURCEMETA_COMPILER_LLVM OR SOURCEMETA_COMPILER_GCC) - target_compile_options(mpdecimal PRIVATE -Wno-sign-conversion) - target_compile_options(mpdecimal PRIVATE -Wno-implicit-fallthrough) - target_compile_options(mpdecimal PRIVATE -Wno-conversion) - endif() - - if(SOURCEMETA_COMPILER_MSVC) - target_compile_options(mpdecimal PRIVATE /wd4200) - target_compile_options(mpdecimal PRIVATE /wd4702) - target_compile_options(mpdecimal PRIVATE /wd4996) - endif() - - target_include_directories(mpdecimal PRIVATE - "${MPDECIMAL_SOURCE_DIR}") - - target_include_directories(mpdecimal PUBLIC - "$" - "$") - - target_compile_definitions(mpdecimal PUBLIC MPD_CONFIG_64) - foreach(config_item ${MPD_CONFIG_LIST}) - target_compile_definitions(mpdecimal PRIVATE ${config_item}) - endforeach() - - target_compile_definitions(mpdecimal PRIVATE NDEBUG) - - if(SOURCEMETA_OS_LINUX) - target_compile_definitions(mpdecimal PRIVATE 
_GNU_SOURCE) - endif() - - if(UNIX AND NOT APPLE) - target_link_libraries(mpdecimal PRIVATE m) - endif() - - if(SOURCEMETA_COMPILER_LLVM OR SOURCEMETA_COMPILER_GCC) - target_compile_options(mpdecimal PRIVATE -Wall -Wextra -Wno-unknown-pragmas) - if(BUILD_SHARED_LIBS) - target_compile_options(mpdecimal PUBLIC -fvisibility=default) - endif() - endif() - - if(MSVC) - if(BUILD_SHARED_LIBS) - target_compile_definitions(mpdecimal PRIVATE BUILD_LIBMPDEC) - else() - target_compile_options(mpdecimal PRIVATE /wd4273) - target_compile_definitions(mpdecimal PUBLIC BUILD_LIBMPDEC) - endif() - endif() - - add_library(mpdecimal::mpdecimal ALIAS mpdecimal) - - set_target_properties(mpdecimal - PROPERTIES - OUTPUT_NAME mpdecimal - PUBLIC_HEADER "${MPDECIMAL_PUBLIC_HEADER}" - C_VISIBILITY_PRESET "default" - C_VISIBILITY_INLINES_HIDDEN FALSE - POSITION_INDEPENDENT_CODE ON - EXPORT_NAME mpdecimal) - - if(SOURCEMETA_CORE_INSTALL) - include(GNUInstallDirs) - install(TARGETS mpdecimal - EXPORT mpdecimal - PUBLIC_HEADER DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" - COMPONENT sourcemeta_core_dev - RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" - COMPONENT sourcemeta_core - LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" - COMPONENT sourcemeta_core - NAMELINK_COMPONENT sourcemeta_core_dev - ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" - COMPONENT sourcemeta_core_dev) - install(EXPORT mpdecimal - DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/mpdecimal" - NAMESPACE mpdecimal:: - COMPONENT sourcemeta_core_dev) - - file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/mpdecimal-config.cmake - "include(\"\${CMAKE_CURRENT_LIST_DIR}/mpdecimal.cmake\")\n" - "check_required_components(\"mpdecimal\")\n") - install(FILES - "${CMAKE_CURRENT_BINARY_DIR}/mpdecimal-config.cmake" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/mpdecimal" - COMPONENT sourcemeta_core_dev) - endif() - - set(mpdecimal_FOUND ON) -endif() diff --git a/vendor/core/cmake/Findyaml.cmake b/vendor/core/cmake/Findyaml.cmake deleted file mode 100644 index 
2e4c160c..00000000 --- a/vendor/core/cmake/Findyaml.cmake +++ /dev/null @@ -1,103 +0,0 @@ -if(NOT Yaml_FOUND) - set(YAML_DIR "${PROJECT_SOURCE_DIR}/vendor/yaml") - set(YAML_PUBLIC_HEADER "${YAML_DIR}/include/yaml.h") - - set(YAML_SOURCES - "${YAML_PUBLIC_HEADER}" - "${YAML_DIR}/src/api.c" - "${YAML_DIR}/src/dumper.c" - "${YAML_DIR}/src/emitter.c" - "${YAML_DIR}/src/loader.c" - "${YAML_DIR}/src/parser.c" - "${YAML_DIR}/src/reader.c" - "${YAML_DIR}/src/scanner.c" - "${YAML_DIR}/src/writer.c" - "${YAML_DIR}/src/yaml_private.h") - - add_library(yaml ${YAML_SOURCES}) - sourcemeta_add_default_options(PRIVATE yaml) - - if(SOURCEMETA_COMPILER_LLVM OR SOURCEMETA_COMPILER_GCC) - target_compile_options(yaml PRIVATE -Wno-implicit-function-declaration) - target_compile_options(yaml PRIVATE -Wno-int-to-pointer-cast) - target_compile_options(yaml PRIVATE -Wno-shadow) - target_compile_options(yaml PRIVATE -Wno-sign-conversion) - target_compile_options(yaml PRIVATE -Wno-shorten-64-to-32) - target_compile_options(yaml PRIVATE -Wno-newline-eof) - target_compile_options(yaml PRIVATE -Wno-conditional-uninitialized) - target_compile_options(yaml PRIVATE -Wno-implicit-int-conversion) - target_compile_options(yaml PRIVATE -Wno-conversion) - target_compile_options(yaml PRIVATE -Wno-unused-value) - endif() - - if(SOURCEMETA_COMPILER_MSVC) - target_compile_options(yaml PRIVATE /wd4996) - target_compile_options(yaml PRIVATE /wd4456) - target_compile_options(yaml PRIVATE /wd4457) - target_compile_options(yaml PRIVATE /wd4267) - target_compile_options(yaml PRIVATE /wd4244) - target_compile_options(yaml PRIVATE /wd4100) - target_compile_options(yaml PRIVATE /wd4245) - target_compile_options(yaml PRIVATE /wd4701) - target_compile_options(yaml PRIVATE /wd4702) - endif() - - if(SOURCEMETA_OS_LINUX) - message(STATUS "Compiling libyaml with _GNU_SOURCE") - # See https://github.com/3DSGuy/Project_CTR/issues/122 - target_compile_definitions(yaml PRIVATE _GNU_SOURCE) - endif() - - if(BUILD_SHARED_LIBS) 
- target_compile_definitions(yaml PUBLIC YAML_DECLARE_EXPORT) - else() - target_compile_definitions(yaml PUBLIC YAML_DECLARE_STATIC) - endif() - - target_include_directories(yaml PRIVATE "${YAML_DIR}/include") - target_include_directories(yaml PUBLIC - "$" - "$") - - target_compile_definitions(yaml PRIVATE YAML_VERSION_STRING="0.0.0") - target_compile_definitions(yaml PRIVATE YAML_VERSION_MAJOR=0) - target_compile_definitions(yaml PRIVATE YAML_VERSION_MINOR=0) - target_compile_definitions(yaml PRIVATE YAML_VERSION_PATCH=0) - - set_target_properties(yaml - PROPERTIES - OUTPUT_NAME yaml - PUBLIC_HEADER "${YAML_PUBLIC_HEADER}" - C_VISIBILITY_PRESET "default" - C_VISIBILITY_INLINES_HIDDEN FALSE - EXPORT_NAME yaml) - - if(SOURCEMETA_CORE_INSTALL) - include(GNUInstallDirs) - install(TARGETS yaml - EXPORT yaml - PUBLIC_HEADER DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" - COMPONENT sourcemeta_core_dev - RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" - COMPONENT sourcemeta_core - LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" - COMPONENT sourcemeta_core - NAMELINK_COMPONENT sourcemeta_core_dev - ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" - COMPONENT sourcemeta_core_dev) - install(EXPORT yaml - DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/yaml" - NAMESPACE yaml:: - COMPONENT sourcemeta_core_dev) - - file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/yaml-config.cmake - "include(\"\${CMAKE_CURRENT_LIST_DIR}/yaml.cmake\")\n" - "check_required_components(\"yaml\")\n") - install(FILES - "${CMAKE_CURRENT_BINARY_DIR}/yaml-config.cmake" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/yaml" - COMPONENT sourcemeta_core_dev) - endif() - - set(Yaml_FOUND ON) -endif() diff --git a/vendor/core/cmake/common/compiler/options.cmake b/vendor/core/cmake/common/compiler/options.cmake index 1c14a167..37754283 100644 --- a/vendor/core/cmake/common/compiler/options.cmake +++ b/vendor/core/cmake/common/compiler/options.cmake @@ -46,6 +46,10 @@ function(sourcemeta_add_default_options visibility target) # To improve 
how much GCC/Clang will vectorize -fno-math-errno + -fno-trapping-math + -fno-signed-zeros + -freciprocal-math + -fassociative-math # Assume that signed arithmetic overflow of addition, subtraction and # multiplication wraps around using twos-complement representation @@ -82,7 +86,6 @@ function(sourcemeta_add_default_options visibility target) -fslp-vectorize) elseif(SOURCEMETA_COMPILER_GCC) target_compile_options("${target}" ${visibility} - -fno-trapping-math # Newer versions of GCC (i.e. 14) seem to print a lot of false-positives here $<$,$>:-Wno-dangling-reference> # GCC seems to print a lot of false-positives here diff --git a/vendor/core/cmake/common/defaults.cmake b/vendor/core/cmake/common/defaults.cmake index 23850584..93c8bc14 100644 --- a/vendor/core/cmake/common/defaults.cmake +++ b/vendor/core/cmake/common/defaults.cmake @@ -97,20 +97,21 @@ endif() # Note we don't enable LTO on RelWithDebInfo, as it breaks debugging symbols # on at least AppleClang, making stepping through source code impossible. +# LTO is applied globally because it is a whole-program optimization. +# Every translation unit must be compiled with LTO flags for the linker +# to perform cross-module optimization effectively. 
if(CMAKE_BUILD_TYPE STREQUAL "Release") if(SOURCEMETA_COMPILER_GCC AND NOT BUILD_SHARED_LIBS) message(STATUS "Enabling Fat LTO") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -flto -ffat-lto-objects") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -flto") - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -flto") + add_compile_options(-flto -ffat-lto-objects) + add_link_options(-flto) endif() # TODO: Make this work on Linux on LLVM if(SOURCEMETA_COMPILER_LLVM AND NOT BUILD_SHARED_LIBS AND APPLE) message(STATUS "Enabling Fat LTO") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -flto=full") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -flto=full") - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -flto=full") + add_compile_options(-flto=full) + add_link_options(-flto=full) endif() endif() diff --git a/vendor/core/cmake/common/targets/executable.cmake b/vendor/core/cmake/common/targets/executable.cmake index 4f5db98f..2066d39a 100644 --- a/vendor/core/cmake/common/targets/executable.cmake +++ b/vendor/core/cmake/common/targets/executable.cmake @@ -30,5 +30,45 @@ function(sourcemeta_executable) add_executable("${TARGET_NAME}" ${SOURCEMETA_EXECUTABLE_SOURCES}) sourcemeta_add_default_options(PRIVATE ${TARGET_NAME}) + + # See https://best.openssf.org/Compiler-Hardening-Guides/Compiler-Options-Hardening-Guide-for-C-and-C++.html + # Position Independent Executable (PIE) for ASLR support + if(SOURCEMETA_COMPILER_LLVM OR SOURCEMETA_COMPILER_GCC) + target_compile_options(${TARGET_NAME} PRIVATE + $<$:-fPIE> + $<$:-fPIE> + $<$:-fPIE>) + target_link_options(${TARGET_NAME} PRIVATE + $<$:-pie> + $<$:-pie> + $<$:-pie>) + endif() + + # See https://learn.microsoft.com/en-us/cpp/build/reference/guard-enable-control-flow-guard + # See https://learn.microsoft.com/en-us/cpp/build/reference/cetcompat + if(SOURCEMETA_COMPILER_MSVC) + target_compile_options(${TARGET_NAME} PRIVATE /guard:cf) + target_link_options(${TARGET_NAME} PRIVATE /guard:cf /CETCOMPAT) + 
endif() + + # Linux-specific ELF linker hardening options + if(SOURCEMETA_OS_LINUX AND (SOURCEMETA_COMPILER_LLVM OR SOURCEMETA_COMPILER_GCC)) + target_link_options(${TARGET_NAME} PRIVATE + "LINKER:-z,nodlopen" + "LINKER:-z,noexecstack" + "LINKER:-z,relro" + "LINKER:-z,now" + "LINKER:--as-needed") + if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.18") + include(CheckLinkerFlag) + check_linker_flag(CXX "LINKER:--no-copy-dt-needed-entries" + SOURCEMETA_LINKER_NO_COPY_DT_NEEDED) + if(SOURCEMETA_LINKER_NO_COPY_DT_NEEDED) + target_link_options(${TARGET_NAME} PRIVATE + "LINKER:--no-copy-dt-needed-entries") + endif() + endif() + endif() + set_target_properties("${TARGET_NAME}" PROPERTIES FOLDER "${FOLDER_NAME}") endfunction() diff --git a/vendor/core/config.cmake.in b/vendor/core/config.cmake.in index 2e23e907..236073c7 100644 --- a/vendor/core/config.cmake.in +++ b/vendor/core/config.cmake.in @@ -4,14 +4,15 @@ list(APPEND SOURCEMETA_CORE_COMPONENTS ${Core_FIND_COMPONENTS}) list(APPEND SOURCEMETA_CORE_COMPONENTS ${core_FIND_COMPONENTS}) if(NOT SOURCEMETA_CORE_COMPONENTS) + list(APPEND SOURCEMETA_CORE_COMPONENTS preprocessor) list(APPEND SOURCEMETA_CORE_COMPONENTS io) list(APPEND SOURCEMETA_CORE_COMPONENTS process) list(APPEND SOURCEMETA_CORE_COMPONENTS parallel) list(APPEND SOURCEMETA_CORE_COMPONENTS numeric) + list(APPEND SOURCEMETA_CORE_COMPONENTS unicode) list(APPEND SOURCEMETA_CORE_COMPONENTS punycode) list(APPEND SOURCEMETA_CORE_COMPONENTS time) - list(APPEND SOURCEMETA_CORE_COMPONENTS uuid) - list(APPEND SOURCEMETA_CORE_COMPONENTS md5) + list(APPEND SOURCEMETA_CORE_COMPONENTS crypto) list(APPEND SOURCEMETA_CORE_COMPONENTS regex) list(APPEND SOURCEMETA_CORE_COMPONENTS uri) list(APPEND SOURCEMETA_CORE_COMPONENTS uritemplate) @@ -20,18 +21,19 @@ if(NOT SOURCEMETA_CORE_COMPONENTS) list(APPEND SOURCEMETA_CORE_COMPONENTS jsonpointer) list(APPEND SOURCEMETA_CORE_COMPONENTS jsonschema) list(APPEND SOURCEMETA_CORE_COMPONENTS yaml) + list(APPEND SOURCEMETA_CORE_COMPONENTS semver) 
list(APPEND SOURCEMETA_CORE_COMPONENTS html) list(APPEND SOURCEMETA_CORE_COMPONENTS alterschema) list(APPEND SOURCEMETA_CORE_COMPONENTS editorschema) - list(APPEND SOURCEMETA_CORE_COMPONENTS schemaconfig) list(APPEND SOURCEMETA_CORE_COMPONENTS options) - list(APPEND SOURCEMETA_CORE_COMPONENTS build) endif() include(CMakeFindDependencyMacro) foreach(component ${SOURCEMETA_CORE_COMPONENTS}) - if(component STREQUAL "io") + if(component STREQUAL "preprocessor") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_preprocessor.cmake") + elseif(component STREQUAL "io") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") elseif(component STREQUAL "process") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_process.cmake") @@ -39,16 +41,20 @@ foreach(component ${SOURCEMETA_CORE_COMPONENTS}) find_dependency(Threads) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_parallel.cmake") elseif(component STREQUAL "numeric") - find_dependency(mpdecimal CONFIG) + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_preprocessor.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") + elseif(component STREQUAL "unicode") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") elseif(component STREQUAL "punycode") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_punycode.cmake") elseif(component STREQUAL "time") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_time.cmake") - elseif(component STREQUAL "uuid") - include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_uuid.cmake") - elseif(component STREQUAL "md5") - include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_md5.cmake") + elseif(component STREQUAL "crypto") + if(@SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL@) + find_dependency(OpenSSL) + endif() + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_crypto.cmake") elseif(component STREQUAL "regex") find_dependency(PCRE2 CONFIG) 
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_regex.cmake") @@ -58,47 +64,54 @@ foreach(component ${SOURCEMETA_CORE_COMPONENTS}) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_uritemplate.cmake") elseif(component STREQUAL "json") - find_dependency(mpdecimal CONFIG) + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_preprocessor.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake") elseif(component STREQUAL "jsonl") - find_dependency(mpdecimal CONFIG) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonl.cmake") elseif(component STREQUAL "jsonpointer") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_regex.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_uri.cmake") - find_dependency(mpdecimal CONFIG) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonpointer.cmake") elseif(component STREQUAL "jsonschema") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_uri.cmake") - find_dependency(mpdecimal CONFIG) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") 
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonpointer.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonschema.cmake") elseif(component STREQUAL "yaml") - find_dependency(mpdecimal CONFIG) + find_dependency(PCRE2 CONFIG) + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_regex.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_uri.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake") - find_dependency(yaml CONFIG) - include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonpointer.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_yaml.cmake") + elseif(component STREQUAL "semver") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_preprocessor.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_semver.cmake") elseif(component STREQUAL "html") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_preprocessor.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_html.cmake") elseif(component STREQUAL "alterschema") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_uri.cmake") - find_dependency(mpdecimal CONFIG) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") find_dependency(PCRE2 CONFIG) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_regex.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake") @@ -107,26 +120,15 @@ foreach(component ${SOURCEMETA_CORE_COMPONENTS}) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_alterschema.cmake") elseif(component STREQUAL "editorschema") 
include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_uri.cmake") - find_dependency(mpdecimal CONFIG) include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") + include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_unicode.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonpointer.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonschema.cmake") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_editorschema.cmake") - elseif(component STREQUAL "schemaconfig") - include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_uri.cmake") - find_dependency(mpdecimal CONFIG) - include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_numeric.cmake") - include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") - include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_json.cmake") - include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_jsonpointer.cmake") - include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_schemaconfig.cmake") elseif(component STREQUAL "options") include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_options.cmake") - elseif(component STREQUAL "build") - include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_io.cmake") - include("${CMAKE_CURRENT_LIST_DIR}/sourcemeta_core_build.cmake") else() message(FATAL_ERROR "Unknown Sourcemeta Core component: ${component}") endif() diff --git a/vendor/core/src/core/crypto/CMakeLists.txt b/vendor/core/src/core/crypto/CMakeLists.txt new file mode 100644 index 00000000..2d7fe1fc --- /dev/null +++ b/vendor/core/src/core/crypto/CMakeLists.txt @@ -0,0 +1,13 @@ +sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME crypto + PRIVATE_HEADERS sha256.h uuid.h + SOURCES crypto_sha256.cc crypto_uuid.cc) + +if(SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL) + target_compile_definitions(sourcemeta_core_crypto + PRIVATE SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL) + target_link_libraries(sourcemeta_core_crypto 
PRIVATE OpenSSL::Crypto) +endif() + +if(SOURCEMETA_CORE_INSTALL) + sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME crypto) +endif() diff --git a/vendor/core/src/core/crypto/crypto_sha256.cc b/vendor/core/src/core/crypto/crypto_sha256.cc new file mode 100644 index 00000000..e4be5eb3 --- /dev/null +++ b/vendor/core/src/core/crypto/crypto_sha256.cc @@ -0,0 +1,230 @@ +#include + +#include // std::array +#include // std::uint32_t, std::uint64_t + +#ifdef SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL +#include // EVP_MD_CTX_new, EVP_DigestInit_ex, EVP_sha256, EVP_DigestUpdate, EVP_DigestFinal_ex, EVP_MD_CTX_free +#include // std::runtime_error +#else +#include // std::memcpy +#endif + +namespace { +constexpr std::array HEX_DIGITS{{'0', '1', '2', '3', '4', '5', '6', + '7', '8', '9', 'a', 'b', 'c', 'd', + 'e', 'f', '\0'}}; +} // namespace + +#ifdef SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL + +namespace sourcemeta::core { + +auto sha256(const std::string_view input, std::ostream &output) -> void { + auto *context = EVP_MD_CTX_new(); + if (context == nullptr) { + throw std::runtime_error("Could not allocate OpenSSL digest context"); + } + + if (EVP_DigestInit_ex(context, EVP_sha256(), nullptr) != 1 || + EVP_DigestUpdate(context, input.data(), input.size()) != 1) { + EVP_MD_CTX_free(context); + throw std::runtime_error("Could not compute SHA-256 digest"); + } + + std::array digest{}; + unsigned int length = 0; + if (EVP_DigestFinal_ex(context, digest.data(), &length) != 1) { + EVP_MD_CTX_free(context); + throw std::runtime_error("Could not finalize SHA-256 digest"); + } + + EVP_MD_CTX_free(context); + + for (std::uint64_t index = 0; index < 32u; ++index) { + output.put(HEX_DIGITS[(digest[index] >> 4u) & 0x0fu]); + output.put(HEX_DIGITS[digest[index] & 0x0fu]); + } +} + +} // namespace sourcemeta::core + +#else + +namespace { + +inline constexpr auto rotate_right(std::uint32_t value, + std::uint64_t count) noexcept + -> std::uint32_t { + return (value >> count) | 
(value << (32u - count)); +} + +// FIPS 180-4 Section 4.1.2 logical functions +inline constexpr auto big_sigma_0(std::uint32_t value) noexcept + -> std::uint32_t { + return rotate_right(value, 2u) ^ rotate_right(value, 13u) ^ + rotate_right(value, 22u); +} + +inline constexpr auto big_sigma_1(std::uint32_t value) noexcept + -> std::uint32_t { + return rotate_right(value, 6u) ^ rotate_right(value, 11u) ^ + rotate_right(value, 25u); +} + +inline constexpr auto small_sigma_0(std::uint32_t value) noexcept + -> std::uint32_t { + return rotate_right(value, 7u) ^ rotate_right(value, 18u) ^ (value >> 3u); +} + +inline constexpr auto small_sigma_1(std::uint32_t value) noexcept + -> std::uint32_t { + return rotate_right(value, 17u) ^ rotate_right(value, 19u) ^ (value >> 10u); +} + +// Equivalent to (x & y) ^ (~x & z) but avoids a bitwise NOT +inline constexpr auto choice(std::uint32_t x, std::uint32_t y, + std::uint32_t z) noexcept -> std::uint32_t { + return z ^ (x & (y ^ z)); +} + +inline constexpr auto majority(std::uint32_t x, std::uint32_t y, + std::uint32_t z) noexcept -> std::uint32_t { + return (x & y) ^ (x & z) ^ (y & z); +} + +inline auto sha256_process_block(const unsigned char *block, + std::array &state) noexcept + -> void { + // First 32 bits of the fractional parts of the cube roots + // of the first 64 prime numbers (FIPS 180-4 Section 4.2.2) + static constexpr std::array round_constants = { + {0x428a2f98U, 0x71374491U, 0xb5c0fbcfU, 0xe9b5dba5U, 0x3956c25bU, + 0x59f111f1U, 0x923f82a4U, 0xab1c5ed5U, 0xd807aa98U, 0x12835b01U, + 0x243185beU, 0x550c7dc3U, 0x72be5d74U, 0x80deb1feU, 0x9bdc06a7U, + 0xc19bf174U, 0xe49b69c1U, 0xefbe4786U, 0x0fc19dc6U, 0x240ca1ccU, + 0x2de92c6fU, 0x4a7484aaU, 0x5cb0a9dcU, 0x76f988daU, 0x983e5152U, + 0xa831c66dU, 0xb00327c8U, 0xbf597fc7U, 0xc6e00bf3U, 0xd5a79147U, + 0x06ca6351U, 0x14292967U, 0x27b70a85U, 0x2e1b2138U, 0x4d2c6dfcU, + 0x53380d13U, 0x650a7354U, 0x766a0abbU, 0x81c2c92eU, 0x92722c85U, + 0xa2bfe8a1U, 0xa81a664bU, 0xc24b8b70U, 
0xc76c51a3U, 0xd192e819U, + 0xd6990624U, 0xf40e3585U, 0x106aa070U, 0x19a4c116U, 0x1e376c08U, + 0x2748774cU, 0x34b0bcb5U, 0x391c0cb3U, 0x4ed8aa4aU, 0x5b9cca4fU, + 0x682e6ff3U, 0x748f82eeU, 0x78a5636fU, 0x84c87814U, 0x8cc70208U, + 0x90befffaU, 0xa4506cebU, 0xbef9a3f7U, 0xc67178f2U}}; + + // Decode 16 big-endian 32-bit words from the block + std::array schedule; + for (std::uint64_t word_index = 0; word_index < 16u; ++word_index) { + const std::uint64_t byte_index = word_index * 4u; + schedule[word_index] = + (static_cast(block[byte_index]) << 24u) | + (static_cast(block[byte_index + 1u]) << 16u) | + (static_cast(block[byte_index + 2u]) << 8u) | + static_cast(block[byte_index + 3u]); + } + + // Extend the message schedule (FIPS 180-4 Section 6.2.2 step 1) + for (std::uint64_t index = 16u; index < 64u; ++index) { + schedule[index] = + small_sigma_1(schedule[index - 2u]) + schedule[index - 7u] + + small_sigma_0(schedule[index - 15u]) + schedule[index - 16u]; + } + + auto working = state; + + // Compression function (FIPS 180-4 Section 6.2.2 step 3) + for (std::uint64_t round_index = 0u; round_index < 64u; ++round_index) { + const auto temporary_1 = working[7] + big_sigma_1(working[4]) + + choice(working[4], working[5], working[6]) + + round_constants[round_index] + + schedule[round_index]; + const auto temporary_2 = + big_sigma_0(working[0]) + majority(working[0], working[1], working[2]); + + working[7] = working[6]; + working[6] = working[5]; + working[5] = working[4]; + working[4] = working[3] + temporary_1; + working[3] = working[2]; + working[2] = working[1]; + working[1] = working[0]; + working[0] = temporary_1 + temporary_2; + } + + for (std::uint64_t index = 0u; index < 8u; ++index) { + state[index] += working[index]; + } +} + +} // namespace + +namespace sourcemeta::core { + +auto sha256(const std::string_view input, std::ostream &output) -> void { + // Initial hash values: first 32 bits of the fractional parts of the + // square roots of the first 8 primes 
(FIPS 180-4 Section 5.3.3) + std::array state{}; + state[0] = 0x6a09e667U; + state[1] = 0xbb67ae85U; + state[2] = 0x3c6ef372U; + state[3] = 0xa54ff53aU; + state[4] = 0x510e527fU; + state[5] = 0x9b05688cU; + state[6] = 0x1f83d9abU; + state[7] = 0x5be0cd19U; + + const auto *const input_bytes = + reinterpret_cast(input.data()); + const std::size_t input_length = input.size(); + + // Process all full 64-byte blocks directly from the input (streaming) + std::size_t processed_bytes = 0u; + while (input_length - processed_bytes >= 64u) { + sha256_process_block(input_bytes + processed_bytes, state); + processed_bytes += 64u; + } + + // Prepare the final block(s) (one or two 64-byte blocks) + std::array final_block{}; + const std::size_t remaining_bytes = input_length - processed_bytes; + if (remaining_bytes > 0u) { + std::memcpy(final_block.data(), input_bytes + processed_bytes, + remaining_bytes); + } + + // Append the 0x80 byte after the message data + final_block[remaining_bytes] = 0x80u; + + // Append length in bits as big-endian 64-bit at the end of the padding + const std::uint64_t message_length_bits = + static_cast(input_length) * 8ull; + + if (remaining_bytes < 56u) { + for (std::uint64_t index = 0u; index < 8u; ++index) { + final_block[56u + index] = static_cast( + (message_length_bits >> (8u * (7u - index))) & 0xffu); + } + sha256_process_block(final_block.data(), state); + } else { + for (std::uint64_t index = 0u; index < 8u; ++index) { + final_block[64u + 56u + index] = static_cast( + (message_length_bits >> (8u * (7u - index))) & 0xffu); + } + + sha256_process_block(final_block.data(), state); + sha256_process_block(final_block.data() + 64u, state); + } + + for (std::uint64_t state_index = 0u; state_index < 8u; ++state_index) { + const auto value = state[state_index]; + for (std::uint64_t nibble = 0u; nibble < 8u; ++nibble) { + const auto shift = 28u - nibble * 4u; + output.put(HEX_DIGITS[(value >> shift) & 0x0fu]); + } + } +} + +} // namespace 
sourcemeta::core + +#endif diff --git a/vendor/core/src/core/crypto/crypto_uuid.cc b/vendor/core/src/core/crypto/crypto_uuid.cc new file mode 100644 index 00000000..cb7f674e --- /dev/null +++ b/vendor/core/src/core/crypto/crypto_uuid.cc @@ -0,0 +1,81 @@ +#include + +#include // std::array +#include // std::size_t +#include // std::string_view + +#ifdef SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL +#include // RAND_bytes +#include // std::runtime_error +#else +#include // std::random_device, std::mt19937, std::uniform_int_distribution +#endif + +namespace sourcemeta::core { + +// See RFC 9562 Section 5.4 +// Format: xxxxxxxx-xxxx-4xxx-Nxxx-xxxxxxxxxxxx +// where 4 is the version and N is the variant (8, 9, a, or b) +auto uuidv4() -> std::string { + static constexpr std::string_view digits = "0123456789abcdef"; + static constexpr std::string_view variant_digits = "89ab"; + static constexpr std::array dash = { + {false, false, false, false, true, false, true, false, true, false, true, + false, false, false, false, false}}; + +#ifdef SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL + std::array random_bytes{}; + if (RAND_bytes(random_bytes.data(), static_cast(random_bytes.size())) != + 1) { + throw std::runtime_error("Could not generate random bytes with OpenSSL"); + } +#else + static std::random_device device; + static std::mt19937 generator{device()}; + std::uniform_int_distribution distribution(0, + 15); + std::uniform_int_distribution + variant_distribution(0, 3); +#endif + + std::string result; + result.reserve(36); + for (std::size_t index = 0; index < dash.size(); ++index) { + if (dash[index]) { + result += '-'; + } + +#ifdef SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL + const auto high_nibble = (random_bytes[index] >> 4u) & 0x0fu; + const auto low_nibble = random_bytes[index] & 0x0fu; +#endif + + // RFC 9562 Section 5.4: version bits (48-51) must be 0b0100 + if (index == 6) { + result += '4'; + // RFC 9562 Section 5.4: variant bits (64-65) must be 0b10 + } else if (index 
== 8) { +#ifdef SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL + result += variant_digits[high_nibble & 0x03u]; +#else + result += variant_digits[variant_distribution(generator)]; +#endif + } else { +#ifdef SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL + result += digits[high_nibble]; +#else + result += digits[distribution(generator)]; +#endif + } + +#ifdef SOURCEMETA_CORE_CRYPTO_USE_SYSTEM_OPENSSL + result += digits[low_nibble]; +#else + result += digits[distribution(generator)]; +#endif + } + + return result; +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/crypto/include/sourcemeta/core/crypto.h b/vendor/core/src/core/crypto/include/sourcemeta/core/crypto.h new file mode 100644 index 00000000..332cc842 --- /dev/null +++ b/vendor/core/src/core/crypto/include/sourcemeta/core/crypto.h @@ -0,0 +1,16 @@ +#ifndef SOURCEMETA_CORE_CRYPTO_H_ +#define SOURCEMETA_CORE_CRYPTO_H_ + +/// @defgroup crypto Crypto +/// @brief Cryptographic hash functions and UUID generation. +/// +/// This functionality is included as follows: +/// +/// ```cpp +/// #include +/// ``` + +#include +#include + +#endif diff --git a/vendor/core/src/core/crypto/include/sourcemeta/core/crypto_sha256.h b/vendor/core/src/core/crypto/include/sourcemeta/core/crypto_sha256.h new file mode 100644 index 00000000..34c15834 --- /dev/null +++ b/vendor/core/src/core/crypto/include/sourcemeta/core/crypto_sha256.h @@ -0,0 +1,30 @@ +#ifndef SOURCEMETA_CORE_CRYPTO_SHA256_H_ +#define SOURCEMETA_CORE_CRYPTO_SHA256_H_ + +#ifndef SOURCEMETA_CORE_CRYPTO_EXPORT +#include +#endif + +#include // std::ostream +#include // std::string_view + +namespace sourcemeta::core { + +/// @ingroup crypto +/// Hash a string using SHA-256. 
For example: +/// +/// ```cpp +/// #include +/// #include +/// #include +/// +/// std::ostringstream result; +/// sourcemeta::core::sha256("foo bar", result); +/// std::cout << result.str() << "\n"; +/// ``` +auto SOURCEMETA_CORE_CRYPTO_EXPORT sha256(const std::string_view input, + std::ostream &output) -> void; + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/crypto/include/sourcemeta/core/crypto_uuid.h b/vendor/core/src/core/crypto/include/sourcemeta/core/crypto_uuid.h new file mode 100644 index 00000000..1439a9ae --- /dev/null +++ b/vendor/core/src/core/crypto/include/sourcemeta/core/crypto_uuid.h @@ -0,0 +1,27 @@ +#ifndef SOURCEMETA_CORE_CRYPTO_UUID_H_ +#define SOURCEMETA_CORE_CRYPTO_UUID_H_ + +#ifndef SOURCEMETA_CORE_CRYPTO_EXPORT +#include +#endif + +#include // std::string + +namespace sourcemeta::core { + +/// @ingroup crypto +/// Generate a random UUID v4 string. For example: +/// +/// ```cpp +/// #include +/// #include +/// +/// std::cout << sourcemeta::core::uuidv4() << "\n"; +/// ``` +/// +/// See https://www.rfc-editor.org/rfc/rfc9562#name-uuid-version-4 +SOURCEMETA_CORE_CRYPTO_EXPORT auto uuidv4() -> std::string; + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/html/CMakeLists.txt b/vendor/core/src/core/html/CMakeLists.txt index 6b35797f..60855893 100644 --- a/vendor/core/src/core/html/CMakeLists.txt +++ b/vendor/core/src/core/html/CMakeLists.txt @@ -1,7 +1,9 @@ sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME html - PRIVATE_HEADERS escape.h encoder.h elements.h - SOURCES escape.cc encoder.cc) + PRIVATE_HEADERS buffer.h escape.h writer.h + SOURCES escape.cc writer.cc) if(SOURCEMETA_CORE_INSTALL) sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME html) endif() + +target_link_libraries(sourcemeta_core_html PUBLIC sourcemeta::core::preprocessor) diff --git a/vendor/core/src/core/html/encoder.cc b/vendor/core/src/core/html/encoder.cc deleted file mode 100644 index 
ffddbc5e..00000000 --- a/vendor/core/src/core/html/encoder.cc +++ /dev/null @@ -1,74 +0,0 @@ -#include - -#include // std::ostream -#include // std::ostringstream -#include // std::string - -namespace sourcemeta::core { - -auto HTML::render() const -> std::string { - std::ostringstream output_stream; - output_stream << "<" << this->tag_name; - - // Render attributes - for (const auto &[attribute_name, attribute_value] : this->attributes) { - std::string escaped_value{attribute_value}; - html_escape(escaped_value); - output_stream << " " << attribute_name << "=\"" << escaped_value << "\""; - } - - if (this->self_closing) { - output_stream << " />"; - return output_stream.str(); - } - - output_stream << ">"; - - // Render children - if (this->child_elements.empty()) { - output_stream << "tag_name << ">"; - } else if (this->child_elements.size() == 1 && - std::get_if(&this->child_elements[0])) { - // Inline single text node - output_stream << this->render(this->child_elements[0]); - output_stream << "tag_name << ">"; - } else { - // Block level children - for (const auto &child_element : this->child_elements) { - output_stream << this->render(child_element); - } - output_stream << "tag_name << ">"; - } - - return output_stream.str(); -} - -auto HTML::render(const HTMLNode &child_element) const -> std::string { - if (const auto *text = std::get_if(&child_element)) { - std::string escaped_text{*text}; - html_escape(escaped_text); - return escaped_text; - } else if (const auto *raw_html = std::get_if(&child_element)) { - return raw_html->content; - } else if (const auto *html_element = std::get_if(&child_element)) { - return html_element->render(); - } - return ""; -} - -auto HTML::push_back(const HTMLNode &child) -> HTML & { - this->child_elements.push_back(child); - return *this; -} - -auto HTML::push_back(HTMLNode &&child) -> HTML & { - this->child_elements.push_back(std::move(child)); - return *this; -} - -auto operator<<(std::ostream &output_stream, const HTML 
&html_element) - -> std::ostream & { - return output_stream << html_element.render(); -} - -} // namespace sourcemeta::core diff --git a/vendor/core/src/core/html/escape.cc b/vendor/core/src/core/html/escape.cc index 6070d729..6b3097a1 100644 --- a/vendor/core/src/core/html/escape.cc +++ b/vendor/core/src/core/html/escape.cc @@ -93,4 +93,81 @@ auto html_escape(std::string &text) -> void { } } +static auto needs_escape(const std::string_view input) -> bool { + for (const char character : input) { + switch (character) { + case '&': + case '<': + case '>': + case '"': + case '\'': + return true; + default: + break; + } + } + + return false; +} + +auto html_escape_append(std::string &output, const std::string_view input) + -> void { + if (!needs_escape(input)) { + output += input; + return; + } + + for (const char character : input) { + switch (character) { + case '&': + output += "&"; + break; + case '<': + output += "<"; + break; + case '>': + output += ">"; + break; + case '"': + output += """; + break; + case '\'': + output += "'"; + break; + default: + output += character; + } + } +} + +auto html_escape_append(HTMLBuffer &output, const std::string_view input) + -> void { + if (!needs_escape(input)) { + output.append(input); + return; + } + + for (const char character : input) { + switch (character) { + case '&': + output.append("&"); + break; + case '<': + output.append("<"); + break; + case '>': + output.append(">"); + break; + case '"': + output.append("""); + break; + case '\'': + output.append("'"); + break; + default: + output.append(character); + } + } +} + } // namespace sourcemeta::core diff --git a/vendor/core/src/core/html/include/sourcemeta/core/html.h b/vendor/core/src/core/html/include/sourcemeta/core/html.h index b2a09289..23853c6c 100644 --- a/vendor/core/src/core/html/include/sourcemeta/core/html.h +++ b/vendor/core/src/core/html/include/sourcemeta/core/html.h @@ -11,7 +11,6 @@ /// #include /// ``` -#include -#include +#include #endif diff --git 
a/vendor/core/src/core/html/include/sourcemeta/core/html_buffer.h b/vendor/core/src/core/html/include/sourcemeta/core/html_buffer.h new file mode 100644 index 00000000..7c0c9f3c --- /dev/null +++ b/vendor/core/src/core/html/include/sourcemeta/core/html_buffer.h @@ -0,0 +1,93 @@ +#ifndef SOURCEMETA_CORE_HTML_BUFFER_H_ +#define SOURCEMETA_CORE_HTML_BUFFER_H_ + +#ifndef SOURCEMETA_CORE_HTML_EXPORT +#include +#endif + +#include + +#include // std::memcpy +#include // std::ostream +#include // std::string +#include // std::string_view + +namespace sourcemeta::core { + +/// @ingroup html +/// A fast append-only string buffer +class SOURCEMETA_CORE_HTML_EXPORT HTMLBuffer { +public: + HTMLBuffer() = default; + HTMLBuffer(const HTMLBuffer &) = delete; + auto operator=(const HTMLBuffer &) -> HTMLBuffer & = delete; + HTMLBuffer(HTMLBuffer &&) = delete; + auto operator=(HTMLBuffer &&) -> HTMLBuffer & = delete; + + SOURCEMETA_FORCEINLINE inline auto reserve(const std::size_t bytes) -> void { + this->buffer_.resize(bytes); + this->cursor_ = this->buffer_.data(); + this->end_ = this->cursor_ + bytes; + } + + SOURCEMETA_FORCEINLINE inline auto append(const char character) -> void { + if (!this->cursor_ || this->cursor_ >= this->end_) [[unlikely]] { + this->grow(1); + } + + *this->cursor_ = character; + ++this->cursor_; + } + + SOURCEMETA_FORCEINLINE inline auto append(const std::string_view data) + -> void { + const auto length{data.size()}; + if (length == 0) { + return; + } + + const auto remaining{ + this->cursor_ ? 
static_cast(this->end_ - this->cursor_) + : std::size_t{0}}; + if (remaining < length) [[unlikely]] { + this->grow(length); + } + + std::memcpy(this->cursor_, data.data(), length); + this->cursor_ += length; + } + + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto str() + -> const std::string & { + if (this->cursor_) { + this->buffer_.resize( + static_cast(this->cursor_ - this->buffer_.data())); + this->cursor_ = nullptr; + this->end_ = nullptr; + } + + return this->buffer_; + } + + auto write(std::ostream &stream) -> void; + +private: + auto grow(std::size_t needed) -> void; + +// Exporting symbols that depends on the standard C++ library is considered +// safe. +// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN +#if defined(_MSC_VER) +#pragma warning(disable : 4251) +#endif + std::string buffer_; +#if defined(_MSC_VER) +#pragma warning(default : 4251) +#endif + char *cursor_{nullptr}; + char *end_{nullptr}; +}; + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/html/include/sourcemeta/core/html_elements.h b/vendor/core/src/core/html/include/sourcemeta/core/html_elements.h deleted file mode 100644 index a7218265..00000000 --- a/vendor/core/src/core/html/include/sourcemeta/core/html_elements.h +++ /dev/null @@ -1,450 +0,0 @@ -#ifndef SOURCEMETA_CORE_HTML_ELEMENTS_H_ -#define SOURCEMETA_CORE_HTML_ELEMENTS_H_ - -#include - -namespace sourcemeta::core::html { - -#ifndef DOXYGEN -#define HTML_VOID_ELEMENT(name) \ - inline auto name() -> HTML { return HTML(#name, true); } \ - inline auto name(HTMLAttributes attributes) -> HTML { \ - return HTML(#name, std::move(attributes), true); \ - } - -#define HTML_CONTAINER_ELEMENT_NAMED(name, tag) \ - inline auto name(HTMLAttributes attributes) -> HTML { \ - return HTML(#tag, std::move(attributes)); \ - } \ - template \ - inline auto name(HTMLAttributes attributes, Children &&...children) \ - -> HTML { \ - return 
HTML(#tag, std::move(attributes), \ - std::forward(children)...); \ - } \ - template \ - inline auto name(Children &&...children) -> HTML { \ - return HTML(#tag, std::forward(children)...); \ - } - -#define HTML_CONTAINER_ELEMENT(name) HTML_CONTAINER_ELEMENT_NAMED(name, name) - -#define HTML_COMPACT_ELEMENT(name) \ - inline auto name(HTMLAttributes attributes) -> HTML { \ - return HTML(#name, std::move(attributes)); \ - } \ - template \ - inline auto name(HTMLAttributes attributes, Children &&...children) \ - -> HTML { \ - return HTML(#name, std::move(attributes), \ - std::forward(children)...); \ - } \ - template \ - inline auto name(Children &&...children) -> HTML { \ - return HTML(#name, std::forward(children)...); \ - } - -#define HTML_VOID_ATTR_ELEMENT(name) \ - inline auto name(HTMLAttributes attributes) -> HTML { \ - return HTML(#name, std::move(attributes), true); \ - } -#endif - -/// @ingroup html -inline auto raw(std::string html_content) -> HTMLRaw { - return HTMLRaw{std::move(html_content)}; -} - -// ============================================================================= -// Document Structure Elements -// ============================================================================= - -/// @ingroup html -HTML_CONTAINER_ELEMENT(html) - -/// @ingroup html -HTML_VOID_ATTR_ELEMENT(base) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(head) - -/// @ingroup html -HTML_VOID_ATTR_ELEMENT(link) - -/// @ingroup html -HTML_VOID_ATTR_ELEMENT(meta) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(style) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(title) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(body) - -// ============================================================================= -// Content Sectioning Elements -// ============================================================================= - -/// @ingroup html -HTML_CONTAINER_ELEMENT(address) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(article) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(aside) - -/// 
@ingroup html -HTML_CONTAINER_ELEMENT(footer) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(header) - -/// @ingroup html -HTML_COMPACT_ELEMENT(h1) -/// @ingroup html -HTML_COMPACT_ELEMENT(h2) -/// @ingroup html -HTML_COMPACT_ELEMENT(h3) -/// @ingroup html -HTML_COMPACT_ELEMENT(h4) -/// @ingroup html -HTML_COMPACT_ELEMENT(h5) -/// @ingroup html -HTML_COMPACT_ELEMENT(h6) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(hgroup) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(main) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(nav) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(section) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(search) - -// ============================================================================= -// Text Content Elements -// ============================================================================= - -/// @ingroup html -HTML_CONTAINER_ELEMENT(blockquote) - -/// @ingroup html -HTML_COMPACT_ELEMENT(dd) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(div) - -/// @ingroup html -HTML_COMPACT_ELEMENT(dl) - -/// @ingroup html -HTML_COMPACT_ELEMENT(dt) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(figcaption) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(figure) - -/// @ingroup html -HTML_VOID_ELEMENT(hr) - -/// @ingroup html -HTML_COMPACT_ELEMENT(li) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(menu) - -/// @ingroup html -HTML_COMPACT_ELEMENT(ol) - -/// @ingroup html -HTML_COMPACT_ELEMENT(p) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(pre) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(ul) - -// ============================================================================= -// Inline Text Semantics Elements -// ============================================================================= - -/// @ingroup html -HTML_COMPACT_ELEMENT(a) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(abbr) - -/// @ingroup html -HTML_COMPACT_ELEMENT(b) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(bdi) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(bdo) - -/// @ingroup html 
-HTML_VOID_ELEMENT(br) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(cite) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(code) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(data) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(dfn) - -/// @ingroup html -HTML_COMPACT_ELEMENT(em) - -/// @ingroup html -HTML_COMPACT_ELEMENT(i) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(kbd) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(mark) - -/// @ingroup html -HTML_COMPACT_ELEMENT(q) - -/// @ingroup html -HTML_COMPACT_ELEMENT(rp) - -/// @ingroup html -HTML_COMPACT_ELEMENT(rt) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(ruby) - -/// @ingroup html -HTML_COMPACT_ELEMENT(s) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(samp) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(small) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(span) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(strong) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(sub) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(sup) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(time) - -/// @ingroup html -HTML_COMPACT_ELEMENT(u) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(var) - -/// @ingroup html -HTML_VOID_ELEMENT(wbr) - -// ============================================================================= -// Image and Multimedia Elements -// ============================================================================= - -/// @ingroup html -HTML_VOID_ATTR_ELEMENT(area) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(audio) - -/// @ingroup html -HTML_VOID_ATTR_ELEMENT(img) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(map) - -/// @ingroup html -HTML_VOID_ATTR_ELEMENT(track) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(video) - -// ============================================================================= -// Embedded Content Elements -// ============================================================================= - -/// @ingroup html -HTML_VOID_ATTR_ELEMENT(embed) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(iframe) - -/// @ingroup html 
-HTML_CONTAINER_ELEMENT(object) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(picture) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(portal) - -/// @ingroup html -HTML_VOID_ATTR_ELEMENT(source) - -// ============================================================================= -// Scripting Elements -// ============================================================================= - -/// @ingroup html -HTML_CONTAINER_ELEMENT(canvas) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(noscript) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(script) - -// ============================================================================= -// Demarcating Edits Elements -// ============================================================================= - -/// @ingroup html -HTML_CONTAINER_ELEMENT(del) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(ins) - -// ============================================================================= -// Table Content Elements -// ============================================================================= - -/// @ingroup html -HTML_CONTAINER_ELEMENT(caption) - -/// @ingroup html -HTML_VOID_ATTR_ELEMENT(col) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(colgroup) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(table) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(tbody) - -/// @ingroup html -HTML_COMPACT_ELEMENT(td) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(tfoot) - -/// @ingroup html -HTML_COMPACT_ELEMENT(th) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(thead) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(tr) - -// ============================================================================= -// Forms Elements -// ============================================================================= - -/// @ingroup html -HTML_CONTAINER_ELEMENT(button) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(datalist) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(fieldset) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(form) - -/// @ingroup html -HTML_VOID_ATTR_ELEMENT(input) - -/// 
@ingroup html -HTML_CONTAINER_ELEMENT(label) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(legend) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(meter) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(optgroup) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(option) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(output) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(progress) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(select) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(textarea) - -// ============================================================================= -// Interactive Elements -// ============================================================================= - -/// @ingroup html -HTML_CONTAINER_ELEMENT(details) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(dialog) - -/// @ingroup html -HTML_CONTAINER_ELEMENT(summary) - -// ============================================================================= -// Web Components Elements -// ============================================================================= - -/// @ingroup html -HTML_CONTAINER_ELEMENT(slot) - -/// @ingroup html -HTML_CONTAINER_ELEMENT_NAMED(template_, template) - -#ifndef DOXYGEN -#undef HTML_VOID_ELEMENT -#undef HTML_CONTAINER_ELEMENT -#undef HTML_CONTAINER_ELEMENT_NAMED -#undef HTML_COMPACT_ELEMENT -#undef HTML_VOID_ATTR_ELEMENT -#endif - -} // namespace sourcemeta::core::html - -#endif diff --git a/vendor/core/src/core/html/include/sourcemeta/core/html_encoder.h b/vendor/core/src/core/html/include/sourcemeta/core/html_encoder.h deleted file mode 100644 index 7be0433e..00000000 --- a/vendor/core/src/core/html/include/sourcemeta/core/html_encoder.h +++ /dev/null @@ -1,145 +0,0 @@ -#ifndef SOURCEMETA_CORE_HTML_ENCODER_H_ -#define SOURCEMETA_CORE_HTML_ENCODER_H_ - -#ifndef SOURCEMETA_CORE_HTML_EXPORT -#include -#endif - -#include - -#include // std::ostream -#include // std::string -#include // std::pair -#include // std::variant, std::holds_alternative, std::get -#include // std::vector - -namespace 
sourcemeta::core { - -/// @ingroup html -using HTMLAttributes = std::vector>; - -#ifndef DOXYGEN -// Forward declaration -class HTML; -#endif - -/// @ingroup html -/// Raw HTML content wrapper for unescaped content -struct SOURCEMETA_CORE_HTML_EXPORT HTMLRaw { -// Exporting symbols that depends on the standard C++ library is considered -// safe. -// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN -#if defined(_MSC_VER) -#pragma warning(disable : 4251) -#endif - std::string content; -#if defined(_MSC_VER) -#pragma warning(default : 4251) -#endif - explicit HTMLRaw(std::string html_content) - : content{std::move(html_content)} {} -}; - -/// @ingroup html -/// A node can be either a string (text node), raw HTML content, or another HTML -/// element -using HTMLNode = std::variant; - -/// @ingroup html -/// An HTML element that can be rendered to a string. Elements can contain -/// attributes and child nodes. -/// -/// For example: -/// -/// ```cpp -/// #include -/// #include -/// #include -/// -/// using namespace sourcemeta::core::html; -/// -/// std::ostringstream result; -/// result << div(h1("Title"), p("Content")); -/// assert(result.str() == "

Title

Content

"); -/// ``` -class SOURCEMETA_CORE_HTML_EXPORT HTML { -public: - HTML(std::string tag, bool self_closing_tag = false) - : tag_name(std::move(tag)), self_closing(self_closing_tag) {} - - HTML(std::string tag, HTMLAttributes tag_attributes, - bool self_closing_tag = false) - : tag_name(std::move(tag)), attributes(std::move(tag_attributes)), - self_closing(self_closing_tag) {} - - HTML(std::string tag, HTMLAttributes tag_attributes, - std::vector children) - : tag_name(std::move(tag)), attributes(std::move(tag_attributes)), - child_elements(std::move(children)), self_closing(false) {} - - HTML(std::string tag, HTMLAttributes tag_attributes, - std::vector children) - : tag_name(std::move(tag)), attributes(std::move(tag_attributes)), - self_closing(false) { - this->child_elements.reserve(children.size()); - for (auto &child_element : children) { - this->child_elements.emplace_back(std::move(child_element)); - } - } - - HTML(std::string tag, std::vector children) - : tag_name(std::move(tag)), child_elements(std::move(children)), - self_closing(false) {} - - HTML(std::string tag, std::vector children) - : tag_name(std::move(tag)), self_closing(false) { - this->child_elements.reserve(children.size()); - for (auto &child_element : children) { - this->child_elements.emplace_back(std::move(child_element)); - } - } - - template - HTML(std::string tag, HTMLAttributes tag_attributes, Children &&...children) - : tag_name(std::move(tag)), attributes(std::move(tag_attributes)), - self_closing(false) { - (this->child_elements.push_back(std::forward(children)), ...); - } - - template - HTML(std::string tag, Children &&...children) - : tag_name(std::move(tag)), self_closing(false) { - (this->child_elements.push_back(std::forward(children)), ...); - } - - [[nodiscard]] auto render() const -> std::string; - - auto push_back(const HTMLNode &child) -> HTML &; - auto push_back(HTMLNode &&child) -> HTML &; - - // Stream operator declaration - friend SOURCEMETA_CORE_HTML_EXPORT auto - 
operator<<(std::ostream &output_stream, const HTML &html_element) - -> std::ostream &; - -private: -// Exporting symbols that depends on the standard C++ library is considered -// safe. -// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN -#if defined(_MSC_VER) -#pragma warning(disable : 4251) -#endif - std::string tag_name; - HTMLAttributes attributes; - std::vector child_elements; -#if defined(_MSC_VER) -#pragma warning(default : 4251) -#endif - bool self_closing; - - [[nodiscard]] auto render(const HTMLNode &child_element) const -> std::string; -}; - -} // namespace sourcemeta::core - -#endif diff --git a/vendor/core/src/core/html/include/sourcemeta/core/html_escape.h b/vendor/core/src/core/html/include/sourcemeta/core/html_escape.h index 2d5d11e8..4062c412 100644 --- a/vendor/core/src/core/html/include/sourcemeta/core/html_escape.h +++ b/vendor/core/src/core/html/include/sourcemeta/core/html_escape.h @@ -5,7 +5,10 @@ #include #endif -#include // std::string +#include + +#include // std::string +#include // std::string_view namespace sourcemeta::core { @@ -33,6 +36,17 @@ namespace sourcemeta::core { SOURCEMETA_CORE_HTML_EXPORT auto html_escape(std::string &text) -> void; +/// @ingroup html +/// Append the HTML-escaped form of `input` directly to `output`, +/// without allocating a temporary string. +SOURCEMETA_CORE_HTML_EXPORT +auto html_escape_append(std::string &output, std::string_view input) -> void; + +/// @ingroup html +/// Append the HTML-escaped form of `input` directly to a buffer. 
+SOURCEMETA_CORE_HTML_EXPORT +auto html_escape_append(HTMLBuffer &output, std::string_view input) -> void; + } // namespace sourcemeta::core #endif diff --git a/vendor/core/src/core/html/include/sourcemeta/core/html_writer.h b/vendor/core/src/core/html/include/sourcemeta/core/html_writer.h new file mode 100644 index 00000000..384f1307 --- /dev/null +++ b/vendor/core/src/core/html/include/sourcemeta/core/html_writer.h @@ -0,0 +1,466 @@ +#ifndef SOURCEMETA_CORE_HTML_WRITER_H_ +#define SOURCEMETA_CORE_HTML_WRITER_H_ + +#ifndef SOURCEMETA_CORE_HTML_EXPORT +#include +#endif + +#include +#include +#include + +#include // assert +#include // std::string_view +#include // std::vector + +namespace sourcemeta::core { + +/// @ingroup html +/// A streaming HTML writer that renders directly to a string buffer. +/// No intermediate DOM tree is built. Elements are serialized as methods +/// are called. +/// +/// ```cpp +/// #include +/// #include +/// +/// sourcemeta::core::HTMLWriter document; +/// document.div().attribute("class", "greeting"); +/// document.h1("Hello"); +/// document.p("World"); +/// document.close(); +/// ``` +class SOURCEMETA_CORE_HTML_EXPORT HTMLWriter { +public: + /// Pre-allocate the output buffer + SOURCEMETA_FORCEINLINE inline auto reserve(std::size_t bytes) -> void { + this->buffer_.reserve(bytes); + } + + /// Close the most recently opened element + SOURCEMETA_FORCEINLINE inline auto close() -> HTMLWriter & { + this->flush_open_tag(); + assert(!this->tag_stack_.empty()); + this->buffer_.append("buffer_.append(this->tag_stack_.back()); + this->buffer_.append(">"); + this->tag_stack_.pop_back(); + return *this; + } + + /// Add an attribute to the currently open tag. Must be called + /// immediately after an element method and before any content. 
+ SOURCEMETA_FORCEINLINE inline auto attribute(std::string_view name, + std::string_view value) + -> HTMLWriter & { + assert(this->tag_open_); + this->buffer_.append(" "); + this->buffer_.append(name); + this->buffer_.append("=\""); + html_escape_append(this->buffer_, value); + this->buffer_.append("\""); + return *this; + } + + /// Write HTML-escaped text content + SOURCEMETA_FORCEINLINE inline auto text(std::string_view content) + -> HTMLWriter & { + this->flush_open_tag(); + html_escape_append(this->buffer_, content); + return *this; + } + + /// Write raw HTML content (not escaped) + SOURCEMETA_FORCEINLINE inline auto raw(std::string_view content) + -> HTMLWriter & { + this->flush_open_tag(); + this->buffer_.append(content); + return *this; + } + + /// Get the rendered HTML string + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto str() + -> const std::string & { + this->flush_open_tag(); + return this->buffer_.str(); + } + + /// Write the rendered HTML to an output stream + auto write(std::ostream &stream) -> void; + +// Exporting symbols that depends on the standard C++ library is considered +// safe. +// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN +#if defined(_MSC_VER) +#pragma warning(disable : 4251) +#endif + +#ifndef DOXYGEN +// Macro to generate container element methods. +// Container elements write on open and on close(). 
+// Overloads: +// .tag() open with no attributes +// .tag(text) open, write escaped text, close (shorthand) +#define HTML_WRITER_CONTAINER(name) \ + SOURCEMETA_FORCEINLINE inline auto name() -> HTMLWriter & { \ + this->open_tag(#name); \ + return *this; \ + } \ + /* NOLINTNEXTLINE(bugprone-macro-parentheses) */ \ + SOURCEMETA_FORCEINLINE inline auto name(std::string_view text_content) \ + -> HTMLWriter & { \ + this->open_tag(#name); \ + this->text(text_content); \ + this->close(); \ + return *this; \ + } + +// Same as above but with a different C++ method name than the HTML tag +#define HTML_WRITER_CONTAINER_NAMED(name, tag) \ + SOURCEMETA_FORCEINLINE inline auto name() -> HTMLWriter & { \ + this->open_tag(#tag); \ + return *this; \ + } \ + /* NOLINTNEXTLINE(bugprone-macro-parentheses) */ \ + SOURCEMETA_FORCEINLINE inline auto name(std::string_view text_content) \ + -> HTMLWriter & { \ + this->open_tag(#tag); \ + this->text(text_content); \ + this->close(); \ + return *this; \ + } + +// Macro to generate void element methods. 
+// Void elements are self-closing: or +#define HTML_WRITER_VOID(name) \ + /* NOLINTNEXTLINE(bugprone-macro-parentheses) */ \ + SOURCEMETA_FORCEINLINE inline auto name() -> HTMLWriter & { \ + this->void_tag(#name); \ + return *this; \ + } +#endif + + // ========================================================================= + // Document Structure Elements + // ========================================================================= + + /// @ingroup html + HTML_WRITER_CONTAINER(html) + /// @ingroup html + HTML_WRITER_VOID(base) + /// @ingroup html + HTML_WRITER_CONTAINER(head) + /// @ingroup html + HTML_WRITER_VOID(link) + /// @ingroup html + HTML_WRITER_VOID(meta) + /// @ingroup html + HTML_WRITER_CONTAINER(style) + /// @ingroup html + HTML_WRITER_CONTAINER(title) + /// @ingroup html + HTML_WRITER_CONTAINER(body) + + // ========================================================================= + // Content Sectioning Elements + // ========================================================================= + + /// @ingroup html + HTML_WRITER_CONTAINER(address) + /// @ingroup html + HTML_WRITER_CONTAINER(article) + /// @ingroup html + HTML_WRITER_CONTAINER(aside) + /// @ingroup html + HTML_WRITER_CONTAINER(footer) + /// @ingroup html + HTML_WRITER_CONTAINER(header) + /// @ingroup html + HTML_WRITER_CONTAINER(h1) + /// @ingroup html + HTML_WRITER_CONTAINER(h2) + /// @ingroup html + HTML_WRITER_CONTAINER(h3) + /// @ingroup html + HTML_WRITER_CONTAINER(h4) + /// @ingroup html + HTML_WRITER_CONTAINER(h5) + /// @ingroup html + HTML_WRITER_CONTAINER(h6) + /// @ingroup html + HTML_WRITER_CONTAINER(hgroup) + /// @ingroup html + HTML_WRITER_CONTAINER(main) + /// @ingroup html + HTML_WRITER_CONTAINER(nav) + /// @ingroup html + HTML_WRITER_CONTAINER(section) + /// @ingroup html + HTML_WRITER_CONTAINER(search) + + // ========================================================================= + // Text Content Elements + // 
========================================================================= + + /// @ingroup html + HTML_WRITER_CONTAINER(blockquote) + /// @ingroup html + HTML_WRITER_CONTAINER(dd) + /// @ingroup html + HTML_WRITER_CONTAINER(div) + /// @ingroup html + HTML_WRITER_CONTAINER(dl) + /// @ingroup html + HTML_WRITER_CONTAINER(dt) + /// @ingroup html + HTML_WRITER_CONTAINER(figcaption) + /// @ingroup html + HTML_WRITER_CONTAINER(figure) + /// @ingroup html + HTML_WRITER_VOID(hr) + /// @ingroup html + HTML_WRITER_CONTAINER(li) + /// @ingroup html + HTML_WRITER_CONTAINER(menu) + /// @ingroup html + HTML_WRITER_CONTAINER(ol) + /// @ingroup html + HTML_WRITER_CONTAINER(p) + /// @ingroup html + HTML_WRITER_CONTAINER(pre) + /// @ingroup html + HTML_WRITER_CONTAINER(ul) + + // ========================================================================= + // Inline Text Semantics Elements + // ========================================================================= + + /// @ingroup html + HTML_WRITER_CONTAINER(a) + /// @ingroup html + HTML_WRITER_CONTAINER(abbr) + /// @ingroup html + HTML_WRITER_CONTAINER(b) + /// @ingroup html + HTML_WRITER_CONTAINER(bdi) + /// @ingroup html + HTML_WRITER_CONTAINER(bdo) + /// @ingroup html + HTML_WRITER_VOID(br) + /// @ingroup html + HTML_WRITER_CONTAINER(cite) + /// @ingroup html + HTML_WRITER_CONTAINER(code) + /// @ingroup html + HTML_WRITER_CONTAINER(data) + /// @ingroup html + HTML_WRITER_CONTAINER(dfn) + /// @ingroup html + HTML_WRITER_CONTAINER(em) + /// @ingroup html + HTML_WRITER_CONTAINER(i) + /// @ingroup html + HTML_WRITER_CONTAINER(kbd) + /// @ingroup html + HTML_WRITER_CONTAINER(mark) + /// @ingroup html + HTML_WRITER_CONTAINER(q) + /// @ingroup html + HTML_WRITER_CONTAINER(rp) + /// @ingroup html + HTML_WRITER_CONTAINER(rt) + /// @ingroup html + HTML_WRITER_CONTAINER(ruby) + /// @ingroup html + HTML_WRITER_CONTAINER(s) + /// @ingroup html + HTML_WRITER_CONTAINER(samp) + /// @ingroup html + HTML_WRITER_CONTAINER(small) + /// @ingroup 
html + HTML_WRITER_CONTAINER(span) + /// @ingroup html + HTML_WRITER_CONTAINER(strong) + /// @ingroup html + HTML_WRITER_CONTAINER(sub) + /// @ingroup html + HTML_WRITER_CONTAINER(sup) + /// @ingroup html + HTML_WRITER_CONTAINER(time) + /// @ingroup html + HTML_WRITER_CONTAINER(u) + /// @ingroup html + HTML_WRITER_CONTAINER(var) + /// @ingroup html + HTML_WRITER_VOID(wbr) + + // ========================================================================= + // Image and Multimedia Elements + // ========================================================================= + + /// @ingroup html + HTML_WRITER_VOID(area) + /// @ingroup html + HTML_WRITER_CONTAINER(audio) + /// @ingroup html + HTML_WRITER_VOID(img) + /// @ingroup html + HTML_WRITER_CONTAINER(map) + /// @ingroup html + HTML_WRITER_VOID(track) + /// @ingroup html + HTML_WRITER_CONTAINER(video) + + // ========================================================================= + // Embedded Content Elements + // ========================================================================= + + /// @ingroup html + HTML_WRITER_VOID(embed) + /// @ingroup html + HTML_WRITER_CONTAINER(iframe) + /// @ingroup html + HTML_WRITER_CONTAINER(object) + /// @ingroup html + HTML_WRITER_CONTAINER(picture) + /// @ingroup html + HTML_WRITER_CONTAINER(portal) + /// @ingroup html + HTML_WRITER_VOID(source) + + // ========================================================================= + // Scripting Elements + // ========================================================================= + + /// @ingroup html + HTML_WRITER_CONTAINER(canvas) + /// @ingroup html + HTML_WRITER_CONTAINER(noscript) + /// @ingroup html + HTML_WRITER_CONTAINER(script) + + // ========================================================================= + // Demarcating Edits Elements + // ========================================================================= + + /// @ingroup html + HTML_WRITER_CONTAINER(del) + /// @ingroup html + HTML_WRITER_CONTAINER(ins) + + // 
========================================================================= + // Table Content Elements + // ========================================================================= + + /// @ingroup html + HTML_WRITER_CONTAINER(caption) + /// @ingroup html + HTML_WRITER_VOID(col) + /// @ingroup html + HTML_WRITER_CONTAINER(colgroup) + /// @ingroup html + HTML_WRITER_CONTAINER(table) + /// @ingroup html + HTML_WRITER_CONTAINER(tbody) + /// @ingroup html + HTML_WRITER_CONTAINER(td) + /// @ingroup html + HTML_WRITER_CONTAINER(tfoot) + /// @ingroup html + HTML_WRITER_CONTAINER(th) + /// @ingroup html + HTML_WRITER_CONTAINER(thead) + /// @ingroup html + HTML_WRITER_CONTAINER(tr) + + // ========================================================================= + // Forms Elements + // ========================================================================= + + /// @ingroup html + HTML_WRITER_CONTAINER(button) + /// @ingroup html + HTML_WRITER_CONTAINER(datalist) + /// @ingroup html + HTML_WRITER_CONTAINER(fieldset) + /// @ingroup html + HTML_WRITER_CONTAINER(form) + /// @ingroup html + HTML_WRITER_VOID(input) + /// @ingroup html + HTML_WRITER_CONTAINER(label) + /// @ingroup html + HTML_WRITER_CONTAINER(legend) + /// @ingroup html + HTML_WRITER_CONTAINER(meter) + /// @ingroup html + HTML_WRITER_CONTAINER(optgroup) + /// @ingroup html + HTML_WRITER_CONTAINER(option) + /// @ingroup html + HTML_WRITER_CONTAINER(output) + /// @ingroup html + HTML_WRITER_CONTAINER(progress) + /// @ingroup html + HTML_WRITER_CONTAINER(select) + /// @ingroup html + HTML_WRITER_CONTAINER(textarea) + + // ========================================================================= + // Interactive Elements + // ========================================================================= + + /// @ingroup html + HTML_WRITER_CONTAINER(details) + /// @ingroup html + HTML_WRITER_CONTAINER(dialog) + /// @ingroup html + HTML_WRITER_CONTAINER(summary) + + // 
========================================================================= + // Web Components Elements + // ========================================================================= + + /// @ingroup html + HTML_WRITER_CONTAINER(slot) + /// @ingroup html + HTML_WRITER_CONTAINER_NAMED(template_, template) + +#ifndef DOXYGEN +#undef HTML_WRITER_CONTAINER +#undef HTML_WRITER_CONTAINER_NAMED +#undef HTML_WRITER_VOID +#endif + +private: + SOURCEMETA_FORCEINLINE inline auto open_tag(std::string_view tag) -> void { + this->flush_open_tag(); + this->buffer_.append("<"); + this->buffer_.append(tag); + this->tag_stack_.push_back(tag); + this->tag_open_ = true; + this->tag_open_is_void_ = false; + } + + SOURCEMETA_FORCEINLINE inline auto void_tag(std::string_view tag) -> void { + this->flush_open_tag(); + this->buffer_.append("<"); + this->buffer_.append(tag); + this->tag_open_ = true; + this->tag_open_is_void_ = true; + } + + auto flush_open_tag() -> void; + + HTMLBuffer buffer_; + std::vector tag_stack_; + bool tag_open_{false}; + bool tag_open_is_void_{false}; +#if defined(_MSC_VER) +#pragma warning(default : 4251) +#endif +}; + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/html/writer.cc b/vendor/core/src/core/html/writer.cc new file mode 100644 index 00000000..23087b79 --- /dev/null +++ b/vendor/core/src/core/html/writer.cc @@ -0,0 +1,49 @@ +#include + +#include // std::ostream + +namespace sourcemeta::core { + +auto HTMLBuffer::grow(const std::size_t needed) -> void { + const auto current_size{ + this->cursor_ + ? static_cast(this->cursor_ - this->buffer_.data()) + : 0}; + auto new_capacity{this->buffer_.empty() ? 
std::size_t{1024} + : this->buffer_.size() * 2}; + while (new_capacity < current_size + needed) { + new_capacity *= 2; + } + + this->buffer_.resize(new_capacity); + this->cursor_ = this->buffer_.data() + current_size; + this->end_ = this->buffer_.data() + new_capacity; +} + +auto HTMLBuffer::write(std::ostream &stream) -> void { + if (this->cursor_) { + const auto size{ + static_cast(this->cursor_ - this->buffer_.data())}; + stream.write(this->buffer_.data(), static_cast(size)); + } +} + +auto HTMLWriter::flush_open_tag() -> void { + if (this->tag_open_) { + if (this->tag_open_is_void_) { + this->buffer_.append(" />"); + } else { + this->buffer_.append(">"); + } + + this->tag_open_ = false; + this->tag_open_is_void_ = false; + } +} + +auto HTMLWriter::write(std::ostream &stream) -> void { + this->flush_open_tag(); + this->buffer_.write(stream); +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/json/CMakeLists.txt b/vendor/core/src/core/json/CMakeLists.txt index 0dc33101..a9c05906 100644 --- a/vendor/core/src/core/json/CMakeLists.txt +++ b/vendor/core/src/core/json/CMakeLists.txt @@ -7,4 +7,6 @@ if(SOURCEMETA_CORE_INSTALL) endif() target_link_libraries(sourcemeta_core_json PRIVATE sourcemeta::core::io) +target_link_libraries(sourcemeta_core_json PRIVATE sourcemeta::core::unicode) target_link_libraries(sourcemeta_core_json PUBLIC sourcemeta::core::numeric) +target_link_libraries(sourcemeta_core_json PUBLIC sourcemeta::core::preprocessor) diff --git a/vendor/core/src/core/json/construct.h b/vendor/core/src/core/json/construct.h new file mode 100644 index 00000000..c2cd5674 --- /dev/null +++ b/vendor/core/src/core/json/construct.h @@ -0,0 +1,648 @@ +#ifndef SOURCEMETA_CORE_JSON_CONSTRUCT_H_ +#define SOURCEMETA_CORE_JSON_CONSTRUCT_H_ + +#include +#include + +#include +#include + +#include "parser.h" + +#include // assert +#include // std::size_t +#include // std::uint64_t, std::uint32_t +#include // std::memchr +#include // std::reference_wrapper 
+#include // std::invalid_argument +#include // std::move +#include // std::vector + +namespace sourcemeta::core { + +namespace internal { + +inline auto unescape_string(const char *data, const std::uint32_t length) -> + typename JSON::String { + typename JSON::String result; + const char *cursor{data}; + const char *string_end{data + length}; + + if (!std::memchr(data, '\\', length)) { + result.append(data, length); + return result; + } + + result.reserve(length); + while (cursor < string_end) { + const char *scan{cursor}; + while (scan < string_end && *scan != '\\') { + scan++; + } + + if (scan > cursor) { + result.append(cursor, static_cast(scan - cursor)); + cursor = scan; + } + + if (cursor >= string_end) { + break; + } + + assert(*cursor == '\\'); + cursor++; + assert(cursor < string_end); + + switch (*cursor++) { + case '"': + result.push_back('"'); + break; + case '\\': + result.push_back('\\'); + break; + case '/': + result.push_back('/'); + break; + case 'b': + result.push_back('\b'); + break; + case 'f': + result.push_back('\f'); + break; + case 'n': + result.push_back('\n'); + break; + case 'r': + result.push_back('\r'); + break; + case 't': + result.push_back('\t'); + break; + case 'u': { + auto parse_hex4 = [](const char *&position) -> unsigned long { + unsigned long value{0}; + for (std::size_t index = 0; index < 4; index++) { + const char hex_char{*position++}; + unsigned long digit; + if (hex_char >= '0' && hex_char <= '9') { + digit = static_cast(hex_char - '0'); + } else if (hex_char >= 'a' && hex_char <= 'f') { + digit = static_cast(hex_char - 'a') + 10; + } else if (hex_char >= 'A' && hex_char <= 'F') { + digit = static_cast(hex_char - 'A') + 10; + } else { + digit = 0; + } + value = (value << 4) | digit; + } + return value; + }; + + auto code_point{parse_hex4(cursor)}; + if (code_point >= 0xD800 && code_point <= 0xDBFF) { + assert(cursor + 6 <= string_end); + cursor += 2; + const auto low{parse_hex4(cursor)}; + code_point = 0x10000 + 
((code_point - 0xD800) << 10) + (low - 0xDC00); + } + + sourcemeta::core::codepoint_to_utf8(static_cast(code_point), + result); + break; + } + default: + break; + } + } + + return result; +} + +inline auto construct_number(const char *data, const std::uint32_t length) + -> JSON { + const bool has_dot{std::memchr(data, '.', length) != nullptr}; + const bool has_exponent{std::memchr(data, 'e', length) != nullptr || + std::memchr(data, 'E', length) != nullptr}; + + if (has_exponent) { + try { + return JSON{Decimal{std::string_view{data, length}}}; + } catch (const DecimalParseError &) { + throw JSONParseError(1, 1); + } catch (const std::invalid_argument &) { + throw JSONParseError(1, 1); + } + } + + if (has_dot) { + std::size_t first_nonzero_position{JSON::String::npos}; + const auto decimal_position{static_cast( + static_cast(std::memchr(data, '.', length)) - data)}; + for (std::size_t index = 0; index < length; index++) { + if (index != decimal_position && data[index] != '0' && + data[index] != '-') { + first_nonzero_position = index; + break; + } + } + + if (first_nonzero_position == JSON::String::npos) { + first_nonzero_position = 0; + } + + const auto decimal_after_first_nonzero{decimal_position > + first_nonzero_position}; + const auto significant_digits{length - first_nonzero_position - + (decimal_after_first_nonzero ? 
1 : 0)}; + constexpr std::size_t MAX_SAFE_SIGNIFICANT_DIGITS{15}; + if (significant_digits > MAX_SAFE_SIGNIFICANT_DIGITS) { + try { + return JSON{Decimal{std::string_view{data, length}}}; + } catch (const DecimalParseError &) { + throw JSONParseError(1, 1); + } catch (const std::invalid_argument &) { + throw JSONParseError(1, 1); + } + } + + const typename JSON::String string_value{data, length}; + const auto double_result{sourcemeta::core::to_double(string_value)}; + if (double_result.has_value()) { + return JSON{double_result.value()}; + } + try { + return JSON{Decimal{string_value}}; + } catch (const DecimalParseError &) { + throw JSONParseError(1, 1); + } catch (const std::invalid_argument &) { + throw JSONParseError(1, 1); + } + } + + auto digit_length = length; + if (digit_length > 0 && data[0] == '-') { + digit_length--; + } + + if (digit_length <= 19) { + const typename JSON::String string_value{data, length}; + const auto int_result{sourcemeta::core::to_int64_t(string_value)}; + if (int_result.has_value()) { + return JSON{int_result.value()}; + } + try { + return JSON{Decimal{string_value}}; + } catch (const DecimalParseError &) { + throw JSONParseError(1, 1); + } catch (const std::invalid_argument &) { + throw JSONParseError(1, 1); + } + } + + try { + return JSON{Decimal{std::string_view{data, length}}}; + } catch (const DecimalParseError &) { + throw JSONParseError(1, 1); + } catch (const std::invalid_argument &) { + throw JSONParseError(1, 1); + } +} + +inline auto post_column_for(const TapeEntry &entry) -> std::uint64_t { + switch (entry.type) { + case TapeType::True: + return entry.column + 3; + case TapeType::False: + return entry.column + 4; + case TapeType::Null: + return entry.column + 3; + case TapeType::String: + case TapeType::Key: + return entry.column + entry.length + 1; + case TapeType::Number: + return entry.column + entry.length - 1; + default: + return entry.column; + } +} + +} // namespace internal + +// 
NOLINTBEGIN(cppcoreguidelines-avoid-goto,bugprone-use-after-move) + +#define CALLBACK_PRE(value_type, entry_ref, context, index, property) \ + if (callback) { \ + callback(JSON::ParsePhase::Pre, JSON::Type::value_type, (entry_ref).line, \ + (entry_ref).column, context, index, property); \ + } + +#define CALLBACK_POST(value_type, post_line, post_column) \ + if (callback) { \ + callback(JSON::ParsePhase::Post, JSON::Type::value_type, post_line, \ + post_column, JSON::ParseContext::Root, 0, empty_property); \ + } + +inline auto construct_json(const char *buffer, + const std::vector &tape, + const JSON::ParseCallback &callback, JSON &output) + -> void { + using Result = JSON; + enum class Container : std::uint8_t { Array, Object }; + std::vector levels; + std::vector> frames; + levels.reserve(32); + frames.reserve(32); + typename Result::String key; + typename Result::Object::hash_type key_hash; + std::uint64_t key_line{0}; + std::uint64_t key_column{0}; + std::size_t tape_index{0}; + static const JSON::String empty_property; + + if (tape.empty()) { + throw JSONParseError(1, 1); + } + + const auto &entry{tape[tape_index]}; + switch (entry.type) { + case TapeType::True: + CALLBACK_PRE(Boolean, entry, JSON::ParseContext::Root, 0, empty_property); + CALLBACK_POST(Boolean, entry.line, internal::post_column_for(entry)); + output = JSON{true}; + return; + case TapeType::False: + CALLBACK_PRE(Boolean, entry, JSON::ParseContext::Root, 0, empty_property); + CALLBACK_POST(Boolean, entry.line, internal::post_column_for(entry)); + output = JSON{false}; + return; + case TapeType::Null: + CALLBACK_PRE(Null, entry, JSON::ParseContext::Root, 0, empty_property); + CALLBACK_POST(Null, entry.line, internal::post_column_for(entry)); + output = JSON{nullptr}; + return; + case TapeType::String: { + CALLBACK_PRE(String, entry, JSON::ParseContext::Root, 0, empty_property); + auto value{Result{ + internal::unescape_string(buffer + entry.offset, entry.length)}}; + CALLBACK_POST(String, 
entry.line, internal::post_column_for(entry)); + output = std::move(value); + return; + } + case TapeType::Number: { + auto value = + internal::construct_number(buffer + entry.offset, entry.length); + if (value.is_integer()) { + CALLBACK_PRE(Integer, entry, JSON::ParseContext::Root, 0, + empty_property); + CALLBACK_POST(Integer, entry.line, internal::post_column_for(entry)); + } else if (value.is_decimal()) { + CALLBACK_PRE(Decimal, entry, JSON::ParseContext::Root, 0, + empty_property); + CALLBACK_POST(Decimal, entry.line, internal::post_column_for(entry)); + } else { + CALLBACK_PRE(Real, entry, JSON::ParseContext::Root, 0, empty_property); + CALLBACK_POST(Real, entry.line, internal::post_column_for(entry)); + } + output = std::move(value); + return; + } + case TapeType::ArrayStart: + CALLBACK_PRE(Array, entry, JSON::ParseContext::Root, 0, empty_property); + goto do_construct_array; + case TapeType::ObjectStart: + CALLBACK_PRE(Object, entry, JSON::ParseContext::Root, 0, empty_property); + goto do_construct_object; + default: + throw JSONParseError(1, 1); + } + + /* + * Construct an array + */ + +do_construct_array: { + const auto &array_entry{tape[tape_index]}; + assert(array_entry.type == TapeType::ArrayStart); + const auto child_count{array_entry.count}; + tape_index++; + + if (levels.empty()) { + levels.push_back(Container::Array); + output = Result::make_array(); + frames.emplace_back(output); + } else if (levels.back() == Container::Array) { + levels.push_back(Container::Array); + frames.back().get().push_back(Result::make_array()); + frames.emplace_back(frames.back().get().back()); + } else if (levels.back() == Container::Object) { + levels.push_back(Container::Array); + frames.back().get().assign(key, Result::make_array()); + if (callback) { + callback(JSON::ParsePhase::Pre, JSON::Type::Array, key_line, key_column, + JSON::ParseContext::Property, 0, + frames.back().get().as_object().back_key()); + } + frames.emplace_back(frames.back().get().at(key)); + } + + 
frames.back().get().as_array().reserve(child_count); + + if (child_count == 0) { + assert(tape[tape_index].type == TapeType::ArrayEnd); + const auto &end_entry{tape[tape_index]}; + tape_index++; + CALLBACK_POST(Array, end_entry.line, end_entry.column); + goto do_construct_container_end; + } + + goto do_construct_array_item; +} + +do_construct_array_item: { + assert(!levels.empty()); + assert(levels.back() == Container::Array); + const auto &item_entry{tape[tape_index]}; + + switch (item_entry.type) { + case TapeType::ArrayStart: + CALLBACK_PRE(Array, item_entry, JSON::ParseContext::Index, + frames.back().get().size(), empty_property); + goto do_construct_array; + case TapeType::ObjectStart: + CALLBACK_PRE(Object, item_entry, JSON::ParseContext::Index, + frames.back().get().size(), empty_property); + goto do_construct_object; + case TapeType::True: + CALLBACK_PRE(Boolean, item_entry, JSON::ParseContext::Index, + frames.back().get().size(), empty_property); + frames.back().get().push_back(JSON{true}); + tape_index++; + CALLBACK_POST(Boolean, item_entry.line, + internal::post_column_for(item_entry)); + goto do_construct_array_item_separator; + case TapeType::False: + CALLBACK_PRE(Boolean, item_entry, JSON::ParseContext::Index, + frames.back().get().size(), empty_property); + frames.back().get().push_back(JSON{false}); + tape_index++; + CALLBACK_POST(Boolean, item_entry.line, + internal::post_column_for(item_entry)); + goto do_construct_array_item_separator; + case TapeType::Null: + CALLBACK_PRE(Null, item_entry, JSON::ParseContext::Index, + frames.back().get().size(), empty_property); + frames.back().get().push_back(JSON{nullptr}); + tape_index++; + CALLBACK_POST(Null, item_entry.line, + internal::post_column_for(item_entry)); + goto do_construct_array_item_separator; + case TapeType::String: + CALLBACK_PRE(String, item_entry, JSON::ParseContext::Index, + frames.back().get().size(), empty_property); + frames.back().get().push_back(Result{internal::unescape_string( + 
buffer + item_entry.offset, item_entry.length)}); + tape_index++; + CALLBACK_POST(String, item_entry.line, + internal::post_column_for(item_entry)); + goto do_construct_array_item_separator; + case TapeType::Number: { + const auto current_index{frames.back().get().size()}; + auto value = internal::construct_number(buffer + item_entry.offset, + item_entry.length); + if (value.is_integer()) { + CALLBACK_PRE(Integer, item_entry, JSON::ParseContext::Index, + current_index, empty_property); + } else if (value.is_decimal()) { + CALLBACK_PRE(Decimal, item_entry, JSON::ParseContext::Index, + current_index, empty_property); + } else { + CALLBACK_PRE(Real, item_entry, JSON::ParseContext::Index, current_index, + empty_property); + } + const auto value_type{value.type()}; + frames.back().get().push_back(std::move(value)); + tape_index++; + if (value_type == JSON::Type::Integer) { + CALLBACK_POST(Integer, item_entry.line, + internal::post_column_for(item_entry)); + } else if (value_type == JSON::Type::Decimal) { + CALLBACK_POST(Decimal, item_entry.line, + internal::post_column_for(item_entry)); + } else { + CALLBACK_POST(Real, item_entry.line, + internal::post_column_for(item_entry)); + } + goto do_construct_array_item_separator; + } + default: + throw JSONParseError(1, 1); + } +} + +do_construct_array_item_separator: + if (tape[tape_index].type == TapeType::ArrayEnd) { + const auto &end_entry{tape[tape_index]}; + tape_index++; + CALLBACK_POST(Array, end_entry.line, end_entry.column); + goto do_construct_container_end; + } + + goto do_construct_array_item; + + /* + * Construct an object + */ + +do_construct_object: { + const auto &object_entry{tape[tape_index]}; + assert(object_entry.type == TapeType::ObjectStart); + const auto property_count{object_entry.count}; + tape_index++; + + if (levels.empty()) { + levels.push_back(Container::Object); + output = Result::make_object(); + frames.emplace_back(output); + } else if (levels.back() == Container::Array) { + 
levels.push_back(Container::Object); + frames.back().get().push_back(Result::make_object()); + frames.emplace_back(frames.back().get().back()); + } else if (levels.back() == Container::Object) { + levels.push_back(Container::Object); + frames.back().get().assign(key, Result::make_object()); + if (callback) { + callback(JSON::ParsePhase::Pre, JSON::Type::Object, key_line, key_column, + JSON::ParseContext::Property, 0, + frames.back().get().as_object().back_key()); + } + frames.emplace_back(frames.back().get().at(key)); + } + + frames.back().get().as_object().reserve(property_count); + + if (property_count == 0) { + assert(tape[tape_index].type == TapeType::ObjectEnd); + const auto &end_entry{tape[tape_index]}; + tape_index++; + CALLBACK_POST(Object, end_entry.line, end_entry.column); + goto do_construct_container_end; + } + + goto do_construct_object_key; +} + +do_construct_object_key: { + assert(!levels.empty()); + assert(levels.back() == Container::Object); + const auto &key_entry{tape[tape_index]}; + assert(key_entry.type == TapeType::Key); + const char *key_data{buffer + key_entry.offset}; + const auto key_length{key_entry.length}; + if (std::memchr(key_data, '\\', key_length)) { + key = internal::unescape_string(key_data, key_length); + key_hash = frames.back().get().as_object().hash(key); + } else { + key.assign(key_data, key_length); + key_hash = frames.back().get().as_object().hash(key_data, key_length); + } + key_line = key_entry.line; + key_column = key_entry.column; + tape_index++; + goto do_construct_object_value; +} + +do_construct_object_value: { + const auto &value_entry{tape[tape_index]}; + + switch (value_entry.type) { + case TapeType::ArrayStart: + goto do_construct_array; + case TapeType::ObjectStart: + goto do_construct_object; + case TapeType::True: + frames.back().get().assign_assume_new(std::move(key), JSON{true}, + key_hash); + if (callback) { + callback(JSON::ParsePhase::Pre, JSON::Type::Boolean, key_line, + key_column, 
JSON::ParseContext::Property, 0, + frames.back().get().as_object().back_key()); + } + tape_index++; + CALLBACK_POST(Boolean, value_entry.line, + internal::post_column_for(value_entry)); + goto do_construct_object_property_end; + case TapeType::False: + frames.back().get().assign_assume_new(std::move(key), JSON{false}, + key_hash); + if (callback) { + callback(JSON::ParsePhase::Pre, JSON::Type::Boolean, key_line, + key_column, JSON::ParseContext::Property, 0, + frames.back().get().as_object().back_key()); + } + tape_index++; + CALLBACK_POST(Boolean, value_entry.line, + internal::post_column_for(value_entry)); + goto do_construct_object_property_end; + case TapeType::Null: + frames.back().get().assign_assume_new(std::move(key), JSON{nullptr}, + key_hash); + if (callback) { + callback(JSON::ParsePhase::Pre, JSON::Type::Null, key_line, key_column, + JSON::ParseContext::Property, 0, + frames.back().get().as_object().back_key()); + } + tape_index++; + CALLBACK_POST(Null, value_entry.line, + internal::post_column_for(value_entry)); + goto do_construct_object_property_end; + case TapeType::String: + frames.back().get().assign_assume_new( + std::move(key), + Result{internal::unescape_string(buffer + value_entry.offset, + value_entry.length)}, + key_hash); + if (callback) { + callback(JSON::ParsePhase::Pre, JSON::Type::String, key_line, + key_column, JSON::ParseContext::Property, 0, + frames.back().get().as_object().back_key()); + } + tape_index++; + CALLBACK_POST(String, value_entry.line, + internal::post_column_for(value_entry)); + goto do_construct_object_property_end; + case TapeType::Number: { + auto value = internal::construct_number(buffer + value_entry.offset, + value_entry.length); + const auto value_type{value.type()}; + frames.back().get().assign_assume_new(std::move(key), std::move(value), + key_hash); + if (callback) { + if (value_type == JSON::Type::Integer) { + callback(JSON::ParsePhase::Pre, JSON::Type::Integer, key_line, + key_column, 
JSON::ParseContext::Property, 0, + frames.back().get().as_object().back_key()); + } else if (value_type == JSON::Type::Decimal) { + callback(JSON::ParsePhase::Pre, JSON::Type::Decimal, key_line, + key_column, JSON::ParseContext::Property, 0, + frames.back().get().as_object().back_key()); + } else { + callback(JSON::ParsePhase::Pre, JSON::Type::Real, key_line, + key_column, JSON::ParseContext::Property, 0, + frames.back().get().as_object().back_key()); + } + } + tape_index++; + if (value_type == JSON::Type::Integer) { + CALLBACK_POST(Integer, value_entry.line, + internal::post_column_for(value_entry)); + } else if (value_type == JSON::Type::Decimal) { + CALLBACK_POST(Decimal, value_entry.line, + internal::post_column_for(value_entry)); + } else { + CALLBACK_POST(Real, value_entry.line, + internal::post_column_for(value_entry)); + } + goto do_construct_object_property_end; + } + default: + throw JSONParseError(1, 1); + } +} + +do_construct_object_property_end: + if (tape[tape_index].type == TapeType::ObjectEnd) { + const auto &end_entry{tape[tape_index]}; + tape_index++; + CALLBACK_POST(Object, end_entry.line, end_entry.column); + goto do_construct_container_end; + } + + goto do_construct_object_key; + + /* + * Finish constructing a container + */ + +do_construct_container_end: + assert(!levels.empty()); + if (levels.size() == 1) { + return; + } + + frames.pop_back(); + levels.pop_back(); + if (levels.back() == Container::Array) { + goto do_construct_array_item_separator; + } else { + goto do_construct_object_property_end; + } +} + +// NOLINTEND(cppcoreguidelines-avoid-goto,bugprone-use-after-move) + +#undef CALLBACK_PRE +#undef CALLBACK_POST + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/json/grammar.h b/vendor/core/src/core/json/grammar.h index 093df10c..6b9338af 100644 --- a/vendor/core/src/core/json/grammar.h +++ b/vendor/core/src/core/json/grammar.h @@ -72,14 +72,17 @@ static constexpr CharT token_object_delimiter{'\u002C'}; // 
https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf // Boolean +template static constexpr CharT token_true{'\u0074'}; template static constexpr std::basic_string_view constant_true{ "\u0074\u0072\u0075\u0065"}; +template static constexpr CharT token_false{'\u0066'}; template static constexpr std::basic_string_view constant_false{ "\u0066\u0061\u006C\u0073\u0065"}; // Null +template static constexpr CharT token_null{'\u006E'}; template static constexpr std::basic_string_view constant_null{ "\u006E\u0075\u006C\u006C"}; diff --git a/vendor/core/src/core/json/include/sourcemeta/core/json.h b/vendor/core/src/core/json/include/sourcemeta/core/json.h index 1052e7ca..98c7231c 100644 --- a/vendor/core/src/core/json/include/sourcemeta/core/json.h +++ b/vendor/core/src/core/json/include/sourcemeta/core/json.h @@ -11,6 +11,8 @@ #include // NOLINTEND(misc-include-cleaner) +#include + #include // std::uint64_t #include // std::filesystem #include // std::basic_ifstream @@ -49,8 +51,8 @@ namespace sourcemeta::core { /// /// If parsing fails, sourcemeta::core::JSONParseError will be thrown. SOURCEMETA_CORE_JSON_EXPORT -auto parse_json(std::basic_istream &stream, - const JSON::ParseCallback &callback = nullptr) -> JSON; +auto parse_json(std::basic_istream &stream) + -> JSON; /// @ingroup json /// @@ -68,8 +70,8 @@ auto parse_json(std::basic_istream &stream, /// /// If parsing fails, sourcemeta::core::JSONParseError will be thrown. 
SOURCEMETA_CORE_JSON_EXPORT -auto parse_json(const std::basic_string &input, - const JSON::ParseCallback &callback = nullptr) -> JSON; +auto parse_json(const std::basic_string &input) + -> JSON; /// @ingroup json /// @@ -90,8 +92,7 @@ auto parse_json(const std::basic_string &input, /// ``` SOURCEMETA_CORE_JSON_EXPORT auto parse_json(std::basic_istream &stream, - std::uint64_t &line, std::uint64_t &column, - const JSON::ParseCallback &callback = nullptr) -> JSON; + std::uint64_t &line, std::uint64_t &column) -> JSON; /// @ingroup json /// @@ -110,8 +111,7 @@ auto parse_json(std::basic_istream &stream, /// ``` SOURCEMETA_CORE_JSON_EXPORT auto parse_json(const std::basic_string &input, - std::uint64_t &line, std::uint64_t &column, - const JSON::ParseCallback &callback = nullptr) -> JSON; + std::uint64_t &line, std::uint64_t &column) -> JSON; /// @ingroup json /// @@ -128,10 +128,74 @@ auto parse_json(const std::basic_string &input, /// std::cout << std::endl; /// ``` /// +/// If parsing fails, sourcemeta::core::JSONFileParseError will be thrown. +SOURCEMETA_CORE_JSON_EXPORT +auto read_json(const std::filesystem::path &path) -> JSON; + +/// @ingroup json +/// +/// Parse a JSON document from a C++ standard input stream into an existing +/// JSON value, invoking the given callback during parsing. The result is +/// constructed directly into the given reference rather than returned by value +/// to ensure that references passed through the parse callback (such as object +/// property names) remain valid after parsing completes. +/// /// If parsing fails, sourcemeta::core::JSONParseError will be thrown. 
SOURCEMETA_CORE_JSON_EXPORT -auto read_json(const std::filesystem::path &path, - const JSON::ParseCallback &callback = nullptr) -> JSON; +auto parse_json(std::basic_istream &stream, + JSON &output, const JSON::ParseCallback &callback) -> void; + +/// @ingroup json +/// +/// Parse a JSON document from a JSON string into an existing JSON value, +/// invoking the given callback during parsing. The result is constructed +/// directly into the given reference rather than returned by value to ensure +/// that references passed through the parse callback (such as object property +/// names) remain valid after parsing completes. +/// +/// If parsing fails, sourcemeta::core::JSONParseError will be thrown. +SOURCEMETA_CORE_JSON_EXPORT +auto parse_json(const std::basic_string &input, + JSON &output, const JSON::ParseCallback &callback) -> void; + +/// @ingroup json +/// +/// Parse a JSON document from a C++ standard input stream into an existing +/// JSON value, passing your own `line` and `column` read/write position +/// indicators and invoking the given callback during parsing. The result is +/// constructed directly into the given reference rather than returned by value +/// to ensure that references passed through the parse callback (such as object +/// property names) remain valid after parsing completes. +SOURCEMETA_CORE_JSON_EXPORT +auto parse_json(std::basic_istream &stream, + std::uint64_t &line, std::uint64_t &column, JSON &output, + const JSON::ParseCallback &callback) -> void; + +/// @ingroup json +/// +/// Parse a JSON document from a JSON string into an existing JSON value, +/// passing your own `line` and `column` read/write position indicators and +/// invoking the given callback during parsing. The result is constructed +/// directly into the given reference rather than returned by value to ensure +/// that references passed through the parse callback (such as object property +/// names) remain valid after parsing completes. 
+SOURCEMETA_CORE_JSON_EXPORT +auto parse_json(const std::basic_string &input, + std::uint64_t &line, std::uint64_t &column, JSON &output, + const JSON::ParseCallback &callback) -> void; + +/// @ingroup json +/// +/// A convenience function to parse a JSON document from a file into an existing +/// JSON value, invoking the given callback during parsing. The result is +/// constructed directly into the given reference rather than returned by value +/// to ensure that references passed through the parse callback (such as object +/// property names) remain valid after parsing completes. +/// +/// If parsing fails, sourcemeta::core::JSONFileParseError will be thrown. +SOURCEMETA_CORE_JSON_EXPORT +auto read_json(const std::filesystem::path &path, JSON &output, + const JSON::ParseCallback &callback) -> void; /// @ingroup json /// @@ -228,8 +292,14 @@ auto operator<<(std::basic_ostream &stream, /// {sourcemeta::core::JSON::Type::Object, /// sourcemeta::core::JSON::Type::Array}); /// ``` -SOURCEMETA_CORE_JSON_EXPORT -auto make_set(std::initializer_list types) -> JSON::TypeSet; +SOURCEMETA_FORCEINLINE inline auto +make_set(std::initializer_list types) -> JSON::TypeSet { + JSON::TypeSet result; + for (const auto type : types) { + result.set(static_cast(type)); + } + return result; +} } // namespace sourcemeta::core diff --git a/vendor/core/src/core/json/include/sourcemeta/core/json_array.h b/vendor/core/src/core/json/include/sourcemeta/core/json_array.h index 808cafbc..f7194492 100644 --- a/vendor/core/src/core/json/include/sourcemeta/core/json_array.h +++ b/vendor/core/src/core/json/include/sourcemeta/core/json_array.h @@ -97,6 +97,11 @@ template class JSONArray { return this->data.size(); } + /// Reserve capacity for a given number of elements + auto reserve(const size_type capacity) -> void { + this->data.reserve(capacity); + } + private: friend Value; // Exporting symbols that depends on the standard C++ library is considered diff --git 
a/vendor/core/src/core/json/include/sourcemeta/core/json_auto.h b/vendor/core/src/core/json/include/sourcemeta/core/json_auto.h index 05976717..813c829d 100644 --- a/vendor/core/src/core/json/include/sourcemeta/core/json_auto.h +++ b/vendor/core/src/core/json/include/sourcemeta/core/json_auto.h @@ -202,17 +202,10 @@ template requires std::is_same_v auto to_json(const T &hash) -> JSON { auto result{JSON::make_array()}; -#if defined(__SIZEOF_INT128__) result.push_back(JSON{static_cast(hash.a >> 64)}); result.push_back(JSON{static_cast(hash.a)}); result.push_back(JSON{static_cast(hash.b >> 64)}); result.push_back(JSON{static_cast(hash.b)}); -#else - result.push_back(JSON{static_cast(hash.a)}); - result.push_back(JSON{static_cast(hash.b)}); - result.push_back(JSON{static_cast(hash.c)}); - result.push_back(JSON{static_cast(hash.d)}); -#endif return result; } @@ -228,21 +221,17 @@ auto from_json(const JSON &value) -> std::optional { return std::nullopt; } -#if defined(__SIZEOF_INT128__) - return T{(static_cast<__uint128_t>( + using uint128_type = JSON::Object::hash_type::type; + return T{(static_cast( static_cast(value.at(0).to_integer())) << 64) | - static_cast(value.at(1).to_integer()), - (static_cast<__uint128_t>( + static_cast( + static_cast(value.at(1).to_integer())), + (static_cast( static_cast(value.at(2).to_integer())) << 64) | - static_cast(value.at(3).to_integer())}; -#else - return T{static_cast(value.at(0).to_integer()), - static_cast(value.at(1).to_integer()), - static_cast(value.at(2).to_integer()), - static_cast(value.at(3).to_integer())}; -#endif + static_cast( + static_cast(value.at(3).to_integer()))}; } /// @ingroup json diff --git a/vendor/core/src/core/json/include/sourcemeta/core/json_hash.h b/vendor/core/src/core/json/include/sourcemeta/core/json_hash.h index 49ee1066..728fdc9f 100644 --- a/vendor/core/src/core/json/include/sourcemeta/core/json_hash.h +++ b/vendor/core/src/core/json/include/sourcemeta/core/json_hash.h @@ -1,8 +1,9 @@ #ifndef 
SOURCEMETA_CORE_JSON_HASH_H_ #define SOURCEMETA_CORE_JSON_HASH_H_ +#include + #include // assert -#include // std::uint64_t #include // std::memcpy #include // std::reference_wrapper @@ -29,111 +30,98 @@ template struct HashJSON { /// @ingroup json template struct PropertyHashJSON { struct hash_type { - // For performance when the platform allows it -#if defined(__SIZEOF_INT128__) - using type = __uint128_t; - type a{0}; - type b{0}; -#else - using type = std::uint64_t; + using type = sourcemeta::core::uint128_t; type a{0}; type b{0}; - type c{0}; - type d{0}; -#endif - inline auto operator==(const hash_type &other) const noexcept -> bool { -#if defined(__SIZEOF_INT128__) - return this->a == other.a && this->b == other.b; -#else - return this->a == other.a && this->b == other.b && this->c == other.c && - this->d == other.d; -#endif - } + auto operator==(const hash_type &) const noexcept -> bool = default; }; [[nodiscard]] - inline auto perfect(const T &value, const std::size_t size) const noexcept + inline auto perfect(const char *data, const std::size_t size) const noexcept -> hash_type { hash_type result; - assert(!value.empty()); - // Copy starting a byte 2 - std::memcpy(reinterpret_cast(&result) + 1, value.data(), size); + assert(size > 0); + std::memcpy(reinterpret_cast(&result) + 1, data, size); return result; } + // GCC does not optimise well across implicit type conversions such as + // std::string to std::string_view, so we provide separate overloads with + // duplicated logic instead of unifying on a single parameter type + inline auto operator()(const T &value) const noexcept -> hash_type { const auto size{value.size()}; switch (size) { case 0: return {}; case 1: - return this->perfect(value, 1); + return this->perfect(value.data(), 1); case 2: - return this->perfect(value, 2); + return this->perfect(value.data(), 2); case 3: - return this->perfect(value, 3); + return this->perfect(value.data(), 3); case 4: - return this->perfect(value, 4); + return 
this->perfect(value.data(), 4); case 5: - return this->perfect(value, 5); + return this->perfect(value.data(), 5); case 6: - return this->perfect(value, 6); + return this->perfect(value.data(), 6); case 7: - return this->perfect(value, 7); + return this->perfect(value.data(), 7); case 8: - return this->perfect(value, 8); + return this->perfect(value.data(), 8); case 9: - return this->perfect(value, 9); + return this->perfect(value.data(), 9); case 10: - return this->perfect(value, 10); + return this->perfect(value.data(), 10); case 11: - return this->perfect(value, 11); + return this->perfect(value.data(), 11); case 12: - return this->perfect(value, 12); + return this->perfect(value.data(), 12); case 13: - return this->perfect(value, 13); + return this->perfect(value.data(), 13); case 14: - return this->perfect(value, 14); + return this->perfect(value.data(), 14); case 15: - return this->perfect(value, 15); + return this->perfect(value.data(), 15); case 16: - return this->perfect(value, 16); + return this->perfect(value.data(), 16); case 17: - return this->perfect(value, 17); + return this->perfect(value.data(), 17); case 18: - return this->perfect(value, 18); + return this->perfect(value.data(), 18); case 19: - return this->perfect(value, 19); + return this->perfect(value.data(), 19); case 20: - return this->perfect(value, 20); + return this->perfect(value.data(), 20); case 21: - return this->perfect(value, 21); + return this->perfect(value.data(), 21); case 22: - return this->perfect(value, 22); + return this->perfect(value.data(), 22); case 23: - return this->perfect(value, 23); + return this->perfect(value.data(), 23); case 24: - return this->perfect(value, 24); + return this->perfect(value.data(), 24); case 25: - return this->perfect(value, 25); + return this->perfect(value.data(), 25); case 26: - return this->perfect(value, 26); + return this->perfect(value.data(), 26); case 27: - return this->perfect(value, 27); + return this->perfect(value.data(), 27); case 
28: - return this->perfect(value, 28); + return this->perfect(value.data(), 28); case 29: - return this->perfect(value, 29); + return this->perfect(value.data(), 29); case 30: - return this->perfect(value, 30); + return this->perfect(value.data(), 30); case 31: - return this->perfect(value, 31); + return this->perfect(value.data(), 31); default: // This case is specifically designed to be constant with regards to // string length, and to exploit the fact that most JSON objects don't // have a lot of entries, so hash collision is not as common - auto hash = this->perfect(value, 31); + auto hash = this->perfect(value.data(), 31); hash.a |= 1 + (size + static_cast(value.front()) + static_cast(value.back())) % @@ -143,6 +131,86 @@ template struct PropertyHashJSON { } } + inline auto operator()(const char *data, + const std::size_t size) const noexcept -> hash_type { + switch (size) { + case 0: + return {}; + case 1: + return this->perfect(data, 1); + case 2: + return this->perfect(data, 2); + case 3: + return this->perfect(data, 3); + case 4: + return this->perfect(data, 4); + case 5: + return this->perfect(data, 5); + case 6: + return this->perfect(data, 6); + case 7: + return this->perfect(data, 7); + case 8: + return this->perfect(data, 8); + case 9: + return this->perfect(data, 9); + case 10: + return this->perfect(data, 10); + case 11: + return this->perfect(data, 11); + case 12: + return this->perfect(data, 12); + case 13: + return this->perfect(data, 13); + case 14: + return this->perfect(data, 14); + case 15: + return this->perfect(data, 15); + case 16: + return this->perfect(data, 16); + case 17: + return this->perfect(data, 17); + case 18: + return this->perfect(data, 18); + case 19: + return this->perfect(data, 19); + case 20: + return this->perfect(data, 20); + case 21: + return this->perfect(data, 21); + case 22: + return this->perfect(data, 22); + case 23: + return this->perfect(data, 23); + case 24: + return this->perfect(data, 24); + case 25: + return 
this->perfect(data, 25); + case 26: + return this->perfect(data, 26); + case 27: + return this->perfect(data, 27); + case 28: + return this->perfect(data, 28); + case 29: + return this->perfect(data, 29); + case 30: + return this->perfect(data, 30); + case 31: + return this->perfect(data, 31); + default: + // This case is specifically designed to be constant with regards to + // string length, and to exploit the fact that most JSON objects don't + // have a lot of entries, so hash collision is not as common + auto hash = this->perfect(data, 31); + hash.a |= 1 + (size + static_cast(data[0]) + + static_cast(data[size - 1])) % + // Make sure the property hash can never exceed 8 bits + 255; + return hash; + } + } + [[nodiscard]] inline auto is_perfect(const hash_type &hash) const noexcept -> bool { // If there is anything written past the first byte, diff --git a/vendor/core/src/core/json/include/sourcemeta/core/json_object.h b/vendor/core/src/core/json/include/sourcemeta/core/json_object.h index 86da8779..14e692c6 100644 --- a/vendor/core/src/core/json/include/sourcemeta/core/json_object.h +++ b/vendor/core/src/core/json/include/sourcemeta/core/json_object.h @@ -124,6 +124,13 @@ template class JSONObject { return this->hasher(key); } + /// Compute a hash from raw data + [[nodiscard]] inline auto hash(const char *raw_data, + const std::size_t raw_size) const noexcept + -> hash_type { + return hasher(raw_data, raw_size); + } + /// Attempt to find an entry by key [[nodiscard]] inline auto find(const Key &key) const -> const_iterator { const auto key_hash{this->hash(key)}; @@ -183,6 +190,11 @@ template class JSONObject { /// Check if the object is empty [[nodiscard]] inline auto empty() const -> bool { return this->data.empty(); } + /// Reserve capacity for a given number of entries + inline auto reserve(const size_type capacity) -> void { + this->data.reserve(capacity); + } + /// Access an object entry by its underlying positional index [[nodiscard]] inline auto at(const 
size_type index) const -> const Entry & { return this->data.at(index); @@ -365,6 +377,24 @@ template class JSONObject { return key_hash; } + /// Emplace an object property with a pre-computed hash + inline auto emplace_assume_new(Key &&key, mapped_type &&value, + const hash_type key_hash) -> void { + this->data.push_back({std::move(key), std::move(value), key_hash}); + } + + /// Emplace an object property with a pre-computed hash + inline auto emplace_assume_new(const Key &key, mapped_type &&value, + const hash_type key_hash) -> void { + this->data.push_back({key, std::move(value), key_hash}); + } + + /// Get the key of the last-inserted property + [[nodiscard]] inline auto back_key() const noexcept -> const Key & { + assert(!this->data.empty()); + return this->data.back().first; + } + /// Remove every property in the object inline auto clear() noexcept -> void { this->data.clear(); } @@ -437,7 +467,7 @@ template class JSONObject { #if defined(_MSC_VER) #pragma warning(disable : 4251) #endif - Hash hasher; + static constexpr Hash hasher{}; underlying_type data; #if defined(_MSC_VER) #pragma warning(default : 4251) diff --git a/vendor/core/src/core/json/include/sourcemeta/core/json_value.h b/vendor/core/src/core/json/include/sourcemeta/core/json_value.h index 92689ae7..cb9045c5 100644 --- a/vendor/core/src/core/json/include/sourcemeta/core/json_value.h +++ b/vendor/core/src/core/json/include/sourcemeta/core/json_value.h @@ -10,10 +10,12 @@ #include #include +#include #include // std::any_of #include // std::bitset #include // assert +#include // std::modf, std::trunc, std::isinf, std::isnan #include // std::size_t #include // std::int64_t, std::uint8_t #include // std::less, std::reference_wrapper, std::function @@ -76,7 +78,7 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { using ParseCallback = std::function; + const std::size_t index, const String &property)>; /// A comparison function between object property keys. 
/// See https://en.cppreference.com/w/cpp/named_req/Compare @@ -372,7 +374,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// const sourcemeta::core::JSON document{true}; /// assert(document.is_boolean()); /// ``` - [[nodiscard]] auto is_boolean() const noexcept -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto is_boolean() const noexcept + -> bool { + return this->current_type == Type::Boolean; + } /// Check if the input JSON document is null. For example: /// @@ -383,7 +388,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// const sourcemeta::core::JSON document{nullptr}; /// assert(document.is_null()); /// ``` - [[nodiscard]] auto is_null() const noexcept -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto is_null() const noexcept + -> bool { + return this->current_type == Type::Null; + } /// Check if the input JSON document is an integer. For example: /// @@ -394,7 +402,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// const sourcemeta::core::JSON document{5}; /// assert(document.is_integer()); /// ``` - [[nodiscard]] auto is_integer() const noexcept -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto is_integer() const noexcept + -> bool { + return this->current_type == Type::Integer; + } /// Check if the input JSON document is a real type. For example: /// @@ -405,7 +416,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// const sourcemeta::core::JSON document{3.14}; /// assert(document.is_real()); /// ``` - [[nodiscard]] auto is_real() const noexcept -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto is_real() const noexcept + -> bool { + return this->current_type == Type::Real; + } /// Check if the input JSON document is an integer, a real number that /// represents an integer, or an integer decimal. 
For example: @@ -417,7 +431,21 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// const sourcemeta::core::JSON document{5.0}; /// assert(document.is_integral()); /// ``` - [[nodiscard]] auto is_integral() const noexcept -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto is_integral() const noexcept + -> bool { + switch (this->type()) { + case Type::Integer: + return true; + case Type::Real: { + Real integral_part = 0.0; + return std::modf(this->to_real(), &integral_part) == 0.0; + } + case Type::Decimal: + return this->to_decimal().is_integral(); + default: + return false; + } + } /// Check if the input JSON document is either an integer or a real type. For /// example: @@ -431,7 +459,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(real.is_number()); /// assert(integer.is_number()); /// ``` - [[nodiscard]] auto is_number() const noexcept -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto is_number() const noexcept + -> bool { + return this->is_integer() || this->is_real() || this->is_decimal(); + } /// Check if the input JSON document is either a positive integer or a /// positive real number. Zero is considered to be positive. For example: @@ -456,7 +487,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// const sourcemeta::core::JSON document{"foo"}; /// assert(document.is_string()); /// ``` - [[nodiscard]] auto is_string() const noexcept -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto is_string() const noexcept + -> bool { + return this->current_type == Type::String; + } /// Check if the input JSON document is an array. 
For example: /// @@ -468,7 +502,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// document=sourcemeta::core::parse_json("[ 1, 2, 3 ]"); /// assert(document.is_array()); /// ``` - [[nodiscard]] auto is_array() const noexcept -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto is_array() const noexcept + -> bool { + return this->current_type == Type::Array; + } /// Check if the input JSON document is an object. For example: /// @@ -480,7 +517,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// document=sourcemeta::core::parse_json("{ \"foo\": 1 }"); /// assert(document.is_object()); /// ``` - [[nodiscard]] auto is_object() const noexcept -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto is_object() const noexcept + -> bool { + return this->current_type == Type::Object; + } /// Check if the input JSON document is an arbitrary precision decimal value. /// For example: @@ -493,7 +533,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// const sourcemeta::core::JSON document{value}; /// assert(document.is_decimal()); /// ``` - [[nodiscard]] auto is_decimal() const noexcept -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto is_decimal() const noexcept + -> bool { + return this->current_type == Type::Decimal; + } /// Get the type of the JSON document. 
For example: /// @@ -504,7 +547,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// const sourcemeta::core::JSON document{true}; /// assert(document.type() == sourcemeta::core::JSON::Type::Boolean); /// ``` - [[nodiscard]] auto type() const noexcept -> Type; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto type() const noexcept + -> Type { + return this->current_type; + } /* * Type conversion @@ -521,7 +567,11 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(document.is_boolean()); /// assert(document.to_boolean()); /// ``` - [[nodiscard]] auto to_boolean() const noexcept -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto to_boolean() const noexcept + -> bool { + assert(this->is_boolean()); + return this->data_boolean; + } /// Convert a JSON instance into a signed integer value. The result of this /// method is undefined unless the JSON instance holds an integer value. For @@ -535,7 +585,11 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(document.is_integer()); /// assert(document.to_integer() == 5); /// ``` - [[nodiscard]] auto to_integer() const noexcept -> Integer; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto to_integer() const noexcept + -> Integer { + assert(this->is_integer()); + return this->data_integer; + } /// Convert a JSON instance into an IEEE 64-bit floating-point value. The /// result of this method is undefined unless the JSON instance holds a real @@ -549,7 +603,13 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(document.is_real()); /// assert(document.to_real() == 3.14); /// ``` - [[nodiscard]] auto to_real() const noexcept -> Real; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto to_real() const noexcept + -> Real { + assert(this->is_real()); + assert(!std::isinf(this->data_real)); + assert(!std::isnan(this->data_real)); + return this->data_real; + } /// Convert a JSON instance into a decimal value. The result of this method /// is undefined unless the JSON instance holds a decimal value. 
For example: @@ -563,7 +623,13 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(document.is_decimal()); /// assert(document.to_decimal().to_int64() == 1234567890); /// ``` - [[nodiscard]] auto to_decimal() const noexcept -> const Decimal &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto to_decimal() const noexcept + -> const Decimal & { + assert(this->is_decimal()); + assert(this->data_decimal->is_finite()); + assert(!this->data_decimal->is_nan()); + return *this->data_decimal; + } /// Convert a JSON instance into a standard string value. The result of this /// method is undefined unless the JSON instance holds a string value. For @@ -577,7 +643,11 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(document.is_string()); /// assert(document.to_string() == "foo"); /// ``` - [[nodiscard]] auto to_string() const noexcept -> const String &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto to_string() const noexcept + -> const String & { + assert(this->is_string()); + return this->data_string; + } /// Get a standard input string stream from a JSON string. The result of this /// method is undefined unless the JSON instance holds a string value. For @@ -613,7 +683,11 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// << "\n"; /// }); /// ``` - [[nodiscard]] auto as_array() const noexcept -> const Array &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto as_array() const noexcept + -> const Array & { + assert(this->is_array()); + return this->data_array; + } /// Get the JSON document as an array instance. This is convenient /// for using mutable iterators on the array. 
For example: @@ -627,7 +701,11 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// sourcemeta::core::parse_json("[ 1, 2, 3 ]"); /// std::sort(document.as_array().begin(), document.as_array().end()); /// ``` - [[nodiscard]] auto as_array() noexcept -> Array &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto as_array() noexcept + -> Array & { + assert(this->is_array()); + return this->data_array; + } /// Get the JSON document as an object instance. This is convenient /// for using constant iterators on the object. For example: @@ -651,7 +729,11 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// << "\n"; /// }); /// ``` - [[nodiscard]] auto as_object() noexcept -> Object &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto as_object() noexcept + -> Object & { + assert(this->is_object()); + return this->data_object; + } /// Get the JSON document as an object instance. This is convenient /// for using mutable iterators on the object. For example: @@ -671,7 +753,11 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// value += sourcemeta::core::JSON{1}; /// } /// ``` - [[nodiscard]] auto as_object() const noexcept -> const Object &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto as_object() const noexcept + -> const Object & { + assert(this->is_object()); + return this->data_object; + } /// Get the JSON numeric document as a real number if it is not one already. /// For example: @@ -683,7 +769,12 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// const sourcemeta::core::JSON document{5}; /// assert(document.as_real() == 5.0); /// ``` - [[nodiscard]] auto as_real() const noexcept -> Real; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto as_real() const noexcept + -> Real { + assert(this->is_number()); + return this->is_real() ? this->to_real() + : static_cast(this->to_integer()); + } /// Get the JSON numeric document as an integer number if it is not one /// already. If the number is a real number, truncation will take place. 
For @@ -696,7 +787,15 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// const sourcemeta::core::JSON document{5.3}; /// assert(document.as_integer() == 5); /// ``` - [[nodiscard]] auto as_integer() const noexcept -> Integer; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto as_integer() const noexcept + -> Integer { + assert(this->is_number()); + if (this->is_integer()) { + return this->to_integer(); + } else { + return static_cast(std::trunc(this->to_real())); + } + } /* * Getters @@ -720,8 +819,12 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// sourcemeta::core::parse_json("{ \"1\": "foo" }"); /// assert(my_array.at(1).to_string() == "foo"); /// ``` - [[nodiscard]] auto at(const typename Array::size_type index) const - -> const JSON &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto + at(const typename Array::size_type index) const -> const JSON & { + assert(this->is_array()); + assert(index < this->size()); + return this->data_array.data.at(index); + } /// This method retrieves a element by its index. If the input JSON instance /// is an object, a property that corresponds to the stringified integer will @@ -741,7 +844,12 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// sourcemeta::core::parse_json("{ \"1\": "foo" }"); /// assert(my_array.at(1).to_string() == "foo"); /// ``` - [[nodiscard]] auto at(const typename Array::size_type index) -> JSON &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto + at(const typename Array::size_type index) -> JSON & { + assert(this->is_array()); + assert(index < this->size()); + return this->data_array.data.at(index); + } /// This method retrieves an object element. 
/// @@ -755,7 +863,13 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// sourcemeta::core::parse_json("{ \"foo\": 1, \"bar\": 2 }"); /// assert(my_object.at("bar").to_integer() == 2); /// ``` - [[nodiscard]] auto at(const String &key) const -> const JSON &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto at(const String &key) const + -> const JSON & { + assert(this->is_object()); + assert(this->defines(key)); + const auto &object{this->data_object}; + return object.at(key, object.hash(key)); + } /// This method retrieves an object element given a pre-calculated property /// hash. @@ -771,9 +885,13 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(my_object.at("bar", /// my_object.as_object().hash("bar")).to_integer() == 2); /// ``` - [[nodiscard]] auto at(const String &key, - const typename Object::hash_type hash) const - -> const JSON &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto + at(const String &key, const typename Object::hash_type hash) const + -> const JSON & { + assert(this->is_object()); + assert(this->defines(key)); + return this->data_object.at(key, hash); + } /// This method retrieves an object element. /// @@ -787,7 +905,13 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// sourcemeta::core::parse_json("{ \"foo\": 1, \"bar\": 2 }"); /// assert(my_object.at("bar").to_integer() == 2); /// ``` - [[nodiscard]] auto at(const String &key) -> JSON &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto at(const String &key) + -> JSON & { + assert(this->is_object()); + assert(this->defines(key)); + auto &object{this->data_object}; + return object.at(key, object.hash(key)); + } /// This method retrieves an object element given a pre-calculated property /// hash. 
@@ -803,8 +927,12 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(my_object.at("bar", /// my_object.as_object().hash("bar")).to_integer() == 2); /// ``` - [[nodiscard]] auto at(const String &key, - const typename Object::hash_type hash) -> JSON &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto + at(const String &key, const typename Object::hash_type hash) -> JSON & { + assert(this->is_object()); + assert(this->defines(key)); + return this->data_object.at(key, hash); + } /// This method retrieves an object property or a user provided value if such /// property is not defined. @@ -866,7 +994,11 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// sourcemeta::core::parse_json("[ 1, 2, 3 ]"); /// assert(document.front().to_integer() == 1); /// ``` - [[nodiscard]] auto front() -> JSON &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto front() -> JSON & { + assert(this->is_array()); + assert(!this->empty()); + return this->data_array.data.front(); + } /// This method retrieves a reference to the first element of a JSON array. /// This method is undefined if the input JSON instance is an empty array. For @@ -880,7 +1012,12 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// sourcemeta::core::parse_json("[ 1, 2, 3 ]"); /// assert(document.front().to_integer() == 1); /// ``` - [[nodiscard]] auto front() const -> const JSON &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto front() const + -> const JSON & { + assert(this->is_array()); + assert(!this->empty()); + return this->data_array.data.front(); + } /// This method retrieves a reference to the last element of a JSON array. /// This method is undefined if the input JSON instance is an empty array. 
For @@ -894,7 +1031,11 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// sourcemeta::core::parse_json("[ 1, 2, 3 ]"); /// assert(document.back().to_integer() == 3); /// ``` - [[nodiscard]] auto back() -> JSON &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto back() -> JSON & { + assert(this->is_array()); + assert(!this->empty()); + return this->data_array.data.back(); + } /// This method retrieves a reference to the last element of a JSON array. /// This method is undefined if the input JSON instance is an empty array. For @@ -908,7 +1049,12 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// sourcemeta::core::parse_json("[ 1, 2, 3 ]"); /// assert(document.back().to_integer() == 3); /// ``` - [[nodiscard]] auto back() const -> const JSON &; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto back() const + -> const JSON & { + assert(this->is_array()); + assert(!this->empty()); + return this->data_array.data.back(); + } /* * Read operations @@ -934,7 +1080,15 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(my_array.size() == 2); /// assert(my_string.size() == 3); /// ``` - [[nodiscard]] auto size() const -> std::size_t; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto size() const -> std::size_t { + if (this->is_object()) { + return this->object_size(); + } else if (this->is_array()) { + return this->array_size(); + } else { + return this->string_size(); + } + } /// If the input JSON instance is a string, return its logical length. /// @@ -947,7 +1101,11 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// const sourcemeta::core::JSON my_string{"foo"}; /// assert(my_string.string_size() == 3); /// ``` - [[nodiscard]] auto string_size() const -> std::size_t; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto string_size() const + -> std::size_t { + assert(this->is_string()); + return JSON::size(this->data_string); + } /// If the input JSON instance is an array, return its number of elements. 
/// @@ -961,7 +1119,11 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// sourcemeta::core::parse_json("[ 1, 2 ]"); /// assert(my_array.array_size() == 2); /// ``` - [[nodiscard]] auto array_size() const -> std::size_t; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto array_size() const + -> std::size_t { + assert(this->is_array()); + return this->data_array.data.size(); + } /// If the input JSON instance is an object, return its number of pairs. /// @@ -975,7 +1137,11 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// sourcemeta::core::parse_json("{ \"foo\": 1 }"); /// assert(my_object.object_size() == 1); /// ``` - [[nodiscard]] auto object_size() const -> std::size_t; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto object_size() const + -> std::size_t { + assert(this->is_object()); + return this->data_object.size(); + } /// If the input JSON instance is string, input JSON instance is a string, /// return its number of bytes. For example: @@ -988,7 +1154,11 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// sourcemeta::core::parse_json("\"\\uD83D\\uDCA9\"")}; /// assert(my_string.size() == 2); /// ``` - [[nodiscard]] auto byte_size() const -> std::size_t; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto byte_size() const + -> std::size_t { + assert(this->is_string()); + return this->data_string.size(); + } /// Estimate the byte size occupied by the given parsed JSON instance (not its /// stringified representation). 
Keep in mind that as the method name implies, @@ -1056,7 +1226,15 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(my_array.empty()); /// assert(my_string.empty()); /// ``` - [[nodiscard]] auto empty() const -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto empty() const -> bool { + if (this->is_object()) { + return this->data_object.empty(); + } else if (this->is_array()) { + return this->data_array.data.empty(); + } else { + return this->data_string.empty(); + } + } /// This method checks whether an input JSON object defines a specific key /// and returns the value if it does. For example: @@ -1071,7 +1249,12 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// const auto result = document.try_at("foo"); /// EXPECT_TRUE(result); /// EXPECT_EQ(result->to_integer(), 1); - [[nodiscard]] auto try_at(const String &key) const -> const JSON *; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto + try_at(const String &key) const -> const JSON * { + assert(this->is_object()); + const auto &object{this->data_object}; + return object.try_at(key, object.hash(key)); + } /// This method checks, given a pre-calculated hash, whether an input JSON /// object defines a specific key and returns the value if it does. For @@ -1088,9 +1271,13 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// document.as_object().hash("foo")); /// EXPECT_TRUE(result); /// EXPECT_EQ(result->to_integer(), 1); - [[nodiscard]] auto try_at(const String &key, - const typename Object::hash_type hash) const - -> const JSON *; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto + try_at(const String &key, const typename Object::hash_type hash) const + -> const JSON * { + assert(this->is_object()); + const auto &object{this->data_object}; + return object.try_at(key, hash); + } /// This method checks whether an input JSON object defines a specific key. 
/// For example: @@ -1104,7 +1291,12 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(document.defines("foo")); /// assert(!document.defines("bar")); /// ``` - [[nodiscard]] auto defines(const String &key) const -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto + defines(const String &key) const -> bool { + assert(this->is_object()); + const auto &object{this->data_object}; + return object.defines(key, object.hash(key)); + } /// This method checks whether an input JSON object defines a specific key /// given a pre-calculated property hash. For example: @@ -1120,9 +1312,12 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(document.defines("bar", /// document.as_object().hash("bar"))); /// ``` - [[nodiscard]] auto defines(const String &key, - const typename Object::hash_type hash) const - -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto + defines(const String &key, const typename Object::hash_type hash) const + -> bool { + assert(this->is_object()); + return this->data_object.defines(key, hash); + } /// This method checks whether an input JSON object defines a specific integer /// key. For example: @@ -1136,8 +1331,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// assert(document.defines(0)); /// assert(!document.defines(1)); /// ``` - [[nodiscard]] auto defines(const typename Array::size_type index) const - -> bool; + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto + defines(const typename Array::size_type index) const -> bool { + return this->defines(std::to_string(index)); + } /// This method checks whether an input JSON object defines at least one given /// key. @@ -1438,6 +1635,10 @@ class SOURCEMETA_CORE_JSON_EXPORT JSON { /// ``` auto assign_assume_new(String &&key, JSON &&value) -> void; + /// This method sets an object key with a pre-computed hash + auto assign_assume_new(String &&key, JSON &&value, Object::hash_type hash) + -> void; + /// This method deletes an object key. 
For example: /// /// ```cpp diff --git a/vendor/core/src/core/json/json.cc b/vendor/core/src/core/json/json.cc index 41224800..4a629737 100644 --- a/vendor/core/src/core/json/json.cc +++ b/vendor/core/src/core/json/json.cc @@ -4,6 +4,7 @@ #include #include +#include "construct.h" #include "parser.h" #include "stringify.h" @@ -13,43 +14,169 @@ #include // std::ifstream #include // std::basic_istream #include // std::basic_ostream +#include // std::basic_ostringstream #include // std::make_error_code, std::errc +#include // std::vector namespace sourcemeta::core { +static auto internal_parse_json(const char *&cursor, const char *end, + std::uint64_t &line, std::uint64_t &column, + const JSON::ParseCallback &callback, + const bool track_positions, JSON &output) + -> void { + const char *buffer_start{cursor}; + std::vector tape; + tape.reserve(static_cast(end - cursor) / 8); + if (callback || track_positions) { + scan_json(cursor, end, buffer_start, line, column, tape); + } else { + try { + scan_json(cursor, end, buffer_start, line, column, tape); + } catch (const JSONParseError &) { + cursor = buffer_start; + tape.clear(); + line = 1; + column = 0; + scan_json(cursor, end, buffer_start, line, column, tape); + } + } + construct_json(buffer_start, tape, callback, output); +} + +static auto internal_parse_json(const char *&cursor, const char *end, + std::uint64_t &line, std::uint64_t &column, + const bool track_positions) -> JSON { + JSON output{nullptr}; + internal_parse_json(cursor, end, line, column, nullptr, track_positions, + output); + return output; +} + // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) auto parse_json(std::basic_istream &stream, - std::uint64_t &line, std::uint64_t &column, - const JSON::ParseCallback &callback) -> JSON { - return internal_parse_json(stream, line, column, callback); + std::uint64_t &line, std::uint64_t &column) -> JSON { + const auto start_position{stream.tellg()}; + std::basic_ostringstream buffer; + buffer 
<< stream.rdbuf(); + const auto input{buffer.str()}; + const char *cursor{input.data()}; + const char *end{input.data() + input.size()}; + auto result{internal_parse_json(cursor, end, line, column, true)}; + if (start_position != static_cast(-1)) { + const auto consumed{static_cast(cursor - input.data())}; + stream.clear(); + stream.seekg(start_position + consumed); + } + + return result; } auto parse_json(const std::basic_string &input, - std::uint64_t &line, std::uint64_t &column, - const JSON::ParseCallback &callback) -> JSON { - return internal_parse_json(input, line, column, callback); + std::uint64_t &line, std::uint64_t &column) -> JSON { + const char *cursor{input.data()}; + return internal_parse_json(cursor, input.data() + input.size(), line, column, + true); +} + +// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) +auto parse_json(std::basic_istream &stream) + -> JSON { + const auto start_position{stream.tellg()}; + std::basic_ostringstream buffer; + buffer << stream.rdbuf(); + const auto input{buffer.str()}; + const char *cursor{input.data()}; + const char *end{input.data() + input.size()}; + std::uint64_t line{1}; + std::uint64_t column{0}; + auto result{internal_parse_json(cursor, end, line, column, false)}; + if (start_position != static_cast(-1)) { + const auto consumed{static_cast(cursor - input.data())}; + stream.clear(); + stream.seekg(start_position + consumed); + } + return result; +} + +auto parse_json(const std::basic_string &input) + -> JSON { + std::uint64_t line{1}; + std::uint64_t column{0}; + const char *cursor{input.data()}; + return internal_parse_json(cursor, input.data() + input.size(), line, column, + false); +} + +auto read_json(const std::filesystem::path &path) -> JSON { + auto stream{read_file(path)}; + try { + return parse_json(stream); + } catch (const JSONParseError &error) { + // For producing better error messages + throw JSONFileParseError(path, error); + } +} + +// 
NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) +auto parse_json(std::basic_istream &stream, + std::uint64_t &line, std::uint64_t &column, JSON &output, + const JSON::ParseCallback &callback) -> void { + const auto start_position{stream.tellg()}; + std::basic_ostringstream buffer; + buffer << stream.rdbuf(); + const auto input{buffer.str()}; + const char *cursor{input.data()}; + const char *end{input.data() + input.size()}; + internal_parse_json(cursor, end, line, column, callback, true, output); + if (start_position != static_cast(-1)) { + const auto consumed{static_cast(cursor - input.data())}; + stream.clear(); + stream.seekg(start_position + consumed); + } +} + +auto parse_json(const std::basic_string &input, + std::uint64_t &line, std::uint64_t &column, JSON &output, + const JSON::ParseCallback &callback) -> void { + const char *cursor{input.data()}; + internal_parse_json(cursor, input.data() + input.size(), line, column, + callback, true, output); } // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) auto parse_json(std::basic_istream &stream, - const JSON::ParseCallback &callback) -> JSON { + JSON &output, const JSON::ParseCallback &callback) -> void { + const auto start_position{stream.tellg()}; + std::basic_ostringstream buffer; + buffer << stream.rdbuf(); + const auto input{buffer.str()}; + const char *cursor{input.data()}; + const char *end{input.data() + input.size()}; std::uint64_t line{1}; std::uint64_t column{0}; - return parse_json(stream, line, column, callback); + internal_parse_json(cursor, end, line, column, callback, false, output); + if (start_position != static_cast(-1)) { + const auto consumed{static_cast(cursor - input.data())}; + stream.clear(); + stream.seekg(start_position + consumed); + } } auto parse_json(const std::basic_string &input, - const JSON::ParseCallback &callback) -> JSON { + JSON &output, const JSON::ParseCallback &callback) -> void { std::uint64_t line{1}; std::uint64_t column{0}; - 
return parse_json(input, line, column, callback); + const char *cursor{input.data()}; + internal_parse_json(cursor, input.data() + input.size(), line, column, + callback, false, output); } -auto read_json(const std::filesystem::path &path, - const JSON::ParseCallback &callback) -> JSON { +auto read_json(const std::filesystem::path &path, JSON &output, + const JSON::ParseCallback &callback) -> void { auto stream{read_file(path)}; try { - return parse_json(stream, callback); + parse_json(stream, output, callback); } catch (const JSONParseError &error) { // For producing better error messages throw JSONFileParseError(path, error); @@ -106,12 +233,4 @@ auto operator<<(std::basic_ostream &stream, } } -auto make_set(std::initializer_list types) -> JSON::TypeSet { - JSON::TypeSet result; - for (const auto type : types) { - result.set(static_cast(type)); - } - return result; -} - } // namespace sourcemeta::core diff --git a/vendor/core/src/core/json/json_value.cc b/vendor/core/src/core/json/json_value.cc index 6140dc00..1d5bf2a9 100644 --- a/vendor/core/src/core/json/json_value.cc +++ b/vendor/core/src/core/json/json_value.cc @@ -3,7 +3,7 @@ #include // std::find #include // assert -#include // std::isinf, std::isnan, std::modf, std::trunc +#include // std::isinf, std::isnan, std::modf #include // std::size_t #include // std::int64_t #include // std::reference_wrapper @@ -65,20 +65,19 @@ JSON::JSON(const Char *const value) : current_type{Type::String} { } JSON::JSON(std::initializer_list values) : current_type{Type::Array} { - new (&this->data_array) Array{values}; - -// For some reason, if we construct a JSON by passing a single -// JSON as argument, GCC and MSVC, in some circumstances will -// prefer this initializer list constructor over the default copy constructor, -// effectively creating an array of a single element. We couldn't find a nicer -// way to force them to pick the correct constructor. This is a hacky (and -// potentially inefficient?) 
way to "fix it up" to get consistent behavior -// across compilers. +// For direct-list-initialization (e.g. JSON x{other_json}), the C++ standard +// mandates that initializer_list constructors are preferred over copy/move +// constructors. GCC and MSVC follow this strictly, so a single-element brace +// init ends up here instead of the copy constructor. Handle this case before +// constructing the array to avoid an unnecessary heap allocation. #if defined(__GNUC__) || defined(_MSC_VER) if (values.size() == 1) { + this->current_type = Type::Null; this->operator=(*values.begin()); + return; } #endif + new (&this->data_array) Array{values}; } JSON::JSON(const Array &value) : current_type{Type::Array} { @@ -418,41 +417,6 @@ auto JSON::operator-=(const JSON &substractive) -> JSON & { return *this = *this - substractive; } -[[nodiscard]] auto JSON::is_boolean() const noexcept -> bool { - return this->current_type == Type::Boolean; -} - -[[nodiscard]] auto JSON::is_null() const noexcept -> bool { - return this->current_type == Type::Null; -} - -[[nodiscard]] auto JSON::is_integer() const noexcept -> bool { - return this->current_type == Type::Integer; -} - -[[nodiscard]] auto JSON::is_real() const noexcept -> bool { - return this->current_type == Type::Real; -} - -[[nodiscard]] auto JSON::is_integral() const noexcept -> bool { - switch (this->type()) { - case Type::Integer: - return true; - case Type::Real: { - Real integral = 0.0; - return std::modf(this->to_real(), &integral) == 0.0; - } - case Type::Decimal: - return this->to_decimal().is_integral(); - default: - return false; - } -} - -[[nodiscard]] auto JSON::is_number() const noexcept -> bool { - return this->is_integer() || this->is_real() || this->is_decimal(); -} - [[nodiscard]] auto JSON::is_positive() const noexcept -> bool { switch (this->type()) { case Type::Integer: @@ -466,141 +430,12 @@ auto JSON::operator-=(const JSON &substractive) -> JSON & { } } -[[nodiscard]] auto JSON::is_string() const noexcept -> 
bool { - return this->current_type == Type::String; -} - -[[nodiscard]] auto JSON::is_array() const noexcept -> bool { - return this->current_type == Type::Array; -} - -[[nodiscard]] auto JSON::is_object() const noexcept -> bool { - return this->current_type == Type::Object; -} - -[[nodiscard]] auto JSON::is_decimal() const noexcept -> bool { - return this->current_type == Type::Decimal; -} - -[[nodiscard]] auto JSON::type() const noexcept -> Type { - return this->current_type; -} - -[[nodiscard]] auto JSON::to_boolean() const noexcept -> bool { - assert(this->is_boolean()); - return this->data_boolean; -} - -[[nodiscard]] auto JSON::to_integer() const noexcept -> Integer { - assert(this->is_integer()); - return this->data_integer; -} - -[[nodiscard]] auto JSON::to_real() const noexcept -> Real { - assert(this->is_real()); - // This MUST not happen - assert(!std::isinf(this->data_real)); - assert(!std::isnan(this->data_real)); - return this->data_real; -} - -[[nodiscard]] auto JSON::to_decimal() const noexcept -> const Decimal & { - assert(this->is_decimal()); - // This MUST not happen - assert(this->data_decimal->is_finite()); - assert(!this->data_decimal->is_nan()); - return *this->data_decimal; -} - -[[nodiscard]] auto JSON::to_string() const noexcept -> const JSON::String & { - assert(this->is_string()); - return this->data_string; -} - [[nodiscard]] auto JSON::to_stringstream() const -> std::basic_istringstream> { return std::basic_istringstream>{ this->data_string}; } -[[nodiscard]] auto JSON::as_array() const noexcept -> const JSON::Array & { - assert(this->is_array()); - return this->data_array; -} - -[[nodiscard]] auto JSON::as_array() noexcept -> JSON::Array & { - assert(this->is_array()); - return this->data_array; -} - -[[nodiscard]] auto JSON::as_object() noexcept -> Object & { - assert(this->is_object()); - return this->data_object; -} - -[[nodiscard]] auto JSON::as_object() const noexcept -> const Object & { - assert(this->is_object()); - return 
this->data_object; -} - -[[nodiscard]] auto JSON::as_real() const noexcept -> Real { - assert(this->is_number()); - return this->is_real() ? this->to_real() - : static_cast(this->to_integer()); -} - -[[nodiscard]] auto JSON::as_integer() const noexcept -> Integer { - assert(this->is_number()); - if (this->is_integer()) { - return this->to_integer(); - } else { - return static_cast(std::trunc(this->to_real())); - } -} - -[[nodiscard]] auto JSON::at(const typename JSON::Array::size_type index) const - -> const JSON & { - assert(this->is_array()); - assert(index < this->size()); - return data_array.data.at(index); -} - -[[nodiscard]] auto JSON::at(const typename JSON::Array::size_type index) - -> JSON & { - assert(this->is_array()); - assert(index < this->size()); - return this->data_array.data.at(index); -} - -[[nodiscard]] auto JSON::at(const JSON::String &key) const -> const JSON & { - assert(this->is_object()); - assert(this->defines(key)); - const auto &object{this->data_object}; - return object.at(key, object.hash(key)); -} - -[[nodiscard]] auto JSON::at(const String &key, - const typename Object::hash_type hash) const - -> const JSON & { - assert(this->is_object()); - assert(this->defines(key)); - return this->data_object.at(key, hash); -} - -[[nodiscard]] auto JSON::at(const JSON::String &key) -> JSON & { - assert(this->is_object()); - assert(this->defines(key)); - auto &object{this->data_object}; - return object.at(key, object.hash(key)); -} - -[[nodiscard]] auto JSON::at(const String &key, - const typename Object::hash_type hash) -> JSON & { - assert(this->is_object()); - assert(this->defines(key)); - return this->data_object.at(key, hash); -} - [[nodiscard]] auto JSON::at_or(const String &key, const typename Object::hash_type hash, const JSON &otherwise) const -> const JSON & { @@ -615,60 +450,6 @@ auto JSON::operator-=(const JSON &substractive) -> JSON & { return this->at_or(key, this->data_object.hash(key), otherwise); } -[[nodiscard]] auto JSON::front() 
-> JSON & { - assert(this->is_array()); - assert(!this->empty()); - return this->data_array.data.front(); -} - -[[nodiscard]] auto JSON::front() const -> const JSON & { - assert(this->is_array()); - assert(!this->empty()); - return this->data_array.data.front(); -} - -[[nodiscard]] auto JSON::back() -> JSON & { - assert(this->is_array()); - assert(!this->empty()); - return this->data_array.data.back(); -} - -[[nodiscard]] auto JSON::back() const -> const JSON & { - assert(this->is_array()); - assert(!this->empty()); - return this->data_array.data.back(); -} - -[[nodiscard]] auto JSON::size() const -> std::size_t { - if (this->is_object()) { - return this->object_size(); - } else if (this->is_array()) { - return this->array_size(); - } else { - return this->string_size(); - } -} - -[[nodiscard]] auto JSON::string_size() const -> std::size_t { - assert(this->is_string()); - return JSON::size(this->data_string); -} - -[[nodiscard]] auto JSON::array_size() const -> std::size_t { - assert(this->is_array()); - return this->data_array.data.size(); -} - -[[nodiscard]] auto JSON::object_size() const -> std::size_t { - assert(this->is_object()); - return this->data_object.size(); -} - -[[nodiscard]] auto JSON::byte_size() const -> std::size_t { - assert(this->is_string()); - return this->data_string.size(); -} - [[nodiscard]] auto JSON::estimated_byte_size() const -> std::uint64_t { // Of course, container have some overhead of their own // which we are not taking into account here, as its typically @@ -794,48 +575,6 @@ auto JSON::operator-=(const JSON &substractive) -> JSON & { return dividend_decimal.divisible_by(divisor.to_decimal()); } -[[nodiscard]] auto JSON::empty() const -> bool { - if (this->is_object()) { - return this->data_object.empty(); - } else if (this->is_array()) { - return this->data_array.data.empty(); - } else { - return this->data_string.empty(); - } -} - -[[nodiscard]] auto JSON::try_at(const JSON::String &key) const -> const JSON * { - 
assert(this->is_object()); - const auto &object{this->data_object}; - return object.try_at(key, object.hash(key)); -} - -[[nodiscard]] auto JSON::try_at(const String &key, - const typename Object::hash_type hash) const - -> const JSON * { - assert(this->is_object()); - const auto &object{this->data_object}; - return object.try_at(key, hash); -} - -[[nodiscard]] auto JSON::defines(const JSON::String &key) const -> bool { - assert(this->is_object()); - const auto &object{this->data_object}; - return object.defines(key, object.hash(key)); -} - -[[nodiscard]] auto -JSON::defines(const JSON::String &key, - const typename JSON::Object::hash_type hash) const -> bool { - assert(this->is_object()); - return this->data_object.defines(key, hash); -} - -[[nodiscard]] auto -JSON::defines(const typename JSON::Array::size_type index) const -> bool { - return this->defines(std::to_string(index)); -} - [[nodiscard]] auto JSON::defines_any(std::initializer_list keys) const -> bool { return this->defines_any(keys.begin(), keys.end()); @@ -977,6 +716,12 @@ auto JSON::assign_assume_new(JSON::String &&key, JSON &&value) -> void { this->data_object.emplace_assume_new(std::move(key), std::move(value)); } +auto JSON::assign_assume_new(JSON::String &&key, JSON &&value, + Object::hash_type hash) -> void { + assert(this->is_object()); + this->data_object.emplace_assume_new(std::move(key), std::move(value), hash); +} + auto JSON::erase(const JSON::String &key) -> typename Object::size_type { assert(this->is_object()); return this->data_object.erase(key); diff --git a/vendor/core/src/core/json/parser.h b/vendor/core/src/core/json/parser.h index 7341549a..5dae8413 100644 --- a/vendor/core/src/core/json/parser.h +++ b/vendor/core/src/core/json/parser.h @@ -4,1325 +4,768 @@ #include #include -#include - #include "grammar.h" -#include // assert -#include // std::isxdigit -#include // std::isinf, std::isnan -#include // std::size_t -#include // std::uint64_t -#include // std::reference_wrapper 
-#include // std::basic_istream -#include // std::optional -#include // std::basic_ostringstream, std::basic_istringstream -#include // std::stack -#include // std::out_of_range -#include // std::basic_string, std::stoul +#include // assert +#include // std::uint64_t, std::uint32_t +#include // std::vector + +namespace sourcemeta::core { -namespace sourcemeta::core::internal { +enum class TapeType : std::uint8_t { + ObjectStart, + ObjectEnd, + ArrayStart, + ArrayEnd, + Key, + String, + Number, + Null, + True, + False +}; + +struct TapeEntry { + TapeType type; + std::uint32_t offset; + std::uint32_t length; + std::uint32_t count; + std::uint64_t line; + std::uint64_t column; +}; + +namespace internal { + +template +inline auto skip_whitespace(const char *&cursor, const char *end, + std::uint64_t &line, std::uint64_t &column) + -> void { + while (cursor < end) { + switch (*cursor) { + case internal::token_whitespace_space: + case internal::token_whitespace_tabulation: + case internal::token_whitespace_carriage_return: + if constexpr (TrackPositions) { + column += 1; + } + cursor++; + continue; + case internal::token_whitespace_line_feed: + if constexpr (TrackPositions) { + line += 1; + column = 0; + } + cursor++; + continue; + default: + return; + } + } +} -inline auto parse_null( - const std::uint64_t line, std::uint64_t &column, - std::basic_istream &stream) - -> JSON { +template +inline auto scan_null(const std::uint64_t line, std::uint64_t &column, + const char *&cursor, const char *end) -> void { for ( const auto character : internal::constant_null.substr( 1)) { - column += 1; - if (stream.get() != character) { + if constexpr (TrackPositions) { + column += 1; + } + if (cursor >= end) { throw JSONParseError(line, column); } + if (*cursor != character) { + throw JSONParseError(line, column); + } + cursor++; } - - return JSON{nullptr}; } -inline auto parse_boolean_true( - const std::uint64_t line, std::uint64_t &column, - std::basic_istream &stream) - -> JSON { 
+template +inline auto scan_true(const std::uint64_t line, std::uint64_t &column, + const char *&cursor, const char *end) -> void { for ( const auto character : internal::constant_true.substr( 1)) { - column += 1; - if (stream.get() != character) { + if constexpr (TrackPositions) { + column += 1; + } + if (cursor >= end) { + throw JSONParseError(line, column); + } + if (*cursor != character) { throw JSONParseError(line, column); } + cursor++; } - - return JSON{true}; } -inline auto parse_boolean_false( - const std::uint64_t line, std::uint64_t &column, - std::basic_istream &stream) - -> JSON { +template +inline auto scan_false(const std::uint64_t line, std::uint64_t &column, + const char *&cursor, const char *end) -> void { for ( const auto character : internal::constant_false.substr( 1)) { - column += 1; - if (stream.get() != character) { + if constexpr (TrackPositions) { + column += 1; + } + if (cursor >= end) { + throw JSONParseError(line, column); + } + if (*cursor != character) { throw JSONParseError(line, column); } + cursor++; } - - return JSON{false}; } -auto parse_string_unicode_code_point( - const std::uint64_t line, std::uint64_t &column, - std::basic_istream &stream) +template +inline auto scan_string_unicode_code_point(const std::uint64_t line, + std::uint64_t &column, + const char *&cursor, const char *end) -> unsigned long { - std::basic_string> - code_point; - code_point.resize(4); - std::size_t code_point_size{0}; - - // Any code point may be represented as a hexadecimal escape sequence. - // The meaning of such a hexadecimal number is determined by ISO/IEC - // 10646. If the code point is in the Basic Multilingual Plane (U+0000 - // through U+FFFF), then it may be represented as a six-character - // sequence: a reverse solidus, followed by the lowercase letter u, - // followed by four hexadecimal digits that encode the code point. 
- // Hexadecimal digits can be digits (U+0030 through U+0039) or the - // hexadecimal letters A through F in uppercase (U+0041 through U+0046) - // or lowercase (U+0061 through U+0066). - // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - while (code_point_size < 4) { - column += 1; - code_point[code_point_size] = - static_cast(stream.get()); - if (std::isxdigit(code_point[code_point_size])) { - code_point_size += 1; + unsigned long result{0}; + for (std::size_t index = 0; index < 4; index++) { + if constexpr (TrackPositions) { + column += 1; + } + if (cursor >= end) { + throw JSONParseError(line, column); + } + const char hex_char{*cursor++}; + unsigned long digit; + if (hex_char >= '0' && hex_char <= '9') { + digit = static_cast(hex_char - '0'); + } else if (hex_char >= 'a' && hex_char <= 'f') { + digit = static_cast(hex_char - 'a') + 10; + } else if (hex_char >= 'A' && hex_char <= 'F') { + digit = static_cast(hex_char - 'A') + 10; } else { throw JSONParseError(line, column); } + result = (result << 4) | digit; } - // We don't need to perform any further validation here. - // According to ECMA 404, \u can be followed by "any" - // sequence of 4 hexadecimal digits. 
- constexpr auto unicode_base{16}; - const auto result{std::stoul(code_point, nullptr, unicode_base)}; - // The largest possible valid unicode code point assert(result <= 0xFFFF); return result; } -auto parse_string_unicode( - const std::uint64_t line, std::uint64_t &column, - std::basic_istream &stream, - std::basic_ostringstream> - &result) -> void { - auto code_point{parse_string_unicode_code_point(line, column, stream)}; +template +inline auto scan_string_unicode(const std::uint64_t line, std::uint64_t &column, + const char *&cursor, const char *end) -> void { + auto code_point{scan_string_unicode_code_point(line, column, + cursor, end)}; using CharT = typename JSON::Char; - // This means we are at the beginning of a UTF-16 surrogate pair high code - // point See - // https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF + if (code_point >= 0xDC00 && code_point <= 0xDFFF) { + throw JSONParseError(line, column); + } + if (code_point >= 0xD800 && code_point <= 0xDBFF) { - // Next, we expect "\" - column += 1; - if (stream.get() != internal::token_string_escape) { + if constexpr (TrackPositions) { + column += 1; + } + if (cursor >= end) { throw JSONParseError(line, column); } + if (*cursor != internal::token_string_escape) { + throw JSONParseError(line, column); + } + cursor++; - // Next, we expect "u" - column += 1; - if (stream.get() != internal::token_string_escape_unicode) { + if constexpr (TrackPositions) { + column += 1; + } + if (cursor >= end) { + throw JSONParseError(line, column); + } + if (*cursor != internal::token_string_escape_unicode) { throw JSONParseError(line, column); } + cursor++; - // Finally, get the low code point of the surrogate and calculate - // the real final code point - const auto low_code_point{ - parse_string_unicode_code_point(line, column, stream)}; + const auto low_code_point{scan_string_unicode_code_point( + line, column, cursor, end)}; // See // 
https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF - if (low_code_point >= 0xDC00 && low_code_point <= 0xDFFF) { - code_point = - 0x10000 + ((code_point - 0xD800) << 10) + (low_code_point - 0xDC00); - } else { + if (low_code_point < 0xDC00 || low_code_point > 0xDFFF) { throw JSONParseError(line, column); } } - - // Convert a Unicode codepoint into UTF-8 - // See https://en.wikipedia.org/wiki/UTF-8#Description - - if (code_point <= 0x7F) { - // UTF-8 - result.put(static_cast(code_point)); - } else if (code_point <= 0x7FF) { - // UTF-16 - result.put(static_cast(0xC0 | ((code_point >> 6) & 0x1F))); - result.put(static_cast(0x80 | (code_point & 0x3F))); - } else { - // UTF-32 - result.put(static_cast(0xE0 | ((code_point >> 12) & 0x0F))); - result.put(static_cast(0x80 | ((code_point >> 6) & 0x3F))); - result.put(static_cast(0x80 | (code_point & 0x3F))); - } } -auto parse_string_escape( - const std::uint64_t line, std::uint64_t &column, - std::basic_istream &stream, - std::basic_ostringstream> - &result) -> void { - column += 1; - switch (stream.get()) { +template +inline auto scan_string_escape(const std::uint64_t line, std::uint64_t &column, + const char *&cursor, const char *end) -> void { + if constexpr (TrackPositions) { + column += 1; + } + if (cursor >= end) { + throw JSONParseError(line, column); + } + switch (*cursor++) { case internal::token_string_quote: - result.put(internal::token_string_quote); - return; case internal::token_string_escape: - result.put(internal::token_string_escape); - return; case internal::token_string_solidus: - result.put(internal::token_string_solidus); - return; case internal::token_string_escape_backspace: - result.put('\b'); - return; case internal::token_string_escape_form_feed: - result.put('\f'); - return; case internal::token_string_escape_line_feed: - result.put('\n'); - return; case internal::token_string_escape_carriage_return: - result.put('\r'); - return; case 
internal::token_string_escape_tabulation: - result.put('\t'); return; - - // Any code point may be represented as a hexadecimal escape sequence. - // The meaning of such a hexadecimal number is determined by ISO/IEC - // 10646. If the code point is in the Basic Multilingual Plane (U+0000 - // through U+FFFF), then it may be represented as a six-character - // sequence: a reverse solidus, followed by the lowercase letter u, - // followed by four hexadecimal digits that encode the code point. - // Hexadecimal digits can be digits (U+0030 through U+0039) or the - // hexadecimal letters A through F in uppercase (U+0041 through U+0046) - // or lowercase (U+0061 through U+0066). - // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf case internal::token_string_escape_unicode: - parse_string_unicode(line, column, stream, result); + scan_string_unicode(line, column, cursor, end); return; - default: throw JSONParseError(line, column); } } -auto parse_string( - const std::uint64_t line, std::uint64_t &column, - std::basic_istream &stream) - -> typename JSON::String { - std::basic_ostringstream> - result; - while (!stream.eof()) { - column += 1; - const typename JSON::Char character{ - static_cast(stream.get())}; - switch (character) { - // A string is a sequence of Unicode code points wrapped with quotation - // marks (U+0022). 
See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_string_quote: - return result.str(); - case internal::token_string_escape: - parse_string_escape(line, column, stream, result); - break; - // These are always disallowed - case '\u0000': - case '\u0001': - case '\u0002': - case '\u0003': - case '\u0004': - case '\u0005': - case '\u0006': - case '\u0007': - case '\u0008': - case '\u0009': - case '\u000A': - case '\u000B': - case '\u000C': - case '\u000D': - case '\u000E': - case '\u000F': - case '\u0010': - case '\u0011': - case '\u0012': - case '\u0013': - case '\u0014': - case '\u0015': - case '\u0016': - case '\u0017': - case '\u0018': - case '\u0019': - case '\u001A': - case '\u001B': - case '\u001C': - case '\u001D': - case '\u001E': - case '\u001F': - case static_cast(JSON::CharTraits::eof()): - throw JSONParseError(line, column); - default: - result.put(character); - break; +template +inline auto scan_string(const std::uint64_t line, std::uint64_t &column, + const char *&cursor, const char *end) -> void { + using CharT = typename JSON::Char; + while (cursor < end) { + const char *scan{cursor}; + while (scan < end && *scan != internal::token_string_quote && + *scan != internal::token_string_escape && + static_cast(*scan) >= 0x20) { + scan++; } - } - throw JSONParseError(line, column); -} - -template -auto parse_number_decimal(const std::uint64_t line, const std::uint64_t column, - const std::basic_string &string) - -> JSON { - try { - return JSON{Decimal{string}}; - } catch (const DecimalParseError &) { - throw JSONParseError(line, column); - } catch (const std::invalid_argument &) { - throw JSONParseError(line, column); - } -} - -template -auto parse_number_integer_maybe_decimal( - const std::uint64_t line, const std::uint64_t column, - const std::basic_string &string) -> JSON { - const auto result{sourcemeta::core::to_int64_t(string)}; - return result.has_value() ? 
JSON{result.value()} - : parse_number_decimal(line, column, string); -} - -template -auto parse_number_real_maybe_decimal( - const std::uint64_t line, const std::uint64_t column, - const std::basic_string &string, - const std::size_t first_nonzero_position, - const std::size_t decimal_position) -> JSON { - // We are guaranteed to not be dealing with exponential numbers here - assert((string.find('e') == std::basic_string::npos)); - assert((string.find('E') == std::basic_string::npos)); + if (scan > cursor) { + if constexpr (TrackPositions) { + column += static_cast(scan - cursor); + } + cursor = scan; + } - // If the number has enough significant digits, then we risk completely losing - // precision of the fractional component, and thus incorrectly interpreting a - // fractional number as an integral value - const auto decimal_after_first_nonzero{ - decimal_position != std::basic_string::npos && - decimal_position > first_nonzero_position}; - const auto significant_digits{string.length() - first_nonzero_position - - (decimal_after_first_nonzero ? 1 : 0)}; - constexpr std::size_t MAX_SAFE_SIGNIFICANT_DIGITS{15}; - if (significant_digits > MAX_SAFE_SIGNIFICANT_DIGITS) { - return parse_number_decimal(line, column, string); - } + if (cursor >= end) { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); + } - const auto result{sourcemeta::core::to_double(string)}; - return result.has_value() ? 
JSON{result.value()} - : parse_number_decimal(line, column, string); -} + if constexpr (TrackPositions) { + column += 1; + } + const char character{*cursor++}; -auto parse_number_exponent_rest( - const std::uint64_t line, std::uint64_t &column, - const std::uint64_t original_column, - std::basic_istream &stream, - std::basic_ostringstream> - &result) -> JSON { - while (!stream.eof()) { - const typename JSON::Char character{ - static_cast(stream.peek())}; switch (character) { - case internal::token_number_zero: - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - result.put(character); - stream.ignore(1); - column += 1; + case internal::token_string_quote: + return; + case internal::token_string_escape: + scan_string_escape(line, column, cursor, end); break; default: - // As a heuristic, if a number has exponential notation, it is almost - // always a big number for which `double` is typically a poor - // representation. 
If an exponent is encountered, we just always parse - // as a high-precision decimal - return parse_number_decimal(line, original_column, result.str()); + throw JSONParseError(line, column); } } + if constexpr (TrackPositions) { + column += 1; + } throw JSONParseError(line, column); } -auto parse_number_exponent( - const std::uint64_t line, std::uint64_t &column, - const std::uint64_t original_column, - std::basic_istream &stream, - std::basic_ostringstream> - &result) -> JSON { - const typename JSON::Char character{ - static_cast(stream.get())}; - column += 1; - switch (character) { - case internal::token_number_zero: - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - result.put(character); - return parse_number_exponent_rest(line, column, original_column, stream, - result); - default: - throw JSONParseError(line, column); +template +inline auto scan_digits(const std::uint64_t line, std::uint64_t &column, + const char *&cursor, const char *end, + const bool at_least_one) -> void { + using CharT = typename JSON::Char; + bool found{false}; + while (cursor < end && *cursor >= internal::token_number_zero && + *cursor <= internal::token_number_nine) { + found = true; + if constexpr (TrackPositions) { + column += 1; + } + cursor++; } -} - -auto parse_number_exponent_first( - const std::uint64_t line, std::uint64_t &column, - const std::uint64_t original_column, - std::basic_istream &stream, - std::basic_ostringstream> - &result) -> JSON { - const typename JSON::Char character{ - static_cast(stream.get())}; - column += 1; - switch (character) { - case internal::token_number_plus: - // Exponents are positive by default, - // so no need to write the plus sign. 
- return parse_number_exponent(line, column, original_column, stream, - result); - case internal::token_number_minus: - result.put(character); - return parse_number_exponent(line, column, original_column, stream, - result); - - case internal::token_number_zero: - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - result.put(character); - return parse_number_exponent_rest(line, column, original_column, stream, - result); - default: - throw JSONParseError(line, column); + if (at_least_one && !found) { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); } } -auto parse_number_fractional( - const std::uint64_t line, std::uint64_t &column, - const std::uint64_t original_column, - std::basic_istream &stream, - std::basic_ostringstream> - &result, - std::size_t &first_nonzero_position, const std::size_t decimal_position) - -> JSON { - while (!stream.eof()) { - const typename JSON::Char character{ - static_cast(stream.peek())}; - switch (character) { - // [A number] may have an exponent, prefixed by e (U+0065) or E (U+0045) - // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_number_exponent_uppercase: - case internal::token_number_exponent_lowercase: - result.put(character); - stream.ignore(1); - column += 1; - return parse_number_exponent_first(line, column, original_column, - stream, result); - - case internal::token_number_zero: - result.put(character); - stream.ignore(1); - column += 1; - break; - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case 
internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - if (first_nonzero_position == - std::basic_string::npos) { - first_nonzero_position = result.str().size(); - } - result.put(character); - stream.ignore(1); +template +inline auto scan_number(const std::uint64_t line, std::uint64_t &column, + const char *&cursor, const char *end, const char first) + -> void { + using CharT = typename JSON::Char; + if (first == internal::token_number_minus) { + if (cursor >= end || *cursor < internal::token_number_zero || + *cursor > internal::token_number_nine) { + if constexpr (TrackPositions) { column += 1; - break; - default: - return parse_number_real_maybe_decimal( - line, original_column, result.str(), first_nonzero_position, - decimal_position); + } + throw JSONParseError(line, column); } } - throw JSONParseError(line, column); -} - -auto parse_number_fractional_first( - const std::uint64_t line, std::uint64_t &column, - const std::uint64_t original_column, - std::basic_istream &stream, - std::basic_ostringstream> - &result, - std::size_t &first_nonzero_position, const std::size_t decimal_position) - -> JSON { - const typename JSON::Char character{ - static_cast(stream.peek())}; - switch (character) { - // [A number] may have a fractional part prefixed by a decimal point - // (U+002E). See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_number_decimal_point: - case static_cast(JSON::CharTraits::eof()): - column += 1; - throw JSONParseError(line, column); - case internal::token_number_zero: - result.put(character); - stream.ignore(1); + const char int_start{first == internal::token_number_minus ? 
*cursor + : first}; + if (first == internal::token_number_minus) { + if constexpr (TrackPositions) { column += 1; - return parse_number_fractional(line, column, original_column, stream, - result, first_nonzero_position, - decimal_position); - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - if (first_nonzero_position == - std::basic_string::npos) { - first_nonzero_position = result.str().size(); + } + cursor++; + } + + if (int_start == internal::token_number_zero) { + if (cursor < end && *cursor >= internal::token_number_zero && + *cursor <= internal::token_number_nine) { + if constexpr (TrackPositions) { + column += 1; } - result.put(character); - stream.ignore(1); - column += 1; - return parse_number_fractional(line, column, original_column, stream, - result, first_nonzero_position, - decimal_position); - default: - return parse_number_real_maybe_decimal( - line, original_column, result.str(), first_nonzero_position, - decimal_position); + throw JSONParseError(line, column); + } + } else { + scan_digits(line, column, cursor, end, false); } -} -auto parse_number_maybe_fractional( - const std::uint64_t line, std::uint64_t &column, - const std::uint64_t original_column, - std::basic_istream &stream, - std::basic_ostringstream> - &result, - std::size_t &first_nonzero_position) -> JSON { - const typename JSON::Char character{ - static_cast(stream.peek())}; - switch (character) { - // [A number] may have a fractional part prefixed by a decimal point - // (U+002E). 
See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_number_decimal_point: { - const std::size_t decimal_position{result.str().size()}; - result.put(character); - stream.ignore(1); + if (cursor < end && *cursor == internal::token_number_decimal_point) { + if constexpr (TrackPositions) { column += 1; - return JSON{parse_number_fractional_first( - line, column, original_column, stream, result, first_nonzero_position, - decimal_position)}; } - case internal::token_number_exponent_uppercase: - case internal::token_number_exponent_lowercase: - result.put(character); - stream.ignore(1); - column += 1; - return JSON{parse_number_exponent_first(line, column, original_column, - stream, result)}; - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - column += 1; - throw JSONParseError(line, column); - default: - return JSON{parse_number_integer_maybe_decimal(line, original_column, - result.str())}; + cursor++; + scan_digits(line, column, cursor, end, true); } -} -auto parse_number_any_rest( - const std::uint64_t line, std::uint64_t &column, - const std::uint64_t original_column, - std::basic_istream &stream, - std::basic_ostringstream> - &result, - std::size_t &first_nonzero_position) -> JSON { - while (!stream.eof()) { - const typename JSON::Char character{ - static_cast(stream.peek())}; - switch (character) { - // [A number] may have a fractional part prefixed by a decimal point - // (U+002E). 
See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_number_decimal_point: { - const std::size_t decimal_position{result.str().size()}; - result.put(character); - stream.ignore(1); + if (cursor < end && + (*cursor == internal::token_number_exponent_lowercase || + *cursor == internal::token_number_exponent_uppercase)) { + if constexpr (TrackPositions) { + column += 1; + } + cursor++; + if (cursor < end && (*cursor == internal::token_number_plus || + *cursor == internal::token_number_minus)) { + if constexpr (TrackPositions) { column += 1; - return JSON{parse_number_fractional_first( - line, column, original_column, stream, result, - first_nonzero_position, decimal_position)}; } - case internal::token_number_exponent_uppercase: - case internal::token_number_exponent_lowercase: - result.put(character); - stream.ignore(1); - column += 1; - return JSON{parse_number_exponent_first(line, column, original_column, - stream, result)}; - case internal::token_number_zero: - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - result.put(character); - stream.ignore(1); - column += 1; - break; - default: - return JSON{parse_number_integer_maybe_decimal(line, original_column, - result.str())}; + cursor++; } - } - - throw JSONParseError(line, column); -} - -auto parse_number_any_negative_first( - const std::uint64_t line, std::uint64_t &column, - const std::uint64_t original_column, - std::basic_istream &stream, - std::basic_ostringstream> - &result, - std::size_t &first_nonzero_position) -> JSON { - const typename JSON::Char character{ - static_cast(stream.get())}; - column += 1; - switch (character) { - // A number is a sequence of decimal 
digits with no superfluous leading - // zero. See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_number_zero: - result.put(character); - return parse_number_maybe_fractional(line, column, original_column, - stream, result, - first_nonzero_position); - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - first_nonzero_position = result.str().size(); - result.put(character); - return parse_number_any_rest(line, column, original_column, stream, - result, first_nonzero_position); - default: - throw JSONParseError(line, column); + scan_digits(line, column, cursor, end, true); } } -auto parse_number( - const std::uint64_t line, std::uint64_t &column, - std::basic_istream &stream, - const typename JSON::Char first) -> JSON { - std::basic_ostringstream> - result; - result.put(first); - - std::size_t first_nonzero_position{ - std::basic_string::npos}; +} // namespace internal - // A number is a sequence of decimal digits with no superfluous leading zero. - // It may have a preceding minus sign (U+002D). 
See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - switch (first) { - case internal::token_number_minus: - return parse_number_any_negative_first(line, column, column, stream, - result, first_nonzero_position); - case internal::token_number_zero: - return parse_number_maybe_fractional(line, column, column, stream, result, - first_nonzero_position); - // Any other digit - default: - first_nonzero_position = 0; - return parse_number_any_rest(line, column, column, stream, result, - first_nonzero_position); - } -} +// NOLINTBEGIN(cppcoreguidelines-avoid-goto) -} // namespace sourcemeta::core::internal +template +inline auto scan_json(const char *&cursor, const char *end, + const char *buffer_start, std::uint64_t &line, + std::uint64_t &column, std::vector &tape) + -> void { + struct ContainerFrame { + std::size_t tape_index; + std::uint32_t child_count; + }; -// We use "goto" to avoid recursion -// NOLINTBEGIN(cppcoreguidelines-avoid-goto) + using CharT = typename JSON::Char; + char character = 0; + std::vector container_stack; + container_stack.reserve(32); -#define CALLBACK_PRE(value_type, context, index, property) \ - if (callback) { \ - callback(JSON::ParsePhase::Pre, JSON::Type::value_type, line, column, \ - context, index, property); \ + internal::skip_whitespace(cursor, end, line, column); + if (cursor >= end) { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); } - -#define CALLBACK_PRE_WITH_POSITION(value_type, line, column, context, index, \ - property) \ - if (callback) { \ - callback(JSON::ParsePhase::Pre, JSON::Type::value_type, line, column, \ - context, index, property); \ + if constexpr (TrackPositions) { + column += 1; } + character = *cursor++; -#define CALLBACK_POST(value_type) \ - if (callback) { \ - callback(JSON::ParsePhase::Post, JSON::Type::value_type, line, column, \ - JSON::ParseContext::Root, 0, JSON::StringView{}); \ + { + const auto 
value_line{line}; + const auto value_column{column}; + switch (character) { + case internal::token_true: + internal::scan_true(line, column, cursor, end); + tape.push_back({TapeType::True, 0, 0, 0, value_line, value_column}); + return; + case internal::token_false: + internal::scan_false(line, column, cursor, end); + tape.push_back({TapeType::False, 0, 0, 0, value_line, value_column}); + return; + case internal::token_null: + internal::scan_null(line, column, cursor, end); + tape.push_back({TapeType::Null, 0, 0, 0, value_line, value_column}); + return; + case internal::token_string_quote: { + const auto string_start{ + static_cast(cursor - buffer_start)}; + internal::scan_string(line, column, cursor, end); + const auto string_length{static_cast( + cursor - buffer_start - string_start - 1)}; + tape.push_back({TapeType::String, string_start, string_length, 0, + value_line, value_column}); + return; + } + case internal::token_array_begin: + goto do_scan_array; + case internal::token_object_begin: + goto do_scan_object; + case internal::token_number_minus: + case internal::token_number_zero: + case internal::token_number_one: + case internal::token_number_two: + case internal::token_number_three: + case internal::token_number_four: + case internal::token_number_five: + case internal::token_number_six: + case internal::token_number_seven: + case internal::token_number_eight: + case internal::token_number_nine: { + const auto number_start{ + static_cast(cursor - buffer_start - 1)}; + internal::scan_number(line, column, cursor, end, + character); + const auto number_length{ + static_cast(cursor - buffer_start - number_start)}; + tape.push_back({TapeType::Number, number_start, number_length, 0, + value_line, value_column}); + return; + } + default: + throw JSONParseError(line, column); + } } -namespace sourcemeta::core { -auto internal_parse_json( - std::basic_istream &stream, - std::uint64_t &line, std::uint64_t &column, - const JSON::ParseCallback &callback) -> JSON { - 
// Globals - using Result = JSON; - enum class Container : std::uint8_t { Array, Object }; - std::stack levels; - std::stack> frames; - std::optional result; - typename Result::String key{""}; - std::uint64_t key_line{0}; - std::uint64_t key_column{0}; - typename JSON::Char character = 0; - /* - * Parse any JSON document + * Scan an array */ -do_parse: - column += 1; - character = static_cast(stream.get()); +do_scan_array: { + const auto start_index{tape.size()}; + tape.push_back({TapeType::ArrayStart, 0, 0, 0, line, column}); + container_stack.push_back({start_index, 0}); - // A JSON value can be an object, array, number, string, true, false, or null. - // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - switch (character) { - case internal::constant_true.front(): - if (callback) { - CALLBACK_PRE(Boolean, JSON::ParseContext::Root, 0, JSON::StringView{}); - const auto value{internal::parse_boolean_true(line, column, stream)}; - CALLBACK_POST(Boolean); - return value; - } else { - return internal::parse_boolean_true(line, column, stream); - } - case internal::constant_false.front(): - if (callback) { - CALLBACK_PRE(Boolean, JSON::ParseContext::Root, 0, JSON::StringView{}); - const auto value{internal::parse_boolean_false(line, column, stream)}; - CALLBACK_POST(Boolean); - return value; - } else { - return internal::parse_boolean_false(line, column, stream); - } - case internal::constant_null.front(): - if (callback) { - CALLBACK_PRE(Null, JSON::ParseContext::Root, 0, JSON::StringView{}); - const auto value{internal::parse_null(line, column, stream)}; - CALLBACK_POST(Null); - return value; - } else { - return internal::parse_null(line, column, stream); - } - - // A string is a sequence of Unicode code points wrapped with quotation - // marks (U+0022). 
See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_string_quote: - if (callback) { - CALLBACK_PRE(String, JSON::ParseContext::Root, 0, JSON::StringView{}); - const Result value{internal::parse_string(line, column, stream)}; - CALLBACK_POST(String); - return value; - } else { - return Result{internal::parse_string(line, column, stream)}; - } - case internal::token_array_begin: - CALLBACK_PRE(Array, JSON::ParseContext::Root, 0, JSON::StringView{}); - goto do_parse_array; - case internal::token_object_begin: - CALLBACK_PRE(Object, JSON::ParseContext::Root, 0, JSON::StringView{}); - goto do_parse_object; + internal::skip_whitespace(cursor, end, line, column); + if (cursor >= end) { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); + } - case internal::token_number_minus: - case internal::token_number_zero: - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - if (callback) { - const auto current_line{line}; - const auto current_column{column}; - const auto value{ - internal::parse_number(line, column, stream, character)}; - if (value.is_integer()) { - CALLBACK_PRE_WITH_POSITION(Integer, current_line, current_column, - JSON::ParseContext::Root, 0, - JSON::StringView{}); - CALLBACK_POST(Integer); - } else if (value.is_decimal()) { - CALLBACK_PRE_WITH_POSITION(Decimal, current_line, current_column, - JSON::ParseContext::Root, 0, - JSON::StringView{}); - CALLBACK_POST(Decimal); - } else { - CALLBACK_PRE_WITH_POSITION(Real, current_line, current_column, - JSON::ParseContext::Root, 0, - JSON::StringView{}); - CALLBACK_POST(Real); - } + if (*cursor == internal::token_array_end) { + if constexpr 
(TrackPositions) { + column += 1; + } + cursor++; + tape[start_index].count = 0; + tape.push_back({TapeType::ArrayEnd, 0, 0, 0, line, column}); + container_stack.pop_back(); + goto do_scan_container_end; + } - return value; - } + goto do_scan_array_item; +} - return internal::parse_number(line, column, stream, character); +do_scan_array_item: + assert(!container_stack.empty()); + container_stack.back().child_count++; - // Insignificant whitespace is allowed before or after any token. - // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_whitespace_line_feed: - column = 0; - line += 1; - goto do_parse; - case internal::token_whitespace_tabulation: - case internal::token_whitespace_carriage_return: - case internal::token_whitespace_space: - goto do_parse; - default: - throw JSONParseError(line, column); + internal::skip_whitespace(cursor, end, line, column); + if (cursor >= end) { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); } - - /* - * Parse an array - */ - -do_parse_array: - if (levels.empty()) { - assert(!result.has_value()); - levels.emplace(Container::Array); - result = std::make_optional(Result::make_array()); - frames.emplace(result.value()); - } else if (levels.top() == Container::Array) { - assert(result.has_value()); - levels.emplace(Container::Array); - assert(!frames.empty()); - assert(frames.top().get().is_array()); - frames.top().get().push_back(Result::make_array()); - frames.emplace(frames.top().get().back()); - } else if (levels.top() == Container::Object) { - assert(result.has_value()); - levels.emplace(Container::Array); - assert(!frames.empty()); - assert(frames.top().get().is_object()); - frames.top().get().assign(key, Result::make_array()); - frames.emplace(frames.top().get().at(key)); + if constexpr (TrackPositions) { + column += 1; } + character = *cursor++; - // An array structure is a pair of square bracket tokens 
surrounding zero or - // more values. The values are separated by commas. - // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - -do_parse_array_item: - assert(levels.top() == Container::Array); - column += 1; - character = static_cast(stream.get()); - switch (character) { - // Positional - case internal::token_array_end: - if (frames.top().get().empty()) { - CALLBACK_POST(Array); - goto do_parse_container_end; - } else { - throw JSONParseError(line, column); + { + const auto value_line{line}; + const auto value_column{column}; + switch (character) { + case internal::token_array_begin: + goto do_scan_array; + case internal::token_object_begin: + goto do_scan_object; + case internal::token_true: + internal::scan_true(line, column, cursor, end); + tape.push_back({TapeType::True, 0, 0, 0, value_line, value_column}); + goto do_scan_array_item_separator; + case internal::token_false: + internal::scan_false(line, column, cursor, end); + tape.push_back({TapeType::False, 0, 0, 0, value_line, value_column}); + goto do_scan_array_item_separator; + case internal::token_null: + internal::scan_null(line, column, cursor, end); + tape.push_back({TapeType::Null, 0, 0, 0, value_line, value_column}); + goto do_scan_array_item_separator; + case internal::token_string_quote: { + const auto string_start{ + static_cast(cursor - buffer_start)}; + internal::scan_string(line, column, cursor, end); + const auto string_length{static_cast( + cursor - buffer_start - string_start - 1)}; + tape.push_back({TapeType::String, string_start, string_length, 0, + value_line, value_column}); + goto do_scan_array_item_separator; } - - // Values - case internal::token_array_begin: - CALLBACK_PRE(Array, JSON::ParseContext::Index, frames.top().get().size(), - JSON::StringView{}); - goto do_parse_array; - case internal::token_object_begin: - CALLBACK_PRE(Object, JSON::ParseContext::Index, frames.top().get().size(), - JSON::StringView{}); - goto 
do_parse_object; - case internal::constant_true.front(): - CALLBACK_PRE(Boolean, JSON::ParseContext::Index, - frames.top().get().size(), JSON::StringView{}); - frames.top().get().push_back( - internal::parse_boolean_true(line, column, stream)); - CALLBACK_POST(Boolean); - goto do_parse_array_item_separator; - case internal::constant_false.front(): - CALLBACK_PRE(Boolean, JSON::ParseContext::Index, - frames.top().get().size(), JSON::StringView{}); - frames.top().get().push_back( - internal::parse_boolean_false(line, column, stream)); - CALLBACK_POST(Boolean); - goto do_parse_array_item_separator; - case internal::constant_null.front(): - CALLBACK_PRE(Null, JSON::ParseContext::Index, frames.top().get().size(), - JSON::StringView{}); - frames.top().get().push_back(internal::parse_null(line, column, stream)); - CALLBACK_POST(Null); - goto do_parse_array_item_separator; - - // A string is a sequence of Unicode code points wrapped with quotation - // marks (U+0022). See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_string_quote: - CALLBACK_PRE(String, JSON::ParseContext::Index, frames.top().get().size(), - JSON::StringView{}); - frames.top().get().push_back( - Result{internal::parse_string(line, column, stream)}); - CALLBACK_POST(String); - goto do_parse_array_item_separator; - - case internal::token_number_minus: - case internal::token_number_zero: - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - if (callback) { - const auto current_line{line}; - const auto current_column{column}; - const auto current_index{frames.top().get().size()}; - const auto value{ - internal::parse_number(line, column, stream, character)}; - if 
(value.is_integer()) { - CALLBACK_PRE_WITH_POSITION(Integer, current_line, current_column, - JSON::ParseContext::Index, current_index, - JSON::StringView{}); - } else if (value.is_decimal()) { - CALLBACK_PRE_WITH_POSITION(Decimal, current_line, current_column, - JSON::ParseContext::Index, current_index, - JSON::StringView{}); - } else { - CALLBACK_PRE_WITH_POSITION(Real, current_line, current_column, - JSON::ParseContext::Index, current_index, - JSON::StringView{}); - } - - frames.top().get().push_back(value); - - if (value.is_integer()) { - CALLBACK_POST(Integer); - } else if (value.is_decimal()) { - CALLBACK_POST(Decimal); - } else { - CALLBACK_POST(Real); - } - } else { - frames.top().get().push_back( - internal::parse_number(line, column, stream, character)); + case internal::token_number_minus: + case internal::token_number_zero: + case internal::token_number_one: + case internal::token_number_two: + case internal::token_number_three: + case internal::token_number_four: + case internal::token_number_five: + case internal::token_number_six: + case internal::token_number_seven: + case internal::token_number_eight: + case internal::token_number_nine: { + const auto number_start{ + static_cast(cursor - buffer_start - 1)}; + internal::scan_number(line, column, cursor, end, + character); + const auto number_length{ + static_cast(cursor - buffer_start - number_start)}; + tape.push_back({TapeType::Number, number_start, number_length, 0, + value_line, value_column}); + goto do_scan_array_item_separator; } - - goto do_parse_array_item_separator; - - // Insignificant whitespace is allowed before or after any token. 
- // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_whitespace_line_feed: - column = 0; - line += 1; - goto do_parse_array_item; - case internal::token_whitespace_tabulation: - case internal::token_whitespace_carriage_return: - case internal::token_whitespace_space: - goto do_parse_array_item; - default: - goto error; + default: + throw JSONParseError(line, column); + } } -do_parse_array_item_separator: - assert(levels.top() == Container::Array); - column += 1; - character = static_cast(stream.get()); +do_scan_array_item_separator: + internal::skip_whitespace(cursor, end, line, column); + if (cursor >= end) { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); + } + if constexpr (TrackPositions) { + column += 1; + } + character = *cursor++; switch (character) { - // Positional - case internal::token_array_delimiter: - goto do_parse_array_item; - case internal::token_array_end: - CALLBACK_POST(Array); - goto do_parse_container_end; - - // Insignificant whitespace is allowed before or after any token. 
- // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_whitespace_line_feed: - column = 0; - line += 1; - goto do_parse_array_item_separator; - case internal::token_whitespace_tabulation: - case internal::token_whitespace_carriage_return: - case internal::token_whitespace_space: - goto do_parse_array_item_separator; + case internal::token_array_delimiter: + goto do_scan_array_item; + case internal::token_array_end: { + assert(!container_stack.empty()); + auto &frame{container_stack.back()}; + tape[frame.tape_index].count = frame.child_count; + tape.push_back({TapeType::ArrayEnd, 0, 0, 0, line, column}); + container_stack.pop_back(); + goto do_scan_container_end; + } default: - goto error; + throw JSONParseError(line, column); } /* - * Parse an object + * Scan an object */ -do_parse_object: - if (levels.empty()) { - assert(levels.empty()); - assert(!result.has_value()); - levels.emplace(Container::Object); - result = std::make_optional(Result::make_object()); - frames.emplace(result.value()); - } else if (levels.top() == Container::Array) { - assert(result.has_value()); - levels.emplace(Container::Object); - assert(!frames.empty()); - assert(frames.top().get().is_array()); - frames.top().get().push_back(Result::make_object()); - frames.emplace(frames.top().get().back()); - } else if (levels.top() == Container::Object) { - assert(result.has_value()); - levels.emplace(Container::Object); - assert(!frames.empty()); - assert(frames.top().get().is_object()); - frames.top().get().assign(key, Result::make_object()); - frames.emplace(frames.top().get().at(key)); +do_scan_object: { + const auto start_index{tape.size()}; + tape.push_back({TapeType::ObjectStart, 0, 0, 0, line, column}); + container_stack.push_back({start_index, 0}); + + internal::skip_whitespace(cursor, end, line, column); + if (cursor >= end) { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); } 
- // An object structure is represented as a pair of curly bracket tokens - // surrounding zero or more name/value pairs. A name is a string. A single - // colon token follows each name, separating the name from the value. A - // single comma token separates a value from a following name. See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf + if (*cursor == internal::token_object_end) { + if constexpr (TrackPositions) { + column += 1; + } + cursor++; + tape[start_index].count = 0; + tape.push_back({TapeType::ObjectEnd, 0, 0, 0, line, column}); + container_stack.pop_back(); + goto do_scan_container_end; + } -do_parse_object_property_key: - assert(levels.top() == Container::Object); - column += 1; - character = static_cast(stream.get()); - switch (character) { - case internal::token_object_end: - if (frames.top().get().empty()) { - CALLBACK_POST(Object); - goto do_parse_container_end; - } else { - goto error; - } + goto do_scan_object_key; +} - // A string is a sequence of Unicode code points wrapped with quotation - // marks (U+0022). See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_string_quote: - key_line = line; - key_column = column; - key = internal::parse_string(line, column, stream); - goto do_parse_object_property_separator; +do_scan_object_key: + assert(!container_stack.empty()); + container_stack.back().child_count++; - // Insignificant whitespace is allowed before or after any token. 
- // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_whitespace_line_feed: - column = 0; - line += 1; - goto do_parse_object_property_key; - case internal::token_whitespace_tabulation: - case internal::token_whitespace_carriage_return: - case internal::token_whitespace_space: - goto do_parse_object_property_key; - default: - goto error; + internal::skip_whitespace(cursor, end, line, column); + if (cursor >= end) { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); } - -do_parse_object_property_separator: - assert(levels.top() == Container::Object); - column += 1; - character = static_cast(stream.get()); + if constexpr (TrackPositions) { + column += 1; + } + character = *cursor++; switch (character) { - case internal::token_object_key_delimiter: - goto do_parse_object_property_value; - - // Insignificant whitespace is allowed before or after any token. - // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_whitespace_line_feed: - column = 0; - line += 1; - goto do_parse_object_property_separator; - case internal::token_whitespace_tabulation: - case internal::token_whitespace_carriage_return: - case internal::token_whitespace_space: - goto do_parse_object_property_separator; + case internal::token_string_quote: { + const auto key_start{static_cast(cursor - buffer_start)}; + const auto key_line{line}; + const auto key_column{column}; + internal::scan_string(line, column, cursor, end); + const auto key_length{ + static_cast(cursor - buffer_start - key_start - 1)}; + tape.push_back( + {TapeType::Key, key_start, key_length, 0, key_line, key_column}); + goto do_scan_object_separator; + } default: - goto error; + throw JSONParseError(line, column); } -do_parse_object_property_value: - assert(levels.top() == Container::Object); - column += 1; - character = static_cast(stream.get()); 
+do_scan_object_separator: + internal::skip_whitespace(cursor, end, line, column); + if (cursor >= end) { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); + } + if constexpr (TrackPositions) { + column += 1; + } + character = *cursor++; switch (character) { - // Values - case internal::token_array_begin: - CALLBACK_PRE_WITH_POSITION(Array, key_line, key_column, - JSON::ParseContext::Property, 0, key); - goto do_parse_array; - case internal::token_object_begin: - CALLBACK_PRE_WITH_POSITION(Object, key_line, key_column, - JSON::ParseContext::Property, 0, key); - goto do_parse_object; - case internal::constant_true.front(): - CALLBACK_PRE_WITH_POSITION(Boolean, key_line, key_column, - JSON::ParseContext::Property, 0, key); - frames.top().get().assign( - key, internal::parse_boolean_true(line, column, stream)); - CALLBACK_POST(Boolean); - goto do_parse_object_property_end; - case internal::constant_false.front(): - CALLBACK_PRE_WITH_POSITION(Boolean, key_line, key_column, - JSON::ParseContext::Property, 0, key); - frames.top().get().assign( - key, internal::parse_boolean_false(line, column, stream)); - CALLBACK_POST(Boolean); - goto do_parse_object_property_end; - case internal::constant_null.front(): - CALLBACK_PRE_WITH_POSITION(Null, key_line, key_column, - JSON::ParseContext::Property, 0, key); - frames.top().get().assign(key, - internal::parse_null(line, column, stream)); - CALLBACK_POST(Null); - goto do_parse_object_property_end; - - // A string is a sequence of Unicode code points wrapped with quotation - // marks (U+0022). 
See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_string_quote: - CALLBACK_PRE_WITH_POSITION(String, key_line, key_column, - JSON::ParseContext::Property, 0, key); - frames.top().get().assign( - key, Result{internal::parse_string(line, column, stream)}); - CALLBACK_POST(String); - goto do_parse_object_property_end; - - case internal::token_number_minus: - case internal::token_number_zero: - case internal::token_number_one: - case internal::token_number_two: - case internal::token_number_three: - case internal::token_number_four: - case internal::token_number_five: - case internal::token_number_six: - case internal::token_number_seven: - case internal::token_number_eight: - case internal::token_number_nine: - if (callback) { - const auto value{ - internal::parse_number(line, column, stream, character)}; - if (value.is_integer()) { - CALLBACK_PRE_WITH_POSITION(Integer, key_line, key_column, - JSON::ParseContext::Property, 0, key); - } else if (value.is_decimal()) { - CALLBACK_PRE_WITH_POSITION(Decimal, key_line, key_column, - JSON::ParseContext::Property, 0, key); - } else { - CALLBACK_PRE_WITH_POSITION(Real, key_line, key_column, - JSON::ParseContext::Property, 0, key); - } + case internal::token_object_key_delimiter: + goto do_scan_object_value; + default: + throw JSONParseError(line, column); + } - frames.top().get().assign(key, value); +do_scan_object_value: + internal::skip_whitespace(cursor, end, line, column); + if (cursor >= end) { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); + } + if constexpr (TrackPositions) { + column += 1; + } + character = *cursor++; - if (value.is_integer()) { - CALLBACK_POST(Integer); - } else if (value.is_decimal()) { - CALLBACK_POST(Decimal); - } else { - CALLBACK_POST(Real); - } - } else { - frames.top().get().assign( - key, internal::parse_number(line, column, stream, character)); + { + const auto 
value_line{line}; + const auto value_column{column}; + switch (character) { + case internal::token_array_begin: + goto do_scan_array; + case internal::token_object_begin: + goto do_scan_object; + case internal::token_true: + internal::scan_true(line, column, cursor, end); + tape.push_back({TapeType::True, 0, 0, 0, value_line, value_column}); + goto do_scan_object_property_end; + case internal::token_false: + internal::scan_false(line, column, cursor, end); + tape.push_back({TapeType::False, 0, 0, 0, value_line, value_column}); + goto do_scan_object_property_end; + case internal::token_null: + internal::scan_null(line, column, cursor, end); + tape.push_back({TapeType::Null, 0, 0, 0, value_line, value_column}); + goto do_scan_object_property_end; + case internal::token_string_quote: { + const auto string_start{ + static_cast(cursor - buffer_start)}; + internal::scan_string(line, column, cursor, end); + const auto string_length{static_cast( + cursor - buffer_start - string_start - 1)}; + tape.push_back({TapeType::String, string_start, string_length, 0, + value_line, value_column}); + goto do_scan_object_property_end; } - - goto do_parse_object_property_end; - - // Insignificant whitespace is allowed before or after any token. 
- // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_whitespace_line_feed: - column = 0; - line += 1; - goto do_parse_object_property_value; - case internal::token_whitespace_tabulation: - case internal::token_whitespace_carriage_return: - case internal::token_whitespace_space: - goto do_parse_object_property_value; - default: - goto error; + case internal::token_number_minus: + case internal::token_number_zero: + case internal::token_number_one: + case internal::token_number_two: + case internal::token_number_three: + case internal::token_number_four: + case internal::token_number_five: + case internal::token_number_six: + case internal::token_number_seven: + case internal::token_number_eight: + case internal::token_number_nine: { + const auto number_start{ + static_cast(cursor - buffer_start - 1)}; + internal::scan_number(line, column, cursor, end, + character); + const auto number_length{ + static_cast(cursor - buffer_start - number_start)}; + tape.push_back({TapeType::Number, number_start, number_length, 0, + value_line, value_column}); + goto do_scan_object_property_end; + } + default: + throw JSONParseError(line, column); + } } -do_parse_object_property_end: - assert(levels.top() == Container::Object); - column += 1; - character = static_cast(stream.get()); +do_scan_object_property_end: + internal::skip_whitespace(cursor, end, line, column); + if (cursor >= end) { + if constexpr (TrackPositions) { + column += 1; + } + throw JSONParseError(line, column); + } + if constexpr (TrackPositions) { + column += 1; + } + character = *cursor++; switch (character) { - case internal::token_object_delimiter: - goto do_parse_object_property_key; - case internal::token_object_end: - CALLBACK_POST(Object); - goto do_parse_container_end; - - // Insignificant whitespace is allowed before or after any token. 
- // See - // https://www.ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf - case internal::token_whitespace_line_feed: - column = 0; - line += 1; - goto do_parse_object_property_end; - case internal::token_whitespace_tabulation: - case internal::token_whitespace_carriage_return: - case internal::token_whitespace_space: - goto do_parse_object_property_end; + case internal::token_object_delimiter: + goto do_scan_object_key; + case internal::token_object_end: { + assert(!container_stack.empty()); + auto &frame{container_stack.back()}; + tape[frame.tape_index].count = frame.child_count; + tape.push_back({TapeType::ObjectEnd, 0, 0, 0, line, column}); + container_stack.pop_back(); + goto do_scan_container_end; + } default: - goto error; - } - - /* - * Finish parsing a container - */ - -error: - // For some strange reason, with certain AppleClang versions, - // the program crashes when de-allocating huge array/objects - // before throwing an error. The error goes away if we manually - // reset every frame of the resulting object. Compiler error? 
- // Seen on Apple clang version 14.0.3 (clang-1403.0.22.14.1) - while (!frames.empty()) { - frames.top().get().into(Result{nullptr}); - frames.pop(); + throw JSONParseError(line, column); } - throw JSONParseError(line, column); - -do_parse_container_end: - assert(!levels.empty()); - if (levels.size() == 1) { - return result.value(); +do_scan_container_end: + if (container_stack.empty()) { + return; } - frames.pop(); - levels.pop(); - if (levels.top() == Container::Array) { - goto do_parse_array_item_separator; + if (tape[container_stack.back().tape_index].type == TapeType::ArrayStart) { + goto do_scan_array_item_separator; } else { - goto do_parse_object_property_end; + goto do_scan_object_property_end; } } // NOLINTEND(cppcoreguidelines-avoid-goto) -auto internal_parse_json( - const std::basic_string - &input, - std::uint64_t &line, std::uint64_t &column, - const JSON::ParseCallback &callback) -> JSON { - std::basic_istringstream> - stream{input}; - return internal_parse_json(stream, line, column, callback); -} - } // namespace sourcemeta::core -#undef CALLBACK_PRE -#undef CALLBACK_PRE_WITH_POSITION -#undef CALLBACK_POST - #endif diff --git a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer.h b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer.h index 7682ae53..56885d9f 100644 --- a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer.h +++ b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer.h @@ -415,7 +415,7 @@ auto to_pointer(const std::basic_string Pointer; +/// @ingroup jsonpointer +/// Check if the given string is a valid JSON Pointer per RFC 6901 without +/// constructing a JSON Pointer object. 
For example: +/// +/// ```cpp +/// #include +/// #include +/// +/// assert(sourcemeta::core::is_pointer("/foo/bar/0")); +/// assert(sourcemeta::core::is_pointer("")); +/// assert(!sourcemeta::core::is_pointer("foo")); +/// ``` +SOURCEMETA_CORE_JSONPOINTER_EXPORT +auto is_pointer(std::string_view input) noexcept -> bool; + /// @ingroup jsonpointer /// Convert a JSON Pointer into a JSON WeakPointer. For example: /// diff --git a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h index e2da4426..3c10e703 100644 --- a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h +++ b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_pointer.h @@ -13,6 +13,8 @@ #include // std::move #include // std::vector +#include + namespace sourcemeta::core { /// @ingroup jsonpointer @@ -131,7 +133,7 @@ template class GenericPointer { /// assert(pointer.back().is_property()); /// assert(pointer.back().to_property() == "bar"); /// ``` - [[nodiscard]] auto back() const -> const_reference { + [[nodiscard]] SOURCEMETA_FORCEINLINE auto back() const -> const_reference { assert(!this->empty()); return this->data.back(); } @@ -146,7 +148,7 @@ template class GenericPointer { /// const sourcemeta::core::Pointer pointer{"foo", "bar"}; /// assert(pointer.size() == 2); /// ``` - [[nodiscard]] auto size() const noexcept -> size_type { + [[nodiscard]] SOURCEMETA_FORCEINLINE auto size() const noexcept -> size_type { return this->data.size(); } @@ -162,7 +164,7 @@ template class GenericPointer { /// assert(empty_pointer.empty()); /// assert(!non_empty_pointer.empty()); /// ``` - [[nodiscard]] auto empty() const noexcept -> bool { + [[nodiscard]] SOURCEMETA_FORCEINLINE auto empty() const noexcept -> bool { return this->data.empty(); } @@ -179,7 +181,8 @@ template class GenericPointer { /// assert(!pointer.empty()); /// assert(token.is_property()); /// ``` - 
template auto emplace_back(Args &&...args) -> reference { + template + SOURCEMETA_FORCEINLINE auto emplace_back(Args &&...args) -> reference { return this->data.emplace_back(std::forward(args)...); } @@ -215,7 +218,8 @@ template class GenericPointer { /// assert(pointer.at(1).to_property() == "bar"); /// assert(pointer.at(2).to_property() == "baz"); /// ``` - auto push_back(const GenericPointer &other) -> void { + SOURCEMETA_FORCEINLINE auto + push_back(const GenericPointer &other) -> void { if (other.empty()) { return; } else if (other.size() == 1) { @@ -248,7 +252,8 @@ template class GenericPointer { /// assert(pointer.at(1).to_property() == "bar"); /// assert(pointer.at(2).to_property() == "baz"); /// ``` - auto push_back(GenericPointer &&other) -> void { + SOURCEMETA_FORCEINLINE auto push_back(GenericPointer &&other) + -> void { if (other.empty()) { return; } else if (other.size() == 1) { @@ -284,7 +289,8 @@ template class GenericPointer { /// assert(pointer.at(2).to_property() == "baz"); /// ``` template - auto push_back(const GenericPointer &other) -> void + SOURCEMETA_FORCEINLINE auto + push_back(const GenericPointer &other) -> void requires std::is_same_v> { if (other.empty()) { @@ -328,7 +334,8 @@ template class GenericPointer { /// assert(pointer.at(0).to_property() == "foo"); /// assert(pointer.at(1).to_property() == "bar"); /// ``` - auto push_back(const typename Token::Property &property) -> void { + SOURCEMETA_FORCEINLINE auto + push_back(const typename Token::Property &property) -> void { this->data.emplace_back(property); } @@ -349,7 +356,8 @@ template class GenericPointer { /// assert(pointer.at(0).to_property() == "foo"); /// assert(pointer.at(1).to_property() == "bar"); /// ``` - auto push_back(typename Token::Property &&property) -> void { + SOURCEMETA_FORCEINLINE auto push_back(typename Token::Property &&property) + -> void { this->data.emplace_back(std::move(property)); } @@ -371,7 +379,8 @@ template class GenericPointer { /// 
assert(pointer.at(0).to_property() == "foo"); /// assert(pointer.at(1).to_index() == 0); /// ``` - auto push_back(const typename Token::Index &index) -> void { + SOURCEMETA_FORCEINLINE auto push_back(const typename Token::Index &index) + -> void { this->data.emplace_back(index); } @@ -681,6 +690,10 @@ template class GenericPointer { [[nodiscard]] auto resolve_from(const GenericPointer &base) const -> GenericPointer { + if (base.empty()) { + return *this; + } + typename Container::size_type index{0}; while (index < base.size()) { if (index >= this->size() || base.data[index] != this->data[index]) { @@ -694,6 +707,9 @@ template class GenericPointer { auto new_begin{this->data.cbegin()}; std::advance(new_begin, index); GenericPointer result; + const auto remaining{static_cast( + this->data.cend() - new_begin)}; + result.data.reserve(remaining); std::copy(new_begin, this->data.cend(), std::back_inserter(result.data)); return result; } @@ -759,15 +775,12 @@ template class GenericPointer { } private: + // Intentionally only fold hash.a for performance, as the first + // 16 bytes already provide sufficient entropy for bucketing static auto property_hash(const typename Hash::hash_type &hash) noexcept -> std::size_t { -#if defined(__SIZEOF_INT128__) - const auto *parts = - reinterpret_cast(&hash.a); // NOLINT - return parts[0] ^ parts[1]; -#else - return hash.a ^ hash.b; -#endif + return static_cast(hash.a) ^ + static_cast(hash.a >> 64); } }; diff --git a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_position.h b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_position.h index b19f8f3a..cd14395d 100644 --- a/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_position.h +++ b/vendor/core/src/core/jsonpointer/include/sourcemeta/core/jsonpointer_position.h @@ -9,13 +9,12 @@ #include -#include // std::size_t -#include // std::uint64_t -#include // std::map -#include // std::optional -#include // std::stack -#include 
// std::tuple -#include // std::pair +#include // std::size_t +#include // std::uint64_t +#include // std::optional +#include // std::tuple +#include // std::unordered_map +#include // std::vector namespace sourcemeta::core { @@ -30,7 +29,8 @@ namespace sourcemeta::core { /// /// const auto input{"{\n \"foo\": \"bar\"\n}"};; /// sourcemeta::core::PointerPositionTracker tracker; -/// sourcemeta::core::parse_json(stream, std::ref(tracker)); +/// sourcemeta::core::JSON document{nullptr}; +/// sourcemeta::core::parse_json(input, document, std::ref(tracker)); /// assert(tracker.size() == 2); /// const auto foo{tracker.get(sourcemeta::core::Pointer{"foo"})}; /// assert(foo.has_value()); @@ -51,22 +51,39 @@ class SOURCEMETA_CORE_JSONPOINTER_EXPORT PointerPositionTracker { auto operator()(const JSON::ParsePhase phase, const JSON::Type, const std::uint64_t line, const std::uint64_t column, const JSON::ParseContext context, const std::size_t index, - const JSON::StringView property) -> void; + const JSON::String &property) -> void; [[nodiscard]] auto get(const Pointer &pointer) const -> std::optional; [[nodiscard]] auto size() const -> std::size_t; [[nodiscard]] auto to_json() const -> JSON; private: + struct Event { + JSON::ParsePhase phase; + JSON::ParseContext context; + std::size_t index; + const JSON::String *property; + std::uint64_t line; + std::uint64_t column; + }; + + struct TrieNode { + std::optional position; + std::unordered_map index_children; + std::unordered_map property_children; + }; + + auto ensure_index() const -> void; + // Exporting symbols that depends on the standard C++ library is considered // safe. 
// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN #if defined(_MSC_VER) #pragma warning(disable : 4251) #endif - Pointer current; - std::stack> stack; - std::map data; + std::vector events; + mutable bool indexed{false}; + mutable std::vector trie; #if defined(_MSC_VER) #pragma warning(default : 4251) #endif diff --git a/vendor/core/src/core/jsonpointer/jsonpointer.cc b/vendor/core/src/core/jsonpointer/jsonpointer.cc index 794e8b46..0b9442c6 100644 --- a/vendor/core/src/core/jsonpointer/jsonpointer.cc +++ b/vendor/core/src/core/jsonpointer/jsonpointer.cc @@ -5,7 +5,6 @@ #include #include -#include "grammar.h" #include "parser.h" #include "stringify.h" @@ -15,6 +14,7 @@ #include // std::basic_ostream #include // std::basic_ostringstream, std::basic_stringstream #include // std::basic_string +#include // std::string_view #include // std::is_same_v #include // std::move @@ -302,19 +302,16 @@ auto remove(JSON &document, const WeakPointer &pointer) -> bool { auto to_pointer(const JSON &document) -> Pointer { assert(document.is_string()); auto stream{document.to_stringstream()}; - return parse_pointer(stream); + return parse_pointer(stream); } auto to_pointer(const std::basic_string> &input) -> Pointer { - std::basic_stringstream> - stream; - stream << internal::token_pointer_quote; - stream << input; - stream << internal::token_pointer_quote; - return to_pointer(parse_json(stream)); + std::basic_istringstream> + stream{input}; + return parse_pointer(stream); } auto to_pointer(const WeakPointer &pointer) -> Pointer { @@ -407,4 +404,14 @@ auto to_uri(const WeakPointer &pointer, const std::string_view base) -> URI { return to_uri(pointer).resolve_from(URI{base}).canonicalize(); } +auto is_pointer(const std::string_view input) noexcept -> bool { + try { + std::basic_istringstream stream{std::string{input}}; + parse_pointer(stream); + return true; + } catch (...) 
{ + return false; + } +} + } // namespace sourcemeta::core diff --git a/vendor/core/src/core/jsonpointer/parser.h b/vendor/core/src/core/jsonpointer/parser.h index 35a5615d..d0bc7f65 100644 --- a/vendor/core/src/core/jsonpointer/parser.h +++ b/vendor/core/src/core/jsonpointer/parser.h @@ -7,11 +7,12 @@ #include #include -#include // std::uint64_t -#include // std::basic_istream -#include // std::basic_stringstream -#include // std::out_of_range -#include // std::stoi +#include // std::uint64_t +#include // std::basic_istream +#include // std::basic_stringstream +#include // std::out_of_range +#include // std::stoi +#include // std::conditional_t namespace sourcemeta::core::internal { template > &stream, // NOLINTBEGIN(cppcoreguidelines-avoid-goto) namespace sourcemeta::core { +template auto parse_pointer(std::basic_istream &stream) - -> Pointer { - Pointer result; + -> std::conditional_t { + [[maybe_unused]] Pointer result; JSON::Char character = 0; - std::basic_stringstream string; + [[maybe_unused]] std::basic_stringstream string; std::uint64_t column{0}; parse_token_begin: @@ -84,15 +86,21 @@ auto parse_pointer(std::basic_istream &stream) case internal::token_pointer_number_nine: column += 1; stream.ignore(); - string.put(character); + if constexpr (!CheckOnly) { + string.put(character); + } goto parse_token_index_rest_any; case static_cast(JSON::CharTraits::eof()): column += 1; stream.ignore(); - result.emplace_back(""); + if constexpr (!CheckOnly) { + result.emplace_back(""); + } goto done; case internal::token_pointer_slash: - result.emplace_back(""); + if constexpr (!CheckOnly) { + result.emplace_back(""); + } goto parse_token_begin; case internal::token_pointer_tilde: column += 1; @@ -101,7 +109,9 @@ auto parse_pointer(std::basic_istream &stream) default: column += 1; stream.ignore(); - string.put(character); + if constexpr (!CheckOnly) { + string.put(character); + } goto parse_token_property_rest_any; } @@ -110,20 +120,26 @@ auto 
parse_pointer(std::basic_istream &stream) */ parse_token_index_end: - string.put(character); + if constexpr (!CheckOnly) { + string.put(character); + } character = static_cast(stream.peek()); switch (character) { case internal::token_pointer_slash: column += 1; stream.ignore(); - result.emplace_back(internal::parse_index(string, column)); - internal::reset(string); + if constexpr (!CheckOnly) { + result.emplace_back(internal::parse_index(string, column)); + internal::reset(string); + } goto parse_token_content; case static_cast(JSON::CharTraits::eof()): column += 1; stream.ignore(); - result.emplace_back(internal::parse_index(string, column)); - internal::reset(string); + if constexpr (!CheckOnly) { + result.emplace_back(internal::parse_index(string, column)); + internal::reset(string); + } goto done; default: goto parse_token_property_rest_any; @@ -135,14 +151,18 @@ auto parse_pointer(std::basic_istream &stream) case internal::token_pointer_slash: column += 1; stream.ignore(); - result.emplace_back(internal::parse_index(string, column)); - internal::reset(string); + if constexpr (!CheckOnly) { + result.emplace_back(internal::parse_index(string, column)); + internal::reset(string); + } goto parse_token_content; case static_cast(JSON::CharTraits::eof()): column += 1; stream.ignore(); - result.emplace_back(internal::parse_index(string, column)); - internal::reset(string); + if constexpr (!CheckOnly) { + result.emplace_back(internal::parse_index(string, column)); + internal::reset(string); + } goto done; case internal::token_pointer_number_zero: case internal::token_pointer_number_one: @@ -156,7 +176,9 @@ auto parse_pointer(std::basic_istream &stream) case internal::token_pointer_number_nine: column += 1; stream.ignore(); - string.put(character); + if constexpr (!CheckOnly) { + string.put(character); + } goto parse_token_index_rest_any; default: @@ -172,17 +194,23 @@ auto parse_pointer(std::basic_istream &stream) column += 1; switch (character) { case 
internal::token_pointer_slash: - result.emplace_back(string.str()); - internal::reset(string); + if constexpr (!CheckOnly) { + result.emplace_back(string.str()); + internal::reset(string); + } goto parse_token_content; case internal::token_pointer_tilde: goto parse_token_escape_tilde; case static_cast(JSON::CharTraits::eof()): - result.emplace_back(string.str()); - internal::reset(string); + if constexpr (!CheckOnly) { + result.emplace_back(string.str()); + internal::reset(string); + } goto done; default: - string.put(character); + if constexpr (!CheckOnly) { + string.put(character); + } goto parse_token_property_rest_any; } @@ -196,17 +224,23 @@ auto parse_pointer(std::basic_istream &stream) // See https://www.rfc-editor.org/rfc/rfc6901#section-3 switch (character) { case internal::token_pointer_number_zero: - string.put(internal::token_pointer_tilde); + if constexpr (!CheckOnly) { + string.put(internal::token_pointer_tilde); + } goto parse_token_property_rest_any; case internal::token_pointer_number_one: - string.put(internal::token_pointer_slash); + if constexpr (!CheckOnly) { + string.put(internal::token_pointer_slash); + } goto parse_token_property_rest_any; default: throw PointerParseError(column); } done: - return result; + if constexpr (!CheckOnly) { + return result; + } } // NOLINTEND(cppcoreguidelines-avoid-goto) diff --git a/vendor/core/src/core/jsonpointer/position.cc b/vendor/core/src/core/jsonpointer/position.cc index d43dfae7..7b51bf89 100644 --- a/vendor/core/src/core/jsonpointer/position.cc +++ b/vendor/core/src/core/jsonpointer/position.cc @@ -1,65 +1,178 @@ #include #include -#include // assert -#include // std::size_t -#include // std::uint64_t -#include // std::optional +#include // std::count_if +#include // assert +#include // std::size_t +#include // std::uint64_t +#include // std::optional +#include // std::string_view namespace sourcemeta::core { -auto PointerPositionTracker::operator()( - const JSON::ParsePhase phase, const JSON::Type, 
const std::uint64_t line, - const std::uint64_t column, const JSON::ParseContext context, - const std::size_t index, const JSON::StringView property) -> void { - if (phase == JSON::ParsePhase::Pre) { - this->stack.emplace(line, column); - switch (context) { - case JSON::ParseContext::Property: - this->current.push_back(JSON::String{property}); +auto PointerPositionTracker::ensure_index() const -> void { + if (this->indexed) { + return; + } + + this->indexed = true; + this->trie.push_back({.position = std::nullopt, + .index_children = {}, + .property_children = {}}); + + std::size_t current_node{0}; + std::vector>> + node_stack; + + for (const auto &event : this->events) { + switch (event.phase) { + case JSON::ParsePhase::Pre: + node_stack.emplace_back(current_node, + std::make_pair(event.line, event.column)); + + switch (event.context) { + case JSON::ParseContext::Property: { + assert(event.property != nullptr); + const std::string_view key{*event.property}; + auto iterator{this->trie[current_node].property_children.find(key)}; + if (iterator == this->trie[current_node].property_children.end()) { + const auto node_index{this->trie.size()}; + this->trie.push_back({.position = std::nullopt, + .index_children = {}, + .property_children = {}}); + this->trie[current_node].property_children.emplace(key, + node_index); + current_node = node_index; + } else { + current_node = iterator->second; + } + break; + } + case JSON::ParseContext::Index: { + auto iterator{ + this->trie[current_node].index_children.find(event.index)}; + if (iterator == this->trie[current_node].index_children.end()) { + const auto node_index{this->trie.size()}; + this->trie.push_back({.position = std::nullopt, + .index_children = {}, + .property_children = {}}); + this->trie[current_node].index_children.emplace(event.index, + node_index); + current_node = node_index; + } else { + current_node = iterator->second; + } + break; + } + case JSON::ParseContext::Root: + break; + } + break; - case 
JSON::ParseContext::Index: - this->current.push_back(index); + case JSON::ParsePhase::Post: + assert(!node_stack.empty()); + this->trie[current_node].position = + Position{node_stack.back().second.first, + node_stack.back().second.second, event.line, event.column}; + current_node = node_stack.back().first; + node_stack.pop_back(); break; - case JSON::ParseContext::Root: + default: + assert(false); break; } - } else if (phase == JSON::ParsePhase::Post) { - assert(!this->stack.empty()); - this->data.emplace(this->current, - Position{this->stack.top().first, - this->stack.top().second, line, column}); - this->stack.pop(); - if (!this->current.empty()) { - this->current.pop_back(); - } } } +auto PointerPositionTracker::operator()( + const JSON::ParsePhase phase, const JSON::Type, const std::uint64_t line, + const std::uint64_t column, const JSON::ParseContext context, + const std::size_t index, const JSON::String &property) -> void { + this->events.push_back({.phase = phase, + .context = context, + .index = index, + .property = context == JSON::ParseContext::Property + ? &property + : nullptr, + .line = line, + .column = column}); +} + auto PointerPositionTracker::get(const Pointer &pointer) const -> std::optional { - assert(this->stack.empty()); - assert(this->current.empty()); - const auto result{this->data.find(pointer)}; - return result == this->data.cend() ? 
std::nullopt - : std::optional{result->second}; + this->ensure_index(); + std::size_t node{0}; + for (const auto &token : pointer) { + if (token.is_property()) { + const auto &children{this->trie[node].property_children}; + const auto iterator{children.find(std::string_view{token.to_property()})}; + if (iterator == children.end()) { + return std::nullopt; + } + node = iterator->second; + } else { + const auto &children{this->trie[node].index_children}; + const auto iterator{children.find(token.to_index())}; + if (iterator == children.end()) { + return std::nullopt; + } + node = iterator->second; + } + } + + return this->trie[node].position; } auto PointerPositionTracker::size() const -> std::size_t { - assert(this->stack.empty()); - assert(this->current.empty()); - return this->data.size(); + return static_cast(std::count_if( + this->events.cbegin(), this->events.cend(), [](const Event &event) { + return event.phase == JSON::ParsePhase::Post; + })); } auto PointerPositionTracker::to_json() const -> JSON { - assert(this->stack.empty()); - assert(this->current.empty()); auto result{JSON::make_object()}; - for (const auto &entry : this->data) { - result.assign_assume_new(to_string(entry.first), - sourcemeta::core::to_json(entry.second)); + Pointer current; + std::vector> start_stack; + + for (const auto &event : this->events) { + switch (event.phase) { + case JSON::ParsePhase::Pre: + start_stack.emplace_back(event.line, event.column); + switch (event.context) { + case JSON::ParseContext::Property: + assert(event.property != nullptr); + current.push_back(*event.property); + break; + case JSON::ParseContext::Index: + current.push_back(event.index); + break; + default: + break; + } + + break; + case JSON::ParsePhase::Post: + assert(!start_stack.empty()); + result.assign_assume_new( + to_string(current), + sourcemeta::core::to_json(Position{start_stack.back().first, + start_stack.back().second, + event.line, event.column})); + start_stack.pop_back(); + if 
(!current.empty()) { + current.pop_back(); + } + + break; + default: + assert(false); + break; + } } + assert(current.empty()); + assert(start_stack.empty()); return result; } diff --git a/vendor/core/src/core/jsonschema/bundle.cc b/vendor/core/src/core/jsonschema/bundle.cc index ac9784aa..b0ed8dfc 100644 --- a/vendor/core/src/core/jsonschema/bundle.cc +++ b/vendor/core/src/core/jsonschema/bundle.cc @@ -1,9 +1,12 @@ #include +#include "helpers.h" + #include // assert #include // std::reference_wrapper #include // std::ostringstream #include // std::tuple +#include // std::unordered_map #include // std::unordered_set #include // std::move #include // std::vector @@ -16,7 +19,7 @@ auto is_official_metaschema_reference( assert(!pointer.empty()); assert(pointer.back().is_property()); return pointer.back().to_property() == "$schema" && - sourcemeta::core::schema_resolver(destination).has_value(); + sourcemeta::core::is_known_schema(destination); } auto dependencies_internal(const sourcemeta::core::JSON &schema, @@ -130,15 +133,113 @@ auto embed_schema(sourcemeta::core::JSON &root, current->assign(key.str(), std::move(target)); } +auto elevate_embedded_resources( + sourcemeta::core::JSON &remote, sourcemeta::core::JSON &root, + const sourcemeta::core::Pointer &container, + const sourcemeta::core::SchemaBaseDialect remote_dialect, + const sourcemeta::core::SchemaResolver &resolver, + std::string_view default_dialect, + std::unordered_map &bundled) -> void { + const auto keyword{sourcemeta::core::definitions_keyword(remote_dialect)}; + const sourcemeta::core::JSON::String keyword_string{keyword}; + if (keyword.empty() || !remote.is_object() || + !remote.defines(keyword_string) || + !remote.at(keyword_string).is_object()) { + return; + } + + auto &defs{remote.at(keyword_string)}; + + // Navigate to the root container once, as it doesn't change per entry + const sourcemeta::core::JSON *root_container{&root}; + bool container_exists{true}; + for (const auto &token : 
container) { + if (!token.is_property() || !root_container->is_object() || + !root_container->defines(token.to_property())) { + container_exists = false; + break; + } + + root_container = &root_container->at(token.to_property()); + } + + std::vector to_extract; + std::vector to_remove; + for (const auto &entry : defs.as_object()) { + const auto &key{entry.first}; + const auto &value{entry.second}; + const auto entry_dialect{ + sourcemeta::core::base_dialect(value, resolver, default_dialect)}; + const auto effective_entry_dialect{ + entry_dialect.has_value() ? entry_dialect.value() : remote_dialect}; + const auto identifier{ + sourcemeta::core::identify(value, effective_entry_dialect)}; + if (identifier.empty() || identifier != key || + !sourcemeta::core::URI{identifier}.is_absolute()) { + continue; + } + + const sourcemeta::core::JSON::String identifier_string{identifier}; + if (bundled.contains(identifier_string)) { + if (container_exists && root_container->is_object()) { + for (const auto &root_entry : root_container->as_object()) { + if (!root_entry.first.starts_with(identifier_string)) { + continue; + } + + const auto stored_dialect{sourcemeta::core::base_dialect( + root_entry.second, resolver, default_dialect)}; + const auto effective_stored_dialect{stored_dialect.has_value() + ? 
stored_dialect.value() + : remote_dialect}; + const auto stored_id{sourcemeta::core::identify( + root_entry.second, effective_stored_dialect)}; + if (stored_id != identifier_string) { + continue; + } + + if (root_entry.second != value) { + throw sourcemeta::core::SchemaError( + "Conflicting embedded resources with the same identifier"); + } + + break; + } + } + + to_remove.emplace_back(key); + } else { + to_extract.emplace_back(key); + bundled.emplace(identifier_string, identifier_string); + } + } + + for (const auto &key : to_extract) { + auto value{std::move(defs.at(key))}; + defs.erase(key); + embed_schema(root, container, key, std::move(value)); + } + + for (const auto &key : to_remove) { + defs.erase(key); + } + + if (defs.empty()) { + remote.erase(sourcemeta::core::JSON::String{keyword}); + } +} + auto bundle_schema(sourcemeta::core::JSON &root, const sourcemeta::core::Pointer &container, - const sourcemeta::core::JSON &subschema, + sourcemeta::core::JSON &subschema, const sourcemeta::core::SchemaWalker &walker, const sourcemeta::core::SchemaResolver &resolver, std::string_view default_dialect, std::string_view default_id, const sourcemeta::core::SchemaFrame::Paths &paths, - std::unordered_set &bundled, + std::unordered_map &bundled, const std::size_t depth = 0) -> void { // Create a fresh frame for each schema we analyze to avoid key collisions // between different schemas that have references at the same pointer paths @@ -153,6 +254,13 @@ auto bundle_schema(sourcemeta::core::JSON &root, frame.analyse(subschema, walker, resolver, default_dialect, default_id); } + std::vector> + deferred; + std::vector< + std::pair> + ref_rewrites; + frame.for_each_unresolved_reference([&](const auto &pointer, const auto &reference) { // We don't want to bundle official schemas, as we can expect @@ -178,9 +286,18 @@ auto bundle_schema(sourcemeta::core::JSON &root, assert(!reference.base.empty()); const sourcemeta::core::JSON::String identifier{reference.base}; - // Skip if 
already bundled to avoid infinite loops on circular - // references if (bundled.contains(identifier)) { + const auto &mapped_id{bundled.at(identifier)}; + if (mapped_id != identifier) { + sourcemeta::core::URI rewrite_uri{mapped_id}; + if (reference.fragment.has_value()) { + rewrite_uri.fragment(reference.fragment.value()); + } + + ref_rewrites.emplace_back(sourcemeta::core::to_pointer(pointer), + rewrite_uri.recompose()); + } + return; } @@ -210,6 +327,9 @@ auto bundle_schema(sourcemeta::core::JSON &root, "The JSON document is not a valid JSON Schema"); } + auto remote_id = + sourcemeta::core::identify(remote.value(), resolver, default_dialect); + // If the reference has a fragment, verify it exists in the remote // schema if (reference.fragment.has_value()) { @@ -226,19 +346,43 @@ auto bundle_schema(sourcemeta::core::JSON &root, } } + sourcemeta::core::JSON::String effective_id{ + remote_id.empty() ? sourcemeta::core::JSON::String{identifier} + : sourcemeta::core::JSON::String{remote_id}}; + if (remote.value().is_object()) { - // Always insert an identifier, as a schema might refer to another - // schema using another URI (i.e. 
due to relying on HTTP - // re-directions, etc) - sourcemeta::core::reidentify(remote.value(), identifier, + sourcemeta::core::reidentify(remote.value(), effective_id, remote_base_dialect.value()); } - bundled.emplace(identifier); - bundle_schema(root, container, remote.value(), walker, resolver, - default_dialect, identifier, paths, bundled, depth + 1); - embed_schema(root, container, identifier, std::move(remote).value()); + if (effective_id != identifier) { + sourcemeta::core::URI rewrite_uri{effective_id}; + if (reference.fragment.has_value()) { + rewrite_uri.fragment(reference.fragment.value()); + } + + ref_rewrites.emplace_back(sourcemeta::core::to_pointer(pointer), + rewrite_uri.recompose()); + } + + bundled.emplace(identifier, effective_id); + bundled.emplace(effective_id, effective_id); + deferred.emplace_back(std::move(remote).value(), std::move(effective_id), + remote_base_dialect.value()); }); + + for (auto &[rewrite_pointer, rewrite_value] : ref_rewrites) { + sourcemeta::core::set(subschema, rewrite_pointer, + sourcemeta::core::JSON{rewrite_value}); + } + + for (auto &[remote, effective_id, remote_dialect] : deferred) { + bundle_schema(root, container, remote, walker, resolver, default_dialect, + effective_id, paths, bundled, depth + 1); + elevate_embedded_resources(remote, root, container, remote_dialect, + resolver, default_dialect, bundled); + embed_schema(root, container, effective_id, std::move(remote)); + } } } // namespace @@ -265,12 +409,13 @@ auto bundle(JSON &schema, const SchemaWalker &walker, // Pre-scan the schema to find any already-embedded schemas and mark them // as bundled to avoid re-embedding them. 
This includes the root schema itself // and any schemas already embedded within it - std::unordered_set bundled; + std::unordered_map bundled; SchemaFrame initial_frame{SchemaFrame::Mode::Locations}; initial_frame.analyse(schema, walker, resolver, default_dialect, default_id, paths); - initial_frame.for_each_resource_uri( - [&bundled](const auto uri) { bundled.emplace(uri); }); + initial_frame.for_each_resource_uri([&bundled](const auto uri) { + bundled.emplace(JSON::String{uri}, JSON::String{uri}); + }); if (default_container.has_value()) { // This is undefined behavior assert(!default_container.value().empty()); @@ -288,75 +433,43 @@ auto bundle(JSON &schema, const SchemaWalker &walker, reidentify(schema, default_id, resolver, default_dialect); } - const auto vocabularies{ - sourcemeta::core::vocabularies(schema, resolver, default_dialect)}; - if (vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_2020_12_Core) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_2019_09_Core)) { - bundle_schema(schema, {"$defs"}, schema, walker, resolver, default_dialect, - default_id, paths, bundled); - return; - } else if ( - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_7) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_7_Hyper) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_6) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_6_Hyper) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_4) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_4_Hyper)) { - if (schema.is_object() && schema.defines("$ref")) { - // This is a very specific case in which we can "fix" this - if (schema.size() == 1) { - auto branches{JSON::make_array()}; - branches.push_back(schema); - schema.at("$ref").into(std::move(branches)); - 
// Note that `allOf` was introduced in Draft 4 - schema.rename("$ref", "allOf"); - } else { - throw sourcemeta::core::SchemaError( - "Cannot bundle a JSON Schema Draft 7 or older with a top-level " - "`$ref` (which overrides sibling keywords) without introducing " - "undefined behavior"); - } - } + const auto schema_base_dialect{ + base_dialect(schema, resolver, default_dialect)}; + if (!schema_base_dialect.has_value()) { + throw SchemaError( + "Could not determine how to perform bundling in this dialect"); + } - bundle_schema(schema, {"definitions"}, schema, walker, resolver, - default_dialect, default_id, paths, bundled); - return; - } else if ( - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_3_Hyper) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_3) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_2_Hyper) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_2) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_1_Hyper) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_1) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_0_Hyper) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_0)) { + const auto container_keyword{ + definitions_keyword(schema_base_dialect.value())}; + if (container_keyword.empty()) { SchemaFrame frame{SchemaFrame::Mode::References}; frame.analyse(schema, walker, resolver, default_dialect, default_id); if (frame.standalone()) { return; } + + throw SchemaError( + "Could not determine how to perform bundling in this dialect"); + } + + if (ref_overrides_adjacent_keywords(schema_base_dialect.value()) && + schema.is_object() && schema.defines("$ref")) { + if (schema.size() == 1) { + auto branches{JSON::make_array()}; + branches.push_back(schema); + 
schema.at("$ref").into(std::move(branches)); + schema.rename("$ref", "allOf"); + } else { + throw SchemaError( + "Cannot bundle a JSON Schema Draft 7 or older with a top-level " + "`$ref` (which overrides sibling keywords) without introducing " + "undefined behavior"); + } } - // We don't attempt to bundle on dialects where we - // don't know where to put the embedded schemas - throw SchemaError( - "Could not determine how to perform bundling in this dialect"); + bundle_schema(schema, {JSON::String{container_keyword}}, schema, walker, + resolver, default_dialect, default_id, paths, bundled); } auto bundle(const JSON &schema, const SchemaWalker &walker, diff --git a/vendor/core/src/core/jsonschema/frame.cc b/vendor/core/src/core/jsonschema/frame.cc index 40b351c6..0770b251 100644 --- a/vendor/core/src/core/jsonschema/frame.cc +++ b/vendor/core/src/core/jsonschema/frame.cc @@ -1394,9 +1394,16 @@ auto SchemaFrame::empty() const noexcept -> bool { } auto SchemaFrame::reset() -> void { - // Note that order of removal is important to avoid undefined behaviour + this->pointers_with_non_orphan_.clear(); this->pointer_to_location_.clear(); this->reachability_.clear(); + this->references_by_destination_.clear(); + this->location_members_children_.clear(); + this->descendants_by_pointer_.clear(); + this->potential_sources_by_location_.clear(); + this->reachability_graph_.clear(); + this->canonical_pointer_.clear(); + this->location_to_canonical_.clear(); this->root_.clear(); this->locations_.clear(); this->references_.clear(); @@ -1415,73 +1422,135 @@ auto SchemaFrame::populate_pointer_to_location() const -> void { } } -// TODO: Find a way to split or simplify this monster while preserving -// its performance? 
-auto SchemaFrame::populate_reachability(const SchemaWalker &walker, - const SchemaResolver &resolver) const - -> void { - if (!this->reachability_.empty()) { +auto SchemaFrame::populate_location_members( + const SchemaWalker &walker, const SchemaResolver &resolver) const -> void { + if (!this->location_members_children_.empty()) { return; } - // --------------------------------------------------------------------------- - // (1) Find all unreachable pointers - // --------------------------------------------------------------------------- + this->populate_pointer_to_location(); - std::vector> unreachable_pointers; + for (const auto &entry : this->locations_) { + if (entry.second.type != LocationType::Subschema) { + continue; + } + if (!entry.second.parent.has_value()) { + continue; + } + const auto &parent_pointer{entry.second.parent.value()}; + const auto relative{entry.second.pointer.slice(parent_pointer.size())}; + if (relative.empty() || !relative.at(0).is_property()) { + continue; + } + const auto parent_location{this->traverse(parent_pointer)}; + if (!parent_location.has_value()) { + continue; + } + const auto vocabs{this->vocabularies(parent_location->get(), resolver)}; + const auto &keyword_result{walker(relative.at(0).to_property(), vocabs)}; + if (keyword_result.type == SchemaKeywordType::LocationMembers) { + this->location_members_children_.insert(std::cref(entry.second.pointer)); + } + } +} - if (this->pointer_to_location_.empty()) { - std::unordered_set, - WeakPointer::Hasher, WeakPointer::Comparator> - has_non_pointer_location; - std::unordered_set, - WeakPointer::Hasher, WeakPointer::Comparator> - has_non_orphan; +auto SchemaFrame::populate_descendants() const -> void { + if (!this->descendants_by_pointer_.empty()) { + return; + } - for (const auto &entry : this->locations_) { - auto [iterator, inserted] = this->pointer_to_location_.try_emplace( - std::cref(entry.second.pointer), std::vector{}); - iterator->second.push_back(&entry.second); - if 
(entry.second.type != LocationType::Pointer) { - has_non_pointer_location.insert(iterator->first); - if (!entry.second.orphan) { - has_non_orphan.insert(iterator->first); + this->populate_pointer_to_location(); + + for (const auto &entry : this->locations_) { + if (entry.second.type == LocationType::Pointer) { + continue; + } + + const auto &pointer{entry.second.pointer}; + const auto *location{&entry.second}; + + WeakPointer prefix; + for (std::size_t index = 0; index <= pointer.size(); ++index) { + auto prefix_iter = this->pointer_to_location_.find(std::cref(prefix)); + if (prefix_iter != this->pointer_to_location_.end() && + !prefix_iter->second.empty()) { + const auto &key_pointer{prefix_iter->second.front()->pointer}; + this->descendants_by_pointer_[std::cref(key_pointer)].push_back( + location); + } + if (index < pointer.size()) { + const auto &token{pointer.at(index)}; + if (token.is_property()) { + prefix.emplace_back(token.to_property(), token.property_hash()); + } else { + prefix.push_back(token.to_index()); } } } + } +} - for (const auto &pointer_reference : has_non_pointer_location) { - const bool is_reachable = has_non_orphan.contains(pointer_reference); - this->reachability_.emplace(pointer_reference, is_reachable); - if (!is_reachable) { - unreachable_pointers.push_back(pointer_reference); - } +auto SchemaFrame::populate_potential_sources( + const SchemaWalker &walker, const SchemaResolver &resolver) const -> void { + if (!this->potential_sources_by_location_.empty()) { + return; + } + + this->populate_reference_graph(); + this->populate_location_members(walker, resolver); + + for (const auto &entry : this->locations_) { + if (entry.second.type == LocationType::Pointer) { + continue; } - } else { - for (const auto &[pointer_reference, locations] : - this->pointer_to_location_) { - const auto has_non_pointer{ - std::ranges::any_of(locations, [](const Location *location) { - return location->type != LocationType::Pointer; - })}; - if (!has_non_pointer) 
{ - continue; + + const auto &pointer{entry.second.pointer}; + const auto *location{&entry.second}; + std::vector sources; + + WeakPointer ancestor = pointer; + bool first_iteration{true}; + while (first_iteration || !ancestor.empty()) { + auto destination_iterator = + this->references_by_destination_.find(std::cref(ancestor)); + if (destination_iterator != this->references_by_destination_.end()) { + bool crosses{false}; + if (ancestor != pointer) { + for (const auto &boundary_ref : this->location_members_children_) { + const auto &boundary{boundary_ref.get()}; + if (pointer.starts_with(boundary) && + !ancestor.starts_with(boundary)) { + crosses = true; + break; + } + } + } + + for (const auto *source_pointer : destination_iterator->second) { + sources.push_back( + PotentialSource{.source_pointer = source_pointer, + .source_parent = source_pointer->initial(), + .crosses = crosses}); + } } - const auto any_non_orphan{ - std::ranges::any_of(locations, [](const Location *location) { - return location->type != LocationType::Pointer && !location->orphan; - })}; - this->reachability_.emplace(pointer_reference, any_non_orphan); - if (!any_non_orphan) { - unreachable_pointers.push_back(pointer_reference); + if (ancestor.empty()) { + break; } + ancestor = ancestor.initial(); + first_iteration = false; + } + + if (!sources.empty()) { + this->potential_sources_by_location_[location] = std::move(sources); } } +} - // --------------------------------------------------------------------------- - // (2) Build a reverse mapping from reference destinations to their sources - // --------------------------------------------------------------------------- +auto SchemaFrame::populate_reference_graph() const -> void { + if (!this->references_by_destination_.empty()) { + return; + } std::unordered_map> dynamic_anchors_by_fragment; @@ -1531,158 +1600,175 @@ auto SchemaFrame::populate_reachability(const SchemaWalker &walker, } } - std::unordered_map, - std::vector, WeakPointer::Hasher, - 
WeakPointer::Comparator> - references_by_destination; for (const auto &[source, destination] : reference_destinations) { - references_by_destination[std::cref(*destination)].push_back(source); + this->references_by_destination_[std::cref(*destination)].push_back(source); } +} - // --------------------------------------------------------------------------- - // (3) Precompute which references could make each orphan reachable - // --------------------------------------------------------------------------- +auto SchemaFrame::populate_reachability_graph( + const SchemaWalker &walker, const SchemaResolver &resolver) const -> void { + if (!this->reachability_graph_.empty()) { + return; + } + + this->populate_pointer_to_location(); + this->populate_location_members(walker, resolver); + this->populate_reference_graph(); - struct PotentialSource { - const WeakPointer *source_pointer; - bool crosses; - }; - struct PotentialReach { - std::reference_wrapper pointer; - std::vector potential_sources; - }; - std::vector unreachable_with_sources; - unreachable_with_sources.reserve(unreachable_pointers.size()); + for (const auto &entry : this->locations_) { + if (entry.second.pointer.empty()) { + continue; + } - std::unordered_map vocabularies_cache; + const auto parent_pointer{entry.second.pointer.initial()}; + auto parent_iterator = + this->pointer_to_location_.find(std::cref(parent_pointer)); + if (parent_iterator == this->pointer_to_location_.end()) { + continue; + } - for (const auto &pointer_reference : unreachable_pointers) { - const auto &pointer{pointer_reference.get()}; - PotentialReach entry{.pointer = pointer_reference, .potential_sources = {}}; + for (const Location *parent_location : parent_iterator->second) { + this->reachability_graph_[parent_location].push_back( + ReachabilityEdge{.target = &entry.second, + .orphan_context_only = entry.second.orphan, + .is_reference = false}); + } + } - WeakPointer ancestor = pointer; - while (!ancestor.empty()) { - auto 
destination_iterator = - references_by_destination.find(std::cref(ancestor)); - if (destination_iterator != references_by_destination.end()) { - bool crosses{false}; - if (ancestor != pointer) { - auto check_location{this->traverse(pointer)}; - while (check_location.has_value()) { - const auto &location{check_location->get()}; - if (location.pointer == ancestor) { - break; - } + for (const auto &[destination_reference, sources] : + this->references_by_destination_) { + auto destination_locations_iterator = + this->pointer_to_location_.find(destination_reference); + if (destination_locations_iterator == this->pointer_to_location_.end()) { + continue; + } - if (!location.parent.has_value()) { - break; - } + const Location *destination_location{nullptr}; + for (const auto *location : destination_locations_iterator->second) { + if (location->type != LocationType::Pointer) { + destination_location = location; + break; + } + } - const auto parent_location{this->traverse(location.parent.value())}; - if (!parent_location.has_value()) { - break; - } + if (!destination_location && + !destination_locations_iterator->second.empty()) { + destination_location = destination_locations_iterator->second.front(); + } - const auto relative{ - location.pointer.slice(location.parent.value().size())}; - if (!relative.empty() && relative.at(0).is_property()) { - const auto &parent_loc{parent_location->get()}; - auto vocab_iterator = - vocabularies_cache.find(parent_loc.base_dialect); - if (vocab_iterator == vocabularies_cache.end()) { - auto [inserted_iterator, inserted] = vocabularies_cache.emplace( - parent_loc.base_dialect, - this->vocabularies(parent_loc, resolver)); - vocab_iterator = inserted_iterator; - } + if (!destination_location) { + continue; + } - const auto &keyword_result{ - walker(relative.at(0).to_property(), vocab_iterator->second)}; - if (keyword_result.type == SchemaKeywordType::LocationMembers) { - crosses = true; - break; - } - } + for (const auto *source_pointer : 
sources) { + if (source_pointer->empty()) { + continue; + } - check_location = parent_location; - } - } + const auto source_parent_pointer{source_pointer->initial()}; + auto source_parent_iterator = + this->pointer_to_location_.find(std::cref(source_parent_pointer)); + if (source_parent_iterator == this->pointer_to_location_.end()) { + continue; + } - for (const auto *source_pointer : destination_iterator->second) { - entry.potential_sources.push_back(PotentialSource{ - .source_pointer = source_pointer, .crosses = crosses}); - } + for (const Location *source_parent_location : + source_parent_iterator->second) { + this->reachability_graph_[source_parent_location].push_back( + ReachabilityEdge{.target = destination_location, + .orphan_context_only = false, + .is_reference = true}); } - ancestor = ancestor.initial(); } + } - if (!entry.potential_sources.empty()) { - unreachable_with_sources.push_back(std::move(entry)); - } + for (const auto &entry : this->locations_) { + auto result = this->canonical_pointer_.emplace( + std::cref(entry.second.pointer), &entry.second.pointer); + this->location_to_canonical_[&entry.second] = + result.second ? 
&entry.second.pointer : result.first->second; } +} - std::ranges::sort(unreachable_with_sources, [](const PotentialReach &left, - const PotentialReach &right) { - return left.pointer.get().size() < right.pointer.get().size(); - }); +auto SchemaFrame::populate_reachability(const Location &base, + const SchemaWalker &walker, + const SchemaResolver &resolver) const + -> const ReachabilityCache & { + const ReachabilityKey key{.pointer = &base.pointer, .orphan = base.orphan}; + auto cache_iterator = this->reachability_.find(key); + if (cache_iterator != this->reachability_.end()) { + return cache_iterator->second; + } - // --------------------------------------------------------------------------- - // (4) Propagate reachability through references using fixpoint iteration - // --------------------------------------------------------------------------- + auto &cache = this->reachability_[key]; + this->populate_reachability_graph(walker, resolver); - bool changed{true}; - while (changed) { - changed = false; + const Location *base_location{&base}; + std::vector queue; + std::unordered_set visited; - auto write_iterator = unreachable_with_sources.begin(); - for (auto read_iterator = unreachable_with_sources.begin(); - read_iterator != unreachable_with_sources.end(); ++read_iterator) { - bool became_reachable = false; + queue.push_back(base_location); + visited.insert(base_location); + auto base_canonical_iterator = + this->location_to_canonical_.find(base_location); + if (base_canonical_iterator != this->location_to_canonical_.end()) { + cache.emplace(base_canonical_iterator->second, true); + } - for (const auto &potential_source : read_iterator->potential_sources) { - if (potential_source.crosses) { - continue; - } + std::size_t queue_index{0}; + while (queue_index < queue.size()) { + const Location *current = queue[queue_index++]; - const auto &source_parent{potential_source.source_pointer->initial()}; - bool source_parent_reachable{source_parent.empty()}; - if 
(!source_parent_reachable) { - const auto reachability_iterator{ - this->reachability_.find(std::cref(source_parent))}; - source_parent_reachable = - reachability_iterator != this->reachability_.end() && - reachability_iterator->second; - } + auto edges_iterator = this->reachability_graph_.find(current); + if (edges_iterator == this->reachability_graph_.end()) { + continue; + } - if (source_parent_reachable) { - became_reachable = true; - break; - } + for (const auto &edge : edges_iterator->second) { + if (visited.contains(edge.target)) { + continue; } - if (became_reachable) { - this->reachability_[read_iterator->pointer] = true; - changed = true; - } else { - if (write_iterator != read_iterator) { - *write_iterator = std::move(*read_iterator); + if (edge.orphan_context_only && !base.orphan && !current->orphan) { + continue; + } + + if (!edge.is_reference && edge.orphan_context_only) { + auto target_iterator = this->location_members_children_.find( + std::cref(edge.target->pointer)); + if (target_iterator != this->location_members_children_.end()) { + const auto keyword_path{edge.target->pointer.initial()}; + if (keyword_path.starts_with(current->pointer)) { + continue; + } } - ++write_iterator; + } + + visited.insert(edge.target); + queue.push_back(edge.target); + + auto target_canonical_iterator = + this->location_to_canonical_.find(edge.target); + if (target_canonical_iterator != this->location_to_canonical_.end()) { + cache.emplace(target_canonical_iterator->second, true); } } - unreachable_with_sources.erase(write_iterator, - unreachable_with_sources.end()); } + + return cache; } -auto SchemaFrame::is_reachable(const Location &location, +auto SchemaFrame::is_reachable(const Location &base, const Location &location, const SchemaWalker &walker, const SchemaResolver &resolver) const -> bool { assert(location.type != LocationType::Pointer); - this->populate_reachability(walker, resolver); - const auto 
iterator{this->reachability_.find(std::cref(location.pointer))}; - assert(iterator != this->reachability_.end()); - return iterator->second; + const auto &cache{this->populate_reachability(base, walker, resolver)}; + auto canonical_iterator = this->location_to_canonical_.find(&location); + if (canonical_iterator == this->location_to_canonical_.end()) { + return false; + } + const auto iterator{cache.find(canonical_iterator->second)}; + return iterator != cache.end() && iterator->second; } } // namespace sourcemeta::core diff --git a/vendor/core/src/core/jsonschema/helpers.h b/vendor/core/src/core/jsonschema/helpers.h index 8136371f..ad263555 100644 --- a/vendor/core/src/core/jsonschema/helpers.h +++ b/vendor/core/src/core/jsonschema/helpers.h @@ -34,6 +34,33 @@ inline auto id_keyword(const SchemaBaseDialect base_dialect) return "$id"; } +inline auto definitions_keyword(const SchemaBaseDialect base_dialect) + -> std::string_view { + switch (base_dialect) { + case SchemaBaseDialect::JSON_Schema_2020_12: + case SchemaBaseDialect::JSON_Schema_2020_12_Hyper: + case SchemaBaseDialect::JSON_Schema_2019_09: + case SchemaBaseDialect::JSON_Schema_2019_09_Hyper: + return "$defs"; + case SchemaBaseDialect::JSON_Schema_Draft_7: + case SchemaBaseDialect::JSON_Schema_Draft_7_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_6: + case SchemaBaseDialect::JSON_Schema_Draft_6_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_4: + case SchemaBaseDialect::JSON_Schema_Draft_4_Hyper: + return "definitions"; + case SchemaBaseDialect::JSON_Schema_Draft_3: + case SchemaBaseDialect::JSON_Schema_Draft_3_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_2_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_1_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_0_Hyper: + return ""; + } + + assert(false); + return "$defs"; +} + // In older drafts, the presence of `$ref` would override any sibling keywords // See // https://json-schema.org/draft-07/draft-handrews-json-schema-01#rfc.section.8.3 
diff --git a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema.h b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema.h index 54005110..4e44ce62 100644 --- a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema.h +++ b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema.h @@ -41,7 +41,12 @@ SOURCEMETA_CORE_JSONSCHEMA_EXPORT auto schema_resolver(const std::string_view identifier) -> std::optional; /// @ingroup jsonschema -/// A default schema walker with support for a wide range of drafs +/// Check if a given identifier corresponds to a known built-in schema +SOURCEMETA_CORE_JSONSCHEMA_EXPORT +auto is_known_schema(const std::string_view identifier) noexcept -> bool; + +/// @ingroup jsonschema +/// A default schema walker with support for a wide range of drafts SOURCEMETA_CORE_JSONSCHEMA_EXPORT auto schema_walker(const std::string_view keyword, const Vocabularies &vocabularies) @@ -408,7 +413,7 @@ auto wrap(std::string_view identifier) -> JSON; /// @ingroup jsonschema /// /// Wrap a schema to only access one of its subschemas. This is useful if you -/// want to perform validation only a specific part of the schemaw without +/// want to perform validation on only a specific part of the schema without /// having to reinvent the wheel. 
For example: /// /// ```cpp diff --git a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h index 522dbfa0..74ec0370 100644 --- a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h +++ b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_frame.h @@ -242,15 +242,12 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { auto reset() -> void; /// Determines if a location could be evaluated during validation - [[nodiscard]] auto is_reachable(const Location &location, + [[nodiscard]] auto is_reachable(const Location &base, + const Location &location, const SchemaWalker &walker, const SchemaResolver &resolver) const -> bool; private: - auto populate_pointer_to_location() const -> void; - auto populate_reachability(const SchemaWalker &walker, - const SchemaResolver &resolver) const -> void; - Mode mode_; // Exporting symbols that depends on the standard C++ library is considered // safe. 
@@ -265,10 +262,72 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaFrame { std::vector, WeakPointer::Hasher, WeakPointer::Comparator> pointer_to_location_; - mutable std::unordered_map, bool, + mutable std::unordered_set, WeakPointer::Hasher, WeakPointer::Comparator> + pointers_with_non_orphan_; + using ReachabilityCache = std::unordered_map; + struct ReachabilityKey { + const WeakPointer *pointer; + bool orphan; + auto operator==(const ReachabilityKey &other) const noexcept -> bool { + return this->pointer == other.pointer && this->orphan == other.orphan; + } + }; + struct ReachabilityKeyHasher { + auto operator()(const ReachabilityKey &key) const noexcept -> std::size_t { + return std::hash{}(key.pointer) ^ + (std::hash{}(key.orphan) << 1); + } + }; + mutable std::unordered_map reachability_; + mutable std::unordered_map, + std::vector, + WeakPointer::Hasher, WeakPointer::Comparator> + references_by_destination_; + mutable std::unordered_set, + WeakPointer::Hasher, WeakPointer::Comparator> + location_members_children_; + mutable std::unordered_map, + std::vector, WeakPointer::Hasher, + WeakPointer::Comparator> + descendants_by_pointer_; + struct PotentialSource { + const WeakPointer *source_pointer; + WeakPointer source_parent; + bool crosses; + }; + mutable std::unordered_map> + potential_sources_by_location_; + struct ReachabilityEdge { + const Location *target; + bool orphan_context_only; + bool is_reference; + }; + mutable std::unordered_map> + reachability_graph_; + mutable std::unordered_map, + const WeakPointer *, WeakPointer::Hasher, + WeakPointer::Comparator> + canonical_pointer_; + mutable std::unordered_map + location_to_canonical_; bool standalone_{false}; + + auto populate_pointer_to_location() const -> void; + auto populate_reference_graph() const -> void; + auto populate_location_members(const SchemaWalker &walker, + const SchemaResolver &resolver) const -> void; + auto populate_descendants() const -> void; + auto populate_potential_sources(const 
SchemaWalker &walker, + const SchemaResolver &resolver) const -> void; + auto populate_reachability_graph(const SchemaWalker &walker, + const SchemaResolver &resolver) const + -> void; + auto populate_reachability(const Location &base, const SchemaWalker &walker, + const SchemaResolver &resolver) const + -> const ReachabilityCache &; #if defined(_MSC_VER) #pragma warning(default : 4251 4275) #endif diff --git a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_transform.h b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_transform.h index f2b9f700..cf48954c 100644 --- a/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_transform.h +++ b/vendor/core/src/core/jsonschema/include/sourcemeta/core/jsonschema_transform.h @@ -231,16 +231,17 @@ class SOURCEMETA_CORE_JSONSCHEMA_EXPORT SchemaTransformer { /// Add a rule to the bundle. Rules are evaluated in the order they are added. /// It is the caller's responsibility to not add duplicate rules. template T, typename... 
Args> - auto add(Args &&...args) -> void { + auto add(Args &&...args) -> std::string_view { static_assert(requires { typename T::mutates; }); static_assert(requires { typename T::reframe_after_transform; }); static_assert( std::is_same_v || std::is_same_v); - this->rules.emplace_back( + auto &entry{this->rules.emplace_back( std::make_unique(std::forward(args)...), std::is_same_v, - std::is_same_v); + std::is_same_v)}; + return std::get<0>(entry)->name(); } /// Remove a rule from the bundle diff --git a/vendor/core/src/core/jsonschema/known_resolver.in.cc b/vendor/core/src/core/jsonschema/known_resolver.in.cc index 3cf290cc..a8a17c9d 100644 --- a/vendor/core/src/core/jsonschema/known_resolver.in.cc +++ b/vendor/core/src/core/jsonschema/known_resolver.in.cc @@ -1,242 +1,436 @@ #include -auto sourcemeta::core::schema_resolver(const std::string_view identifier) - -> std::optional { +#include // std::uint8_t + +enum class KnownSchema : std::uint8_t { + JSONSCHEMA_2020_12, + HYPERSCHEMA_2020_12, + JSONSCHEMA_2020_12_APPLICATOR, + JSONSCHEMA_2020_12_CONTENT, + JSONSCHEMA_2020_12_CORE, + JSONSCHEMA_2020_12_FORMAT_ANNOTATION, + JSONSCHEMA_2020_12_FORMAT_ASSERTION, + JSONSCHEMA_2020_12_HYPER_SCHEMA, + JSONSCHEMA_2020_12_META_DATA, + JSONSCHEMA_2020_12_UNEVALUATED, + JSONSCHEMA_2020_12_VALIDATION, + LINKS_2020_12, + JSONSCHEMA_2020_12_OUTPUT, + + JSONSCHEMA_2019_09, + HYPERSCHEMA_2019_09, + JSONSCHEMA_2019_09_APPLICATOR, + JSONSCHEMA_2019_09_CONTENT, + JSONSCHEMA_2019_09_CORE, + JSONSCHEMA_2019_09_FORMAT, + JSONSCHEMA_2019_09_HYPER_SCHEMA, + JSONSCHEMA_2019_09_META_DATA, + JSONSCHEMA_2019_09_VALIDATION, + LINKS_2019_09, + JSONSCHEMA_2019_09_OUTPUT, + HYPERSCHEMA_2019_09_OUTPUT, + + JSONSCHEMA_DRAFT7, + HYPERSCHEMA_DRAFT7, + LINKS_DRAFT7, + HYPERSCHEMA_DRAFT7_OUTPUT, + + JSONSCHEMA_DRAFT6, + HYPERSCHEMA_DRAFT6, + LINKS_DRAFT6, + + JSONSCHEMA_DRAFT4, + HYPERSCHEMA_DRAFT4, + LINKS_DRAFT4, + + JSONSCHEMA_DRAFT3, + HYPERSCHEMA_DRAFT3, + LINKS_DRAFT3, + JSON_REF_DRAFT3, + + 
JSONSCHEMA_DRAFT2, + HYPERSCHEMA_DRAFT2, + LINKS_DRAFT2, + JSON_REF_DRAFT2, + + JSONSCHEMA_DRAFT1, + HYPERSCHEMA_DRAFT1, + LINKS_DRAFT1, + JSON_REF_DRAFT1, + + JSONSCHEMA_DRAFT0, + HYPERSCHEMA_DRAFT0, + LINKS_DRAFT0, + JSON_REF_DRAFT0, + + OAS_3_2_DIALECT_2025_09_17, + OAS_3_2_META_2025_09_17, + + OAS_3_1_DIALECT_BASE, + OAS_3_1_META_BASE, + + UNKNOWN +}; + +static auto parse_identifier(const std::string_view identifier) -> KnownSchema { // JSON Schema 2020-12 if (identifier == "https://json-schema.org/draft/2020-12/schema" || - // Just for compatibility given that this is such a common issue identifier == "https://json-schema.org/draft/2020-12/schema#") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSONSCHEMA_2020_12@)EOF"); + return KnownSchema::JSONSCHEMA_2020_12; } else if (identifier == "https://json-schema.org/draft/2020-12/hyper-schema" || - // Just for compatibility given that this is such a common issue identifier == "https://json-schema.org/draft/2020-12/hyper-schema#") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_HYPERSCHEMA_2020_12@)EOF"); + return KnownSchema::HYPERSCHEMA_2020_12; } else if (identifier == "https://json-schema.org/draft/2020-12/meta/applicator") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSONSCHEMA_2020_12_APPLICATOR@)EOF"); + return KnownSchema::JSONSCHEMA_2020_12_APPLICATOR; } else if (identifier == "https://json-schema.org/draft/2020-12/meta/content") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSONSCHEMA_2020_12_CONTENT@)EOF"); + return KnownSchema::JSONSCHEMA_2020_12_CONTENT; } else if (identifier == "https://json-schema.org/draft/2020-12/meta/core") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSONSCHEMA_2020_12_CORE@)EOF"); + return KnownSchema::JSONSCHEMA_2020_12_CORE; } else if (identifier == "https://json-schema.org/draft/2020-12/meta/format-annotation") { - return sourcemeta::core::parse_json( - 
R"EOF(@METASCHEMA_JSONSCHEMA_2020_12_FORMAT_ANNOTATION@)EOF"); + return KnownSchema::JSONSCHEMA_2020_12_FORMAT_ANNOTATION; } else if (identifier == "https://json-schema.org/draft/2020-12/meta/format-assertion") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSONSCHEMA_2020_12_FORMAT_ASSERTION@)EOF"); + return KnownSchema::JSONSCHEMA_2020_12_FORMAT_ASSERTION; } else if (identifier == "https://json-schema.org/draft/2020-12/meta/hyper-schema") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSONSCHEMA_2020_12_HYPER_SCHEMA@)EOF"); + return KnownSchema::JSONSCHEMA_2020_12_HYPER_SCHEMA; } else if (identifier == "https://json-schema.org/draft/2020-12/meta/meta-data") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSONSCHEMA_2020_12_META_DATA@)EOF"); + return KnownSchema::JSONSCHEMA_2020_12_META_DATA; } else if (identifier == "https://json-schema.org/draft/2020-12/meta/unevaluated") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSONSCHEMA_2020_12_UNEVALUATED@)EOF"); + return KnownSchema::JSONSCHEMA_2020_12_UNEVALUATED; } else if (identifier == "https://json-schema.org/draft/2020-12/meta/validation") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSONSCHEMA_2020_12_VALIDATION@)EOF"); + return KnownSchema::JSONSCHEMA_2020_12_VALIDATION; } else if (identifier == "https://json-schema.org/draft/2020-12/links") { - return sourcemeta::core::parse_json(R"EOF(@METASCHEMA_LINKS_2020_12@)EOF"); + return KnownSchema::LINKS_2020_12; } else if (identifier == "https://json-schema.org/draft/2020-12/output/schema") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSONSCHEMA_2020_12_OUTPUT@)EOF"); + return KnownSchema::JSONSCHEMA_2020_12_OUTPUT; // JSON Schema 2019-09 } else if (identifier == "https://json-schema.org/draft/2019-09/schema" || - // Just for compatibility given that this is such a common issue identifier == "https://json-schema.org/draft/2019-09/schema#") { - return sourcemeta::core::parse_json( 
- R"EOF(@METASCHEMA_JSONSCHEMA_2019_09@)EOF"); + return KnownSchema::JSONSCHEMA_2019_09; } else if (identifier == "https://json-schema.org/draft/2019-09/hyper-schema" || - // Just for compatibility given that this is such a common issue identifier == "https://json-schema.org/draft/2019-09/hyper-schema#") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_HYPERSCHEMA_2019_09@)EOF"); + return KnownSchema::HYPERSCHEMA_2019_09; } else if (identifier == "https://json-schema.org/draft/2019-09/meta/applicator") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSONSCHEMA_2019_09_APPLICATOR@)EOF"); + return KnownSchema::JSONSCHEMA_2019_09_APPLICATOR; } else if (identifier == "https://json-schema.org/draft/2019-09/meta/content") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSONSCHEMA_2019_09_CONTENT@)EOF"); + return KnownSchema::JSONSCHEMA_2019_09_CONTENT; } else if (identifier == "https://json-schema.org/draft/2019-09/meta/core") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSONSCHEMA_2019_09_CORE@)EOF"); + return KnownSchema::JSONSCHEMA_2019_09_CORE; } else if (identifier == "https://json-schema.org/draft/2019-09/meta/format") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSONSCHEMA_2019_09_FORMAT@)EOF"); + return KnownSchema::JSONSCHEMA_2019_09_FORMAT; } else if (identifier == "https://json-schema.org/draft/2019-09/meta/hyper-schema") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSONSCHEMA_2019_09_HYPER_SCHEMA@)EOF"); + return KnownSchema::JSONSCHEMA_2019_09_HYPER_SCHEMA; } else if (identifier == "https://json-schema.org/draft/2019-09/meta/meta-data") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSONSCHEMA_2019_09_META_DATA@)EOF"); + return KnownSchema::JSONSCHEMA_2019_09_META_DATA; } else if (identifier == "https://json-schema.org/draft/2019-09/meta/validation") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSONSCHEMA_2019_09_VALIDATION@)EOF"); + return 
KnownSchema::JSONSCHEMA_2019_09_VALIDATION; } else if (identifier == "https://json-schema.org/draft/2019-09/links") { - return sourcemeta::core::parse_json(R"EOF(@METASCHEMA_LINKS_2019_09@)EOF"); + return KnownSchema::LINKS_2019_09; } else if (identifier == "https://json-schema.org/draft/2019-09/output/schema") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSONSCHEMA_2019_09_OUTPUT@)EOF"); + return KnownSchema::JSONSCHEMA_2019_09_OUTPUT; } else if (identifier == "https://json-schema.org/draft/2019-09/output/hyper-schema") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_HYPERSCHEMA_2019_09_OUTPUT@)EOF"); + return KnownSchema::HYPERSCHEMA_2019_09_OUTPUT; + // JSON Schema Draft7 } else if (identifier == "http://json-schema.org/draft-07/schema#" || identifier == "http://json-schema.org/draft-07/schema") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSONSCHEMA_DRAFT7@)EOF"); + return KnownSchema::JSONSCHEMA_DRAFT7; } else if (identifier == "http://json-schema.org/draft-07/hyper-schema#" || identifier == "http://json-schema.org/draft-07/hyper-schema") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_HYPERSCHEMA_DRAFT7@)EOF"); + return KnownSchema::HYPERSCHEMA_DRAFT7; } else if (identifier == "http://json-schema.org/draft-07/links#" || identifier == "http://json-schema.org/draft-07/links") { - return sourcemeta::core::parse_json(R"EOF(@METASCHEMA_LINKS_DRAFT7@)EOF"); + return KnownSchema::LINKS_DRAFT7; } else if (identifier == "http://json-schema.org/draft-07/hyper-schema-output") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_HYPERSCHEMA_DRAFT7_OUTPUT@)EOF"); + return KnownSchema::HYPERSCHEMA_DRAFT7_OUTPUT; // JSON Schema Draft6 } else if (identifier == "http://json-schema.org/draft-06/schema#" || identifier == "http://json-schema.org/draft-06/schema") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSONSCHEMA_DRAFT6@)EOF"); + return KnownSchema::JSONSCHEMA_DRAFT6; } else if (identifier == 
"http://json-schema.org/draft-06/hyper-schema#" || identifier == "http://json-schema.org/draft-06/hyper-schema") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_HYPERSCHEMA_DRAFT6@)EOF"); + return KnownSchema::HYPERSCHEMA_DRAFT6; } else if (identifier == "http://json-schema.org/draft-06/links#" || identifier == "http://json-schema.org/draft-06/links") { - return sourcemeta::core::parse_json(R"EOF(@METASCHEMA_LINKS_DRAFT6@)EOF"); + return KnownSchema::LINKS_DRAFT6; // JSON Schema Draft4 } else if (identifier == "http://json-schema.org/draft-04/schema#" || identifier == "http://json-schema.org/draft-04/schema") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSONSCHEMA_DRAFT4@)EOF"); + return KnownSchema::JSONSCHEMA_DRAFT4; } else if (identifier == "http://json-schema.org/draft-04/hyper-schema#" || identifier == "http://json-schema.org/draft-04/hyper-schema") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_HYPERSCHEMA_DRAFT4@)EOF"); + return KnownSchema::HYPERSCHEMA_DRAFT4; } else if (identifier == "http://json-schema.org/draft-04/links#" || identifier == "http://json-schema.org/draft-04/links") { - return sourcemeta::core::parse_json(R"EOF(@METASCHEMA_LINKS_DRAFT4@)EOF"); + return KnownSchema::LINKS_DRAFT4; // JSON Schema Draft3 } else if (identifier == "http://json-schema.org/draft-03/schema#" || identifier == "http://json-schema.org/draft-03/schema") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSONSCHEMA_DRAFT3@)EOF"); + return KnownSchema::JSONSCHEMA_DRAFT3; } else if (identifier == "http://json-schema.org/draft-03/hyper-schema#" || identifier == "http://json-schema.org/draft-03/hyper-schema") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_HYPERSCHEMA_DRAFT3@)EOF"); + return KnownSchema::HYPERSCHEMA_DRAFT3; } else if (identifier == "http://json-schema.org/draft-03/links#" || identifier == "http://json-schema.org/draft-03/links") { - return 
sourcemeta::core::parse_json(R"EOF(@METASCHEMA_LINKS_DRAFT3@)EOF"); + return KnownSchema::LINKS_DRAFT3; } else if (identifier == "http://json-schema.org/draft-03/json-ref#" || identifier == "http://json-schema.org/draft-03/json-ref") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSON_REF_DRAFT3@)EOF"); + return KnownSchema::JSON_REF_DRAFT3; // JSON Schema Draft2 } else if (identifier == "http://json-schema.org/draft-02/schema#" || identifier == "http://json-schema.org/draft-02/schema") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSONSCHEMA_DRAFT2@)EOF"); + return KnownSchema::JSONSCHEMA_DRAFT2; } else if (identifier == "http://json-schema.org/draft-02/hyper-schema#" || identifier == "http://json-schema.org/draft-02/hyper-schema") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_HYPERSCHEMA_DRAFT2@)EOF"); + return KnownSchema::HYPERSCHEMA_DRAFT2; } else if (identifier == "http://json-schema.org/draft-02/links#" || identifier == "http://json-schema.org/draft-02/links") { - return sourcemeta::core::parse_json(R"EOF(@METASCHEMA_LINKS_DRAFT2@)EOF"); + return KnownSchema::LINKS_DRAFT2; } else if (identifier == "http://json-schema.org/draft-02/json-ref#" || identifier == "http://json-schema.org/draft-02/json-ref") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSON_REF_DRAFT2@)EOF"); + return KnownSchema::JSON_REF_DRAFT2; // JSON Schema Draft1 } else if (identifier == "http://json-schema.org/draft-01/schema#" || identifier == "http://json-schema.org/draft-01/schema") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSONSCHEMA_DRAFT1@)EOF"); + return KnownSchema::JSONSCHEMA_DRAFT1; } else if (identifier == "http://json-schema.org/draft-01/hyper-schema#" || identifier == "http://json-schema.org/draft-01/hyper-schema") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_HYPERSCHEMA_DRAFT1@)EOF"); + return KnownSchema::HYPERSCHEMA_DRAFT1; } else if (identifier == 
"http://json-schema.org/draft-01/links#" || identifier == "http://json-schema.org/draft-01/links") { - return sourcemeta::core::parse_json(R"EOF(@METASCHEMA_LINKS_DRAFT1@)EOF"); + return KnownSchema::LINKS_DRAFT1; } else if (identifier == "http://json-schema.org/draft-01/json-ref#" || identifier == "http://json-schema.org/draft-01/json-ref") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSON_REF_DRAFT1@)EOF"); + return KnownSchema::JSON_REF_DRAFT1; // JSON Schema Draft0 } else if (identifier == "http://json-schema.org/draft-00/schema#" || identifier == "http://json-schema.org/draft-00/schema") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSONSCHEMA_DRAFT0@)EOF"); + return KnownSchema::JSONSCHEMA_DRAFT0; } else if (identifier == "http://json-schema.org/draft-00/hyper-schema#" || identifier == "http://json-schema.org/draft-00/hyper-schema") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_HYPERSCHEMA_DRAFT0@)EOF"); + return KnownSchema::HYPERSCHEMA_DRAFT0; } else if (identifier == "http://json-schema.org/draft-00/links#" || identifier == "http://json-schema.org/draft-00/links") { - return sourcemeta::core::parse_json(R"EOF(@METASCHEMA_LINKS_DRAFT0@)EOF"); + return KnownSchema::LINKS_DRAFT0; } else if (identifier == "http://json-schema.org/draft-00/json-ref#" || identifier == "http://json-schema.org/draft-00/json-ref") { - return sourcemeta::core::parse_json( - R"EOF(@METASCHEMA_JSON_REF_DRAFT0@)EOF"); + return KnownSchema::JSON_REF_DRAFT0; // OpenAPI v3.2 } else if (identifier == "https://spec.openapis.org/oas/3.2/dialect/2025-09-17") { - return sourcemeta::core::parse_json( - R"EOF(@OPENAPI_OAS_3_2_DIALECT_2025_09_17@)EOF"); + return KnownSchema::OAS_3_2_DIALECT_2025_09_17; } else if (identifier == "https://spec.openapis.org/oas/3.2/meta/2025-09-17") { - return sourcemeta::core::parse_json( - R"EOF(@OPENAPI_OAS_3_2_META_2025_09_17@)EOF"); + return KnownSchema::OAS_3_2_META_2025_09_17; // OpenAPI v3.1 } else if (identifier 
== "https://spec.openapis.org/oas/3.1/dialect/base") { - return sourcemeta::core::parse_json( - R"EOF(@OPENAPI_OAS_3_1_DIALECT_BASE@)EOF"); + return KnownSchema::OAS_3_1_DIALECT_BASE; } else if (identifier == "https://spec.openapis.org/oas/3.1/meta/base") { - return sourcemeta::core::parse_json(R"EOF(@OPENAPI_OAS_3_1_META_BASE@)EOF"); + return KnownSchema::OAS_3_1_META_BASE; + } + + return KnownSchema::UNKNOWN; +} - // Otherwise - } else { - return std::nullopt; +auto sourcemeta::core::schema_resolver(const std::string_view identifier) + -> std::optional { + switch (parse_identifier(identifier)) { + case KnownSchema::JSONSCHEMA_2020_12: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_2020_12@)EOF"); + case KnownSchema::HYPERSCHEMA_2020_12: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_HYPERSCHEMA_2020_12@)EOF"); + case KnownSchema::JSONSCHEMA_2020_12_APPLICATOR: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_2020_12_APPLICATOR@)EOF"); + case KnownSchema::JSONSCHEMA_2020_12_CONTENT: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_2020_12_CONTENT@)EOF"); + case KnownSchema::JSONSCHEMA_2020_12_CORE: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_2020_12_CORE@)EOF"); + case KnownSchema::JSONSCHEMA_2020_12_FORMAT_ANNOTATION: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_2020_12_FORMAT_ANNOTATION@)EOF"); + case KnownSchema::JSONSCHEMA_2020_12_FORMAT_ASSERTION: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_2020_12_FORMAT_ASSERTION@)EOF"); + case KnownSchema::JSONSCHEMA_2020_12_HYPER_SCHEMA: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_2020_12_HYPER_SCHEMA@)EOF"); + case KnownSchema::JSONSCHEMA_2020_12_META_DATA: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_2020_12_META_DATA@)EOF"); + case KnownSchema::JSONSCHEMA_2020_12_UNEVALUATED: + return sourcemeta::core::parse_json( + 
R"EOF(@METASCHEMA_JSONSCHEMA_2020_12_UNEVALUATED@)EOF"); + case KnownSchema::JSONSCHEMA_2020_12_VALIDATION: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_2020_12_VALIDATION@)EOF"); + case KnownSchema::LINKS_2020_12: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_LINKS_2020_12@)EOF"); + case KnownSchema::JSONSCHEMA_2020_12_OUTPUT: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_2020_12_OUTPUT@)EOF"); + case KnownSchema::JSONSCHEMA_2019_09: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_2019_09@)EOF"); + case KnownSchema::HYPERSCHEMA_2019_09: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_HYPERSCHEMA_2019_09@)EOF"); + case KnownSchema::JSONSCHEMA_2019_09_APPLICATOR: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_2019_09_APPLICATOR@)EOF"); + case KnownSchema::JSONSCHEMA_2019_09_CONTENT: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_2019_09_CONTENT@)EOF"); + case KnownSchema::JSONSCHEMA_2019_09_CORE: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_2019_09_CORE@)EOF"); + case KnownSchema::JSONSCHEMA_2019_09_FORMAT: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_2019_09_FORMAT@)EOF"); + case KnownSchema::JSONSCHEMA_2019_09_HYPER_SCHEMA: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_2019_09_HYPER_SCHEMA@)EOF"); + case KnownSchema::JSONSCHEMA_2019_09_META_DATA: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_2019_09_META_DATA@)EOF"); + case KnownSchema::JSONSCHEMA_2019_09_VALIDATION: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_2019_09_VALIDATION@)EOF"); + case KnownSchema::LINKS_2019_09: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_LINKS_2019_09@)EOF"); + case KnownSchema::JSONSCHEMA_2019_09_OUTPUT: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_2019_09_OUTPUT@)EOF"); + case 
KnownSchema::HYPERSCHEMA_2019_09_OUTPUT: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_HYPERSCHEMA_2019_09_OUTPUT@)EOF"); + case KnownSchema::JSONSCHEMA_DRAFT7: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_DRAFT7@)EOF"); + case KnownSchema::HYPERSCHEMA_DRAFT7: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_HYPERSCHEMA_DRAFT7@)EOF"); + case KnownSchema::LINKS_DRAFT7: + return sourcemeta::core::parse_json(R"EOF(@METASCHEMA_LINKS_DRAFT7@)EOF"); + case KnownSchema::HYPERSCHEMA_DRAFT7_OUTPUT: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_HYPERSCHEMA_DRAFT7_OUTPUT@)EOF"); + case KnownSchema::JSONSCHEMA_DRAFT6: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_DRAFT6@)EOF"); + case KnownSchema::HYPERSCHEMA_DRAFT6: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_HYPERSCHEMA_DRAFT6@)EOF"); + case KnownSchema::LINKS_DRAFT6: + return sourcemeta::core::parse_json(R"EOF(@METASCHEMA_LINKS_DRAFT6@)EOF"); + case KnownSchema::JSONSCHEMA_DRAFT4: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_DRAFT4@)EOF"); + case KnownSchema::HYPERSCHEMA_DRAFT4: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_HYPERSCHEMA_DRAFT4@)EOF"); + case KnownSchema::LINKS_DRAFT4: + return sourcemeta::core::parse_json(R"EOF(@METASCHEMA_LINKS_DRAFT4@)EOF"); + case KnownSchema::JSONSCHEMA_DRAFT3: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_DRAFT3@)EOF"); + case KnownSchema::HYPERSCHEMA_DRAFT3: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_HYPERSCHEMA_DRAFT3@)EOF"); + case KnownSchema::LINKS_DRAFT3: + return sourcemeta::core::parse_json(R"EOF(@METASCHEMA_LINKS_DRAFT3@)EOF"); + case KnownSchema::JSON_REF_DRAFT3: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSON_REF_DRAFT3@)EOF"); + case KnownSchema::JSONSCHEMA_DRAFT2: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_DRAFT2@)EOF"); + case 
KnownSchema::HYPERSCHEMA_DRAFT2: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_HYPERSCHEMA_DRAFT2@)EOF"); + case KnownSchema::LINKS_DRAFT2: + return sourcemeta::core::parse_json(R"EOF(@METASCHEMA_LINKS_DRAFT2@)EOF"); + case KnownSchema::JSON_REF_DRAFT2: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSON_REF_DRAFT2@)EOF"); + case KnownSchema::JSONSCHEMA_DRAFT1: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_DRAFT1@)EOF"); + case KnownSchema::HYPERSCHEMA_DRAFT1: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_HYPERSCHEMA_DRAFT1@)EOF"); + case KnownSchema::LINKS_DRAFT1: + return sourcemeta::core::parse_json(R"EOF(@METASCHEMA_LINKS_DRAFT1@)EOF"); + case KnownSchema::JSON_REF_DRAFT1: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSON_REF_DRAFT1@)EOF"); + case KnownSchema::JSONSCHEMA_DRAFT0: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSONSCHEMA_DRAFT0@)EOF"); + case KnownSchema::HYPERSCHEMA_DRAFT0: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_HYPERSCHEMA_DRAFT0@)EOF"); + case KnownSchema::LINKS_DRAFT0: + return sourcemeta::core::parse_json(R"EOF(@METASCHEMA_LINKS_DRAFT0@)EOF"); + case KnownSchema::JSON_REF_DRAFT0: + return sourcemeta::core::parse_json( + R"EOF(@METASCHEMA_JSON_REF_DRAFT0@)EOF"); + case KnownSchema::OAS_3_2_DIALECT_2025_09_17: + return sourcemeta::core::parse_json( + R"EOF(@OPENAPI_OAS_3_2_DIALECT_2025_09_17@)EOF"); + case KnownSchema::OAS_3_2_META_2025_09_17: + return sourcemeta::core::parse_json( + R"EOF(@OPENAPI_OAS_3_2_META_2025_09_17@)EOF"); + case KnownSchema::OAS_3_1_DIALECT_BASE: + return sourcemeta::core::parse_json( + R"EOF(@OPENAPI_OAS_3_1_DIALECT_BASE@)EOF"); + case KnownSchema::OAS_3_1_META_BASE: + return sourcemeta::core::parse_json( + R"EOF(@OPENAPI_OAS_3_1_META_BASE@)EOF"); + case KnownSchema::UNKNOWN: + return std::nullopt; } + + return std::nullopt; +} + +auto sourcemeta::core::is_known_schema( + const std::string_view identifier) 
noexcept -> bool { + return parse_identifier(identifier) != KnownSchema::UNKNOWN; } diff --git a/vendor/core/src/core/md5/include/sourcemeta/core/md5.h b/vendor/core/src/core/md5/include/sourcemeta/core/md5.h deleted file mode 100644 index e39796fa..00000000 --- a/vendor/core/src/core/md5/include/sourcemeta/core/md5.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef SOURCEMETA_CORE_MD5_H_ -#define SOURCEMETA_CORE_MD5_H_ - -#ifndef SOURCEMETA_CORE_MD5_EXPORT -#include -#endif - -#include // std::ostream -#include // std::string_view - -/// @defgroup md5 MD5 -/// @brief An implementation of RFC 1321 MD5 Message-Digest. -/// -/// This functionality is included as follows: -/// -/// ```cpp -/// #include -/// ``` - -namespace sourcemeta::core { - -/// @ingroup md5 -/// Hash a string using MD5. For example: -/// -/// ```cpp -/// #include -/// #include -/// #include -/// -/// std::ostringstream result; -/// sourcemeta::hydra::md5("foo bar", result); -/// std::cout << result.str() << "\n"; -/// ``` -auto SOURCEMETA_CORE_MD5_EXPORT md5(const std::string_view input, - std::ostream &output) -> void; - -} // namespace sourcemeta::core - -#endif diff --git a/vendor/core/src/core/md5/md5.cc b/vendor/core/src/core/md5/md5.cc deleted file mode 100644 index 92431c9a..00000000 --- a/vendor/core/src/core/md5/md5.cc +++ /dev/null @@ -1,169 +0,0 @@ -#include - -#include // std::array -#include // std::uint32_t, std::uint64_t -#include // std::memcpy -#include // std::hex, std::setfill - -namespace { - -inline constexpr auto rotate_left(std::uint32_t value, - std::uint64_t count) noexcept - -> std::uint32_t { - return (value << count) | (value >> (32u - count)); -} - -inline auto md5_process_block(const unsigned char *block, - std::array &state) noexcept - -> void { - // Constants defined by the MD5 RFC (sine table) - static constexpr std::array k = { - {0xd76aa478U, 0xe8c7b756U, 0x242070dbU, 0xc1bdceeeU, 0xf57c0fafU, - 0x4787c62aU, 0xa8304613U, 0xfd469501U, 0x698098d8U, 0x8b44f7afU, - 
0xffff5bb1U, 0x895cd7beU, 0x6b901122U, 0xfd987193U, 0xa679438eU, - 0x49b40821U, 0xf61e2562U, 0xc040b340U, 0x265e5a51U, 0xe9b6c7aaU, - 0xd62f105dU, 0x02441453U, 0xd8a1e681U, 0xe7d3fbc8U, 0x21e1cde6U, - 0xc33707d6U, 0xf4d50d87U, 0x455a14edU, 0xa9e3e905U, 0xfcefa3f8U, - 0x676f02d9U, 0x8d2a4c8aU, 0xfffa3942U, 0x8771f681U, 0x6d9d6122U, - 0xfde5380cU, 0xa4beea44U, 0x4bdecfa9U, 0xf6bb4b60U, 0xbebfbc70U, - 0x289b7ec6U, 0xeaa127faU, 0xd4ef3085U, 0x04881d05U, 0xd9d4d039U, - 0xe6db99e5U, 0x1fa27cf8U, 0xc4ac5665U, 0xf4292244U, 0x432aff97U, - 0xab9423a7U, 0xfc93a039U, 0x655b59c3U, 0x8f0ccc92U, 0xffeff47dU, - 0x85845dd1U, 0x6fa87e4fU, 0xfe2ce6e0U, 0xa3014314U, 0x4e0811a1U, - 0xf7537e82U, 0xbd3af235U, 0x2ad7d2bbU, 0xeb86d391U}}; - - static constexpr std::array s = { - {7u, 12u, 17u, 22u, 7u, 12u, 17u, 22u, 7u, 12u, 17u, 22u, 7u, - 12u, 17u, 22u, 5u, 9u, 14u, 20u, 5u, 9u, 14u, 20u, 5u, 9u, - 14u, 20u, 5u, 9u, 14u, 20u, 4u, 11u, 16u, 23u, 4u, 11u, 16u, - 23u, 4u, 11u, 16u, 23u, 4u, 11u, 16u, 23u, 6u, 10u, 15u, 21u, - 6u, 10u, 15u, 21u, 6u, 10u, 15u, 21u, 6u, 10u, 15u, 21u}}; - - // Decode 16 little-endian 32-bit words from the block - std::array message_words{}; - for (std::uint64_t word_index = 0; word_index < 16u; ++word_index) { - const std::uint64_t byte_index = word_index * 4u; - message_words[word_index] = - static_cast(block[byte_index]) | - (static_cast(block[byte_index + 1u]) << 8u) | - (static_cast(block[byte_index + 2u]) << 16u) | - (static_cast(block[byte_index + 3u]) << 24u); - } - - std::uint32_t a = state[0]; - std::uint32_t b = state[1]; - std::uint32_t c = state[2]; - std::uint32_t d = state[3]; - - for (std::uint64_t round_index = 0u; round_index < 64u; ++round_index) { - std::uint32_t f; - std::uint64_t g; - - if (round_index < 16u) { - f = (b & c) | ((~b) & d); - g = round_index; - } else if (round_index < 32u) { - f = (d & b) | ((~d) & c); - g = (5u * round_index + 1u) % 16u; - } else if (round_index < 48u) { - f = b ^ c ^ d; - g = (3u * round_index + 5u) % 
16u; - } else { - f = c ^ (b | (~d)); - g = (7u * round_index) % 16u; - } - - const std::uint32_t temp = d; - d = c; - c = b; - const std::uint32_t computed = a + f + k[round_index] + message_words[g]; - b = b + rotate_left(computed, s[round_index]); - a = temp; - } - - state[0] += a; - state[1] += b; - state[2] += c; - state[3] += d; -} - -} // namespace - -namespace sourcemeta::core { - -auto md5(const std::string_view input, std::ostream &output) -> void { - // Initial state as per RFC 1321 - std::array state{}; - state[0] = 0x67452301U; - state[1] = 0xefcdab89U; - state[2] = 0x98badcfeU; - state[3] = 0x10325476U; - - const auto *const input_bytes = - reinterpret_cast(input.data()); - const std::size_t input_length = input.size(); - - // Process all full 64-byte blocks directly from the input (streaming) - std::size_t processed_bytes = 0u; - while (input_length - processed_bytes >= 64u) { - md5_process_block(input_bytes + processed_bytes, state); - processed_bytes += 64u; - } - - // Prepare the final block(s) (one or two 64-byte blocks) - std::array final_block{}; - const std::size_t remaining_bytes = input_length - processed_bytes; - if (remaining_bytes > 0u) { - std::memcpy(final_block.data(), input_bytes + processed_bytes, - remaining_bytes); - } - - // Append the 0x80 byte after the message data - final_block[remaining_bytes] = 0x80u; - - // Append length in bits as little-endian 64-bit at the end of the padding - const std::uint64_t message_length_bits = - static_cast(input_length) * 8ull; - - if (remaining_bytes < 56u) { - // Enough room for length in the first final block - // place length at final_block[56..63] - for (std::uint64_t index = 0u; index < 8u; ++index) { - final_block[56u + index] = static_cast( - (message_length_bits >> (8u * index)) & 0xffu); - } - md5_process_block(final_block.data(), state); - } else { - // Need two blocks: process final_block[0..63] then final_block[64..127] - // with length - for (std::uint64_t index = 0u; index < 8u; 
++index) { - final_block[64u + 56u + index] = static_cast( - (message_length_bits >> (8u * index)) & 0xffu); - } - - md5_process_block(final_block.data(), state); - md5_process_block(final_block.data() + 64u, state); - } - - // Produce the final digest (little-endian) - std::array digest; - for (std::uint64_t state_index = 0u; state_index < 4u; ++state_index) { - const std::uint32_t value = state[state_index]; - const std::uint64_t base_index = state_index * 4u; - digest[base_index + 0u] = static_cast(value & 0xffu); - digest[base_index + 1u] = static_cast((value >> 8u) & 0xffu); - digest[base_index + 2u] = - static_cast((value >> 16u) & 0xffu); - digest[base_index + 3u] = - static_cast((value >> 24u) & 0xffu); - } - - output << std::hex << std::setfill('0'); - for (const unsigned char octet : digest) { - output << std::setw(2) << static_cast(octet); - } - - output.unsetf(std::ios_base::hex); -} - -} // namespace sourcemeta::core diff --git a/vendor/core/src/core/punycode/CMakeLists.txt b/vendor/core/src/core/punycode/CMakeLists.txt index 67002265..4547073f 100644 --- a/vendor/core/src/core/punycode/CMakeLists.txt +++ b/vendor/core/src/core/punycode/CMakeLists.txt @@ -1,7 +1,9 @@ sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME punycode PRIVATE_HEADERS error.h - SOURCES punycode.cc utf8.h) + SOURCES punycode.cc) if(SOURCEMETA_CORE_INSTALL) sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME punycode) endif() + +target_link_libraries(sourcemeta_core_punycode PRIVATE sourcemeta::core::unicode) diff --git a/vendor/core/src/core/punycode/punycode.cc b/vendor/core/src/core/punycode/punycode.cc index 835d93b9..54e70579 100644 --- a/vendor/core/src/core/punycode/punycode.cc +++ b/vendor/core/src/core/punycode/punycode.cc @@ -1,13 +1,12 @@ #include #include - -#include "utf8.h" +#include #include // std::ranges::sort #include // assert #include // std::uint32_t, std::uint64_t #include // std::numeric_limits -#include // std::istringstream, 
std::ostringstream +#include // std::ostringstream #include // std::vector namespace sourcemeta::core { @@ -285,12 +284,13 @@ auto punycode_to_utf8(std::istream &input, std::ostream &output) -> void { std::u32string decoded; punycode_decode(encoded, decoded); - utf32_to_utf8(decoded, output); + for (const auto code_point : decoded) { + codepoint_to_utf8(code_point, output); + } } auto utf8_to_punycode(const std::string_view input) -> std::string { - std::istringstream input_stream{std::string{input}}; - const auto codepoints = utf8_to_utf32(input_stream); + const auto codepoints = utf8_to_utf32(input); if (!codepoints.has_value()) { throw PunycodeError("Invalid UTF-8 input"); } @@ -304,7 +304,9 @@ auto punycode_to_utf8(const std::string_view input) -> std::string { std::u32string decoded; punycode_decode(input, decoded); std::ostringstream output_stream; - utf32_to_utf8(decoded, output_stream); + for (const auto code_point : decoded) { + codepoint_to_utf8(code_point, output_stream); + } return output_stream.str(); } diff --git a/vendor/core/src/core/punycode/utf8.h b/vendor/core/src/core/punycode/utf8.h deleted file mode 100644 index 779e3f42..00000000 --- a/vendor/core/src/core/punycode/utf8.h +++ /dev/null @@ -1,87 +0,0 @@ -#ifndef SOURCEMETA_CORE_PUNYCODE_UTF8_H_ -#define SOURCEMETA_CORE_PUNYCODE_UTF8_H_ - -#include // std::uint8_t -#include // std::istream -#include // std::optional, std::nullopt -#include // std::ostream -#include // std::u32string - -// TODO: We might want to extract this into a "unicode" module - -namespace sourcemeta::core { - -inline auto utf8_to_utf32(std::istream &input) - -> std::optional { - std::u32string result; - std::uint8_t byte{0}; - - while (input.read(reinterpret_cast(&byte), 1)) { - char32_t code_point{0}; - std::uint8_t continuation_count{0}; - char32_t minimum{0}; - - if (byte < 0x80) { - result.push_back(byte); - continue; - } else if ((byte & 0xE0) == 0xC0) { - code_point = byte & 0x1F; - continuation_count = 1; - minimum 
= 0x80; - } else if ((byte & 0xF0) == 0xE0) { - code_point = byte & 0x0F; - continuation_count = 2; - minimum = 0x800; - } else if ((byte & 0xF8) == 0xF0) { - code_point = byte & 0x07; - continuation_count = 3; - minimum = 0x10000; - } else { - return std::nullopt; - } - - for (std::uint8_t index = 0; index < continuation_count; ++index) { - std::uint8_t continuation{0}; - if (!input.read(reinterpret_cast(&continuation), 1) || - (continuation & 0xC0) != 0x80) { - return std::nullopt; - } - - code_point = (code_point << 6) | (continuation & 0x3F); - } - - if (code_point < minimum || code_point > 0x10FFFF || - (code_point >= 0xD800 && code_point <= 0xDFFF)) { - return std::nullopt; - } - - result.push_back(code_point); - } - - return result; -} - -inline auto utf32_to_utf8(const std::u32string &codepoints, - std::ostream &output) -> void { - for (const auto code_point : codepoints) { - if (code_point < 0x80) { - output.put(static_cast(code_point)); - } else if (code_point < 0x800) { - output.put(static_cast(0xC0 | (code_point >> 6))); - output.put(static_cast(0x80 | (code_point & 0x3F))); - } else if (code_point < 0x10000) { - output.put(static_cast(0xE0 | (code_point >> 12))); - output.put(static_cast(0x80 | ((code_point >> 6) & 0x3F))); - output.put(static_cast(0x80 | (code_point & 0x3F))); - } else { - output.put(static_cast(0xF0 | (code_point >> 18))); - output.put(static_cast(0x80 | ((code_point >> 12) & 0x3F))); - output.put(static_cast(0x80 | ((code_point >> 6) & 0x3F))); - output.put(static_cast(0x80 | (code_point & 0x3F))); - } - } -} - -} // namespace sourcemeta::core - -#endif diff --git a/vendor/core/src/core/regex/preprocess.h b/vendor/core/src/core/regex/preprocess.h index 227b41c4..b844aa2c 100644 --- a/vendor/core/src/core/regex/preprocess.h +++ b/vendor/core/src/core/regex/preprocess.h @@ -618,12 +618,21 @@ inline auto preprocess_regex(const std::string &pattern) const bool starts_with_nested = !nested_content.empty() && nested_content[0] == '['; + 
// Check if the character after the simple bracket end continues + // the class with more v-flag syntax (another nested class or outer + // closing bracket). This distinguishes true v-flag nesting like + // [[a-z][A-Z]] from a literal [ inside a standard class like [[(] + const bool after_simple_continues_class = + simple_end < pattern.size() && + (pattern[simple_end] == '[' || pattern[simple_end] == ']'); + // Use v-flag mode if: // 1. Nested content has v-flag operators (-- or &&), OR // 2. Content starts with [ (indicating v-flag nested class syntax) - // AND the ends differ (so there's actual nesting being tracked) + // AND the ends differ AND the class continues after simple end const bool use_v_flag = - nested_has_ops || (starts_with_nested && simple_end != nested_end); + nested_has_ops || (starts_with_nested && simple_end != nested_end && + after_simple_continues_class); if (use_v_flag) { const auto expanded = expand_char_class(nested_content); diff --git a/vendor/core/src/core/semver/CMakeLists.txt b/vendor/core/src/core/semver/CMakeLists.txt new file mode 100644 index 00000000..3141a669 --- /dev/null +++ b/vendor/core/src/core/semver/CMakeLists.txt @@ -0,0 +1,9 @@ +sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME semver + PRIVATE_HEADERS error.h + SOURCES semver.cc) +target_link_libraries(sourcemeta_core_semver + PUBLIC sourcemeta::core::preprocessor) + +if(SOURCEMETA_CORE_INSTALL) + sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME semver) +endif() diff --git a/vendor/core/src/core/semver/include/sourcemeta/core/semver.h b/vendor/core/src/core/semver/include/sourcemeta/core/semver.h new file mode 100644 index 00000000..c55b3892 --- /dev/null +++ b/vendor/core/src/core/semver/include/sourcemeta/core/semver.h @@ -0,0 +1,131 @@ +#ifndef SOURCEMETA_CORE_SEMVER_H_ +#define SOURCEMETA_CORE_SEMVER_H_ + +#ifndef SOURCEMETA_CORE_SEMVER_EXPORT +#include +#endif + +// NOLINTBEGIN(misc-include-cleaner) +#include +// 
NOLINTEND(misc-include-cleaner) + +#include + +#include // std::uint64_t +#include // std::optional +#include // std::string +#include // std::string_view + +/// @defgroup semver SemVer +/// @brief An implementation of the Semantic Versioning 2.0.0 specification. +/// +/// This functionality is included as follows: +/// +/// ```cpp +/// #include +/// ``` + +namespace sourcemeta::core { + +/// @ingroup semver +/// A non-owning view over a parsed SemVer 2.0.0 version string. +/// The input string must outlive this object. +class SOURCEMETA_CORE_SEMVER_EXPORT SemVer { +public: + enum class Mode : std::uint8_t { + Strict, + + // Permits the following deviations on the version core only: + // - Optional "v" or "V" prefix (e.g. "v1.2.3") + // - Missing patch, defaulting to 0 (e.g. "1.2") + // - Missing minor and patch, defaulting to 0 (e.g. "1") + // - Combinations of the above (e.g. "v1", "v1.2") + Loose + }; + + SemVer(std::string_view input, Mode mode = Mode::Strict); + + [[nodiscard]] static auto from(std::string_view input, + Mode mode = Mode::Strict) noexcept + -> std::optional; + + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto major() const noexcept + -> std::uint64_t { + return this->major_; + } + + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto minor() const noexcept + -> std::uint64_t { + return this->minor_; + } + + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto patch() const noexcept + -> std::uint64_t { + return this->patch_; + } + + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto pre_release() const noexcept + -> std::string_view { + return this->pre_release_; + } + + [[nodiscard]] SOURCEMETA_FORCEINLINE inline auto build() const noexcept + -> std::string_view { + return this->build_; + } + + // Build metadata is included in equality because equality is not + // precedence. The spec says build metadata must be ignored when + // determining version *precedence*, but two versions with different build + // metadata are distinct versions. 
+ // See https://semver.org/spec/v2.0.0.html#spec-item-10 + SOURCEMETA_FORCEINLINE inline auto + operator==(const SemVer &other) const noexcept -> bool { + return this->major_ == other.major_ && this->minor_ == other.minor_ && + this->patch_ == other.patch_ && + this->pre_release_ == other.pre_release_ && + this->build_ == other.build_; + } + + auto operator<(const SemVer &other) const noexcept -> bool; + + SOURCEMETA_FORCEINLINE inline auto + operator!=(const SemVer &other) const noexcept -> bool { + return !(*this == other); + } + + SOURCEMETA_FORCEINLINE inline auto + operator>(const SemVer &other) const noexcept -> bool { + return other < *this; + } + + SOURCEMETA_FORCEINLINE inline auto + operator<=(const SemVer &other) const noexcept -> bool { + return !(other < *this); + } + + SOURCEMETA_FORCEINLINE inline auto + operator>=(const SemVer &other) const noexcept -> bool { + return !(*this < other); + } + + [[nodiscard]] auto to_string() const -> std::string; + +private: + SemVer() = default; + std::uint64_t major_{0}; + std::uint64_t minor_{0}; + std::uint64_t patch_{0}; +#if defined(_MSC_VER) +#pragma warning(disable : 4251) +#endif + std::string_view pre_release_; + std::string_view build_; +#if defined(_MSC_VER) +#pragma warning(default : 4251) +#endif +}; + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/semver/include/sourcemeta/core/semver_error.h b/vendor/core/src/core/semver/include/sourcemeta/core/semver_error.h new file mode 100644 index 00000000..361e580e --- /dev/null +++ b/vendor/core/src/core/semver/include/sourcemeta/core/semver_error.h @@ -0,0 +1,56 @@ +#ifndef SOURCEMETA_CORE_SEMVER_ERROR_H_ +#define SOURCEMETA_CORE_SEMVER_ERROR_H_ + +#ifndef SOURCEMETA_CORE_SEMVER_EXPORT +#include +#endif + +#include // std::uint64_t +#include // std::exception + +namespace sourcemeta::core { + +#if defined(_MSC_VER) +#pragma warning(disable : 4251 4275) +#endif + +class SOURCEMETA_CORE_SEMVER_EXPORT SemVerParseError : public 
std::exception { +public: + SemVerParseError(const std::uint64_t column) : column_{column} {} + + [[nodiscard]] auto what() const noexcept -> const char * override { + return "The input is not a valid Semantic Version"; + } + + [[nodiscard]] auto column() const noexcept -> std::uint64_t { + return this->column_; + } + +private: + std::uint64_t column_; +}; + +class SOURCEMETA_CORE_SEMVER_EXPORT SemVerOverflowError + : public std::exception { +public: + SemVerOverflowError(const std::uint64_t column) : column_{column} {} + + [[nodiscard]] auto what() const noexcept -> const char * override { + return "The numeric component of the Semantic Version overflows"; + } + + [[nodiscard]] auto column() const noexcept -> std::uint64_t { + return this->column_; + } + +private: + std::uint64_t column_; +}; + +#if defined(_MSC_VER) +#pragma warning(default : 4251 4275) +#endif + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/semver/semver.cc b/vendor/core/src/core/semver/semver.cc new file mode 100644 index 00000000..7d286434 --- /dev/null +++ b/vendor/core/src/core/semver/semver.cc @@ -0,0 +1,450 @@ +#include + +#include // std::numeric_limits +#include // std::optional, std::nullopt +#include // std::string, std::to_string + +namespace { + +auto is_digit(const char character) -> bool { + return character >= '0' && character <= '9'; +} + +auto is_letter(const char character) -> bool { + return (character >= 'A' && character <= 'Z') || + (character >= 'a' && character <= 'z'); +} + +auto is_identifier_character(const char character) -> bool { + return is_digit(character) || is_letter(character) || character == '-'; +} + +constexpr auto UINT64_MAX_VALUE = std::numeric_limits::max(); +constexpr auto UINT64_MAX_DIV_10 = UINT64_MAX_VALUE / 10; +constexpr auto UINT64_MAX_MOD_10 = UINT64_MAX_VALUE % 10; + +enum class NumericParseResult : std::uint8_t { success, invalid, overflow }; + +auto parse_numeric_identifier(const std::string_view input, + 
std::size_t &position, std::uint64_t &result) + -> NumericParseResult { + if (position >= input.size() || !is_digit(input[position])) { + return NumericParseResult::invalid; + } + + if (input[position] == '0' && position + 1 < input.size() && + is_digit(input[position + 1])) { + return NumericParseResult::invalid; + } + + std::uint64_t value = 0; + while (position < input.size() && is_digit(input[position])) { + const auto digit = static_cast(input[position] - '0'); + if (value > UINT64_MAX_DIV_10 || + (value == UINT64_MAX_DIV_10 && digit > UINT64_MAX_MOD_10)) { + return NumericParseResult::overflow; + } + + value = value * 10 + digit; + ++position; + } + + result = value; + return NumericParseResult::success; +} + +auto validate_pre_release_identifier(const std::string_view identifier) + -> bool { + if (identifier.empty()) { + return false; + } + + bool has_non_digit = false; + for (const auto character : identifier) { + if (!is_identifier_character(character)) { + return false; + } + + if (!is_digit(character)) { + has_non_digit = true; + } + } + + if (!has_non_digit && identifier.size() > 1 && identifier[0] == '0') { + return false; + } + + return true; +} + +auto validate_build_identifier(const std::string_view identifier) -> bool { + if (identifier.empty()) { + return false; + } + + for (const auto character : identifier) { + if (!is_identifier_character(character)) { + return false; + } + } + + return true; +} + +template +auto validate_dot_separated(const std::string_view input) -> bool { + if (input.empty()) { + return false; + } + + std::size_t start = 0; + while (start <= input.size()) { + auto dot_position = input.find('.', start); + if (dot_position == std::string_view::npos) { + dot_position = input.size(); + } + + if (!validator(input.substr(start, dot_position - start))) { + return false; + } + + start = dot_position + 1; + if (dot_position == input.size()) { + break; + } + } + + return true; +} + +struct IdentifierInfo { + bool is_numeric; + bool 
overflowed; + std::uint64_t numeric_value; +}; + +auto classify_identifier(const std::string_view identifier) noexcept + -> IdentifierInfo { + std::uint64_t value = 0; + for (const auto character : identifier) { + if (!is_digit(character)) { + return {.is_numeric = false, .overflowed = false, .numeric_value = 0}; + } + + const auto digit = static_cast(character - '0'); + if (value > UINT64_MAX_DIV_10 || + (value == UINT64_MAX_DIV_10 && digit > UINT64_MAX_MOD_10)) { + return {.is_numeric = true, .overflowed = true, .numeric_value = 0}; + } + + value = value * 10 + digit; + } + + return {.is_numeric = true, .overflowed = false, .numeric_value = value}; +} + +auto compare_pre_release(const std::string_view left, + const std::string_view right) noexcept -> int { + if (left.empty() && right.empty()) { + return 0; + } + + if (left.empty()) { + return 1; + } + + if (right.empty()) { + return -1; + } + + std::size_t left_position = 0; + std::size_t right_position = 0; + + while (left_position <= left.size() && right_position <= right.size()) { + auto left_dot = left.find('.', left_position); + if (left_dot == std::string_view::npos) { + left_dot = left.size(); + } + + auto right_dot = right.find('.', right_position); + if (right_dot == std::string_view::npos) { + right_dot = right.size(); + } + + const std::string_view left_identifier{left.data() + left_position, + left_dot - left_position}; + const std::string_view right_identifier{right.data() + right_position, + right_dot - right_position}; + + const auto left_info = classify_identifier(left_identifier); + const auto right_info = classify_identifier(right_identifier); + + if (left_info.is_numeric && right_info.is_numeric) { + if (left_info.overflowed || right_info.overflowed) { + if (left_identifier.size() != right_identifier.size()) { + return left_identifier.size() < right_identifier.size() ? 
-1 : 1; + } + + if (left_identifier < right_identifier) { + return -1; + } + + if (left_identifier > right_identifier) { + return 1; + } + } else { + if (left_info.numeric_value < right_info.numeric_value) { + return -1; + } + + if (left_info.numeric_value > right_info.numeric_value) { + return 1; + } + } + } else if (left_info.is_numeric && !right_info.is_numeric) { + return -1; + } else if (!left_info.is_numeric && right_info.is_numeric) { + return 1; + } else { + if (left_identifier < right_identifier) { + return -1; + } + + if (left_identifier > right_identifier) { + return 1; + } + } + + left_position = left_dot + 1; + right_position = right_dot + 1; + + if (left_dot == left.size() && right_dot == right.size()) { + break; + } + + if (left_dot == left.size()) { + return -1; + } + + if (right_dot == right.size()) { + return 1; + } + } + + return 0; +} + +template +auto parse_semver(const std::string_view input, std::uint64_t &major, + std::uint64_t &minor, std::uint64_t &patch, + std::string_view &pre_release, std::string_view &build) + -> bool { + std::size_t position = 0; + + if constexpr (loose) { + if (position < input.size() && + (input[position] == 'v' || input[position] == 'V')) { + ++position; + } + } + + const auto major_result = parse_numeric_identifier(input, position, major); + if (major_result == NumericParseResult::overflow) { + if constexpr (should_throw) { + throw sourcemeta::core::SemVerOverflowError(position + 1); + } + + return false; + } + + if (major_result == NumericParseResult::invalid) { + if constexpr (should_throw) { + throw sourcemeta::core::SemVerParseError(position + 1); + } + + return false; + } + + auto can_end_core = [&]() -> bool { + if (position >= input.size() || input[position] == '-' || + input[position] == '+') { + return loose; + } + + return input[position] == '.'; + }; + + if (!can_end_core()) { + if constexpr (should_throw) { + throw sourcemeta::core::SemVerParseError(position + 1); + } + + return false; + } + + if 
(position < input.size() && input[position] == '.') { + ++position; + + const auto minor_result = parse_numeric_identifier(input, position, minor); + if (minor_result == NumericParseResult::overflow) { + if constexpr (should_throw) { + throw sourcemeta::core::SemVerOverflowError(position + 1); + } + + return false; + } + + if (minor_result == NumericParseResult::invalid) { + if constexpr (should_throw) { + throw sourcemeta::core::SemVerParseError(position + 1); + } + + return false; + } + + if (!can_end_core()) { + if constexpr (should_throw) { + throw sourcemeta::core::SemVerParseError(position + 1); + } + + return false; + } + + if (position < input.size() && input[position] == '.') { + ++position; + + const auto patch_result = + parse_numeric_identifier(input, position, patch); + if (patch_result == NumericParseResult::overflow) { + if constexpr (should_throw) { + throw sourcemeta::core::SemVerOverflowError(position + 1); + } + + return false; + } + + if (patch_result == NumericParseResult::invalid) { + if constexpr (should_throw) { + throw sourcemeta::core::SemVerParseError(position + 1); + } + + return false; + } + } + } + + if (position < input.size() && input[position] == '-') { + ++position; + const auto start = position; + while (position < input.size() && input[position] != '+') { + ++position; + } + + pre_release = input.substr(start, position - start); + if (!validate_dot_separated(pre_release)) { + if constexpr (should_throw) { + throw sourcemeta::core::SemVerParseError(start + 1); + } + + return false; + } + } + + if (position < input.size() && input[position] == '+') { + ++position; + const auto start = position; + position = input.size(); + + build = input.substr(start, position - start); + if (!validate_dot_separated(build)) { + if constexpr (should_throw) { + throw sourcemeta::core::SemVerParseError(start + 1); + } + + return false; + } + } + + if (position != input.size()) { + if constexpr (should_throw) { + throw 
sourcemeta::core::SemVerParseError(position + 1); + } + + return false; + } + + return true; +} + +} // namespace + +namespace sourcemeta::core { + +SemVer::SemVer(const std::string_view input, const Mode mode) { + if (mode == Mode::Loose) { + parse_semver(input, this->major_, this->minor_, this->patch_, + this->pre_release_, this->build_); + } else { + parse_semver(input, this->major_, this->minor_, this->patch_, + this->pre_release_, this->build_); + } +} + +auto SemVer::from(const std::string_view input, const Mode mode) noexcept + -> std::optional { + SemVer result; + bool success = false; + + if (mode == Mode::Loose) { + success = parse_semver(input, result.major_, result.minor_, + result.patch_, result.pre_release_, + result.build_); + } else { + success = parse_semver(input, result.major_, result.minor_, + result.patch_, result.pre_release_, + result.build_); + } + + if (success) { + return result; + } + + return std::nullopt; +} + +auto SemVer::operator<(const SemVer &other) const noexcept -> bool { + if (this->major_ != other.major_) { + return this->major_ < other.major_; + } + + if (this->minor_ != other.minor_) { + return this->minor_ < other.minor_; + } + + if (this->patch_ != other.patch_) { + return this->patch_ < other.patch_; + } + + return compare_pre_release(this->pre_release_, other.pre_release_) < 0; +} + +auto SemVer::to_string() const -> std::string { + std::string result = std::to_string(this->major_); + result += '.'; + result += std::to_string(this->minor_); + result += '.'; + result += std::to_string(this->patch_); + if (!this->pre_release_.empty()) { + result += '-'; + result.append(this->pre_release_.data(), this->pre_release_.size()); + } + + if (!this->build_.empty()) { + result += '+'; + result.append(this->build_.data(), this->build_.size()); + } + + return result; +} + +} // namespace sourcemeta::core diff --git a/vendor/core/src/core/uuid/CMakeLists.txt b/vendor/core/src/core/unicode/CMakeLists.txt similarity index 50% rename from 
vendor/core/src/core/uuid/CMakeLists.txt rename to vendor/core/src/core/unicode/CMakeLists.txt index 324b05a7..533ee967 100644 --- a/vendor/core/src/core/uuid/CMakeLists.txt +++ b/vendor/core/src/core/unicode/CMakeLists.txt @@ -1,5 +1,6 @@ -sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME uuid SOURCES uuid.cc) +sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME unicode + SOURCES unicode.cc) if(SOURCEMETA_CORE_INSTALL) - sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME uuid) + sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME unicode) endif() diff --git a/vendor/core/src/core/unicode/include/sourcemeta/core/unicode.h b/vendor/core/src/core/unicode/include/sourcemeta/core/unicode.h new file mode 100644 index 00000000..c8845a08 --- /dev/null +++ b/vendor/core/src/core/unicode/include/sourcemeta/core/unicode.h @@ -0,0 +1,103 @@ +#ifndef SOURCEMETA_CORE_UNICODE_H_ +#define SOURCEMETA_CORE_UNICODE_H_ + +#ifndef SOURCEMETA_CORE_UNICODE_EXPORT +#include +#endif + +#include // std::istream +#include // std::optional +#include // std::ostream +#include // std::string, std::u32string +#include // std::string_view + +/// @defgroup unicode Unicode +/// @brief Unicode encoding utilities. +/// +/// This functionality is included as follows: +/// +/// ```cpp +/// #include +/// ``` + +namespace sourcemeta::core { + +/// @ingroup unicode +/// Encode a single Unicode codepoint as a UTF-8 string. For example: +/// +/// ```cpp +/// #include +/// #include +/// +/// assert(sourcemeta::core::codepoint_to_utf8(0x41) == "A"); +/// ``` +SOURCEMETA_CORE_UNICODE_EXPORT +auto codepoint_to_utf8(const char32_t codepoint) -> std::string; + +/// @ingroup unicode +/// Encode a single Unicode codepoint as UTF-8 into an output stream. 
+/// For example: +/// +/// ```cpp +/// #include +/// #include +/// #include +/// +/// std::ostringstream output; +/// sourcemeta::core::codepoint_to_utf8(0x41, output); +/// assert(output.str() == "A"); +/// ``` +SOURCEMETA_CORE_UNICODE_EXPORT +auto codepoint_to_utf8(const char32_t codepoint, std::ostream &output) -> void; + +/// @ingroup unicode +/// Encode a single Unicode codepoint as UTF-8, appending to an existing string. +/// For example: +/// +/// ```cpp +/// #include +/// #include +/// +/// std::string output; +/// sourcemeta::core::codepoint_to_utf8(0x41, output); +/// assert(output == "A"); +/// ``` +SOURCEMETA_CORE_UNICODE_EXPORT +auto codepoint_to_utf8(const char32_t codepoint, std::string &output) -> void; + +/// @ingroup unicode +/// Decode a UTF-8 byte stream into a sequence of Unicode codepoints (UTF-32). +/// Returns std::nullopt if the input contains invalid UTF-8. For example: +/// +/// ```cpp +/// #include +/// #include +/// #include +/// +/// std::istringstream input{"A"}; +/// const auto result{sourcemeta::core::utf8_to_utf32(input)}; +/// assert(result.has_value()); +/// assert(result.value() == std::u32string{0x41}); +/// ``` +SOURCEMETA_CORE_UNICODE_EXPORT +auto utf8_to_utf32(std::istream &input) -> std::optional; + +/// @ingroup unicode +/// Decode a UTF-8 string into a sequence of Unicode codepoints (UTF-32). +/// Returns std::nullopt if the input contains invalid UTF-8. 
For example: +/// +/// ```cpp +/// #include +/// #include +/// +/// const auto result{sourcemeta::core::utf8_to_utf32("A")}; +/// assert(result.has_value()); +/// assert(result.value() == std::u32string{0x41}); +/// ``` +SOURCEMETA_CORE_UNICODE_EXPORT +auto utf8_to_utf32(const std::string_view input) + -> std::optional; + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/unicode/unicode.cc b/vendor/core/src/core/unicode/unicode.cc new file mode 100644 index 00000000..8ce8d7c6 --- /dev/null +++ b/vendor/core/src/core/unicode/unicode.cc @@ -0,0 +1,114 @@ +#include + +#include // assert +#include // std::uint8_t +#include // std::istringstream, std::ostringstream + +namespace sourcemeta::core { + +auto codepoint_to_utf8(const char32_t codepoint, std::ostream &output) -> void { + assert(codepoint <= 0x10FFFF); + assert(codepoint < 0xD800 || codepoint > 0xDFFF); + if (codepoint < 0x80) { + output.put(static_cast(codepoint)); + } else if (codepoint < 0x800) { + output.put(static_cast(0xC0 | (codepoint >> 6))); + output.put(static_cast(0x80 | (codepoint & 0x3F))); + } else if (codepoint < 0x10000) { + output.put(static_cast(0xE0 | (codepoint >> 12))); + output.put(static_cast(0x80 | ((codepoint >> 6) & 0x3F))); + output.put(static_cast(0x80 | (codepoint & 0x3F))); + } else { + output.put(static_cast(0xF0 | (codepoint >> 18))); + output.put(static_cast(0x80 | ((codepoint >> 12) & 0x3F))); + output.put(static_cast(0x80 | ((codepoint >> 6) & 0x3F))); + output.put(static_cast(0x80 | (codepoint & 0x3F))); + } +} + +auto codepoint_to_utf8(const char32_t codepoint, std::string &output) -> void { + assert(codepoint <= 0x10FFFF); + assert(codepoint < 0xD800 || codepoint > 0xDFFF); + if (codepoint < 0x80) { + output.push_back(static_cast(codepoint)); + } else if (codepoint < 0x800) { + output.push_back(static_cast(0xC0 | (codepoint >> 6))); + output.push_back(static_cast(0x80 | (codepoint & 0x3F))); + } else if (codepoint < 0x10000) { + 
output.push_back(static_cast(0xE0 | (codepoint >> 12))); + output.push_back(static_cast(0x80 | ((codepoint >> 6) & 0x3F))); + output.push_back(static_cast(0x80 | (codepoint & 0x3F))); + } else { + output.push_back(static_cast(0xF0 | (codepoint >> 18))); + output.push_back(static_cast(0x80 | ((codepoint >> 12) & 0x3F))); + output.push_back(static_cast(0x80 | ((codepoint >> 6) & 0x3F))); + output.push_back(static_cast(0x80 | (codepoint & 0x3F))); + } +} + +auto codepoint_to_utf8(const char32_t codepoint) -> std::string { + std::string output; + codepoint_to_utf8(codepoint, output); + return output; +} + +auto utf8_to_utf32(std::istream &input) -> std::optional { + std::u32string result; + std::uint8_t byte{0}; + + while (input.read(reinterpret_cast(&byte), 1)) { + char32_t code_point{0}; + std::uint8_t continuation_count{0}; + char32_t minimum{0}; + + if (byte < 0x80) { + result.push_back(byte); + continue; + } else if ((byte & 0xE0) == 0xC0) { + code_point = byte & 0x1F; + continuation_count = 1; + minimum = 0x80; + } else if ((byte & 0xF0) == 0xE0) { + code_point = byte & 0x0F; + continuation_count = 2; + minimum = 0x800; + } else if ((byte & 0xF8) == 0xF0) { + code_point = byte & 0x07; + continuation_count = 3; + minimum = 0x10000; + } else { + return std::nullopt; + } + + for (std::uint8_t index = 0; index < continuation_count; ++index) { + std::uint8_t continuation{0}; + if (!input.read(reinterpret_cast(&continuation), 1) || + (continuation & 0xC0) != 0x80) { + return std::nullopt; + } + + code_point = (code_point << 6) | (continuation & 0x3F); + } + + if (code_point < minimum || code_point > 0x10FFFF || + (code_point >= 0xD800 && code_point <= 0xDFFF)) { + return std::nullopt; + } + + result.push_back(code_point); + } + + if (!input.eof()) { + return std::nullopt; + } + + return result; +} + +auto utf8_to_utf32(const std::string_view input) + -> std::optional { + std::istringstream stream{std::string{input}}; + return utf8_to_utf32(stream); +} + +} // namespace 
sourcemeta::core diff --git a/vendor/core/src/core/uri/include/sourcemeta/core/uri.h b/vendor/core/src/core/uri/include/sourcemeta/core/uri.h index a4bae290..418da9e0 100644 --- a/vendor/core/src/core/uri/include/sourcemeta/core/uri.h +++ b/vendor/core/src/core/uri/include/sourcemeta/core/uri.h @@ -335,7 +335,7 @@ class SOURCEMETA_CORE_URI_EXPORT URI { /// /// const sourcemeta::core::URI /// uri{"https://www.sourcemeta.com/foo#bar"}; - /// assert(uri.recompose_without_fragment().has_value()"); + /// assert(uri.recompose_without_fragment().has_value()); /// assert(uri.recompose_without_fragment().value() == /// "https://sourcemeta.com/foo"); /// ``` @@ -349,7 +349,7 @@ class SOURCEMETA_CORE_URI_EXPORT URI { /// #include /// /// sourcemeta::core::URI uri{"hTtP://exAmpLe.com:80/TEST"}; - /// uri.canonicalize(): + /// uri.canonicalize(); /// assert(uri.recompose() == "http://example.com/TEST"); /// ``` auto canonicalize() -> URI &; @@ -417,7 +417,7 @@ class SOURCEMETA_CORE_URI_EXPORT URI { /// /// const sourcemeta::core::URI uri{"https://user:@host"}; /// assert(uri.userinfo().has_value()); - /// assert(uri.userinfo().value() == "user:); + /// assert(uri.userinfo().value() == "user:"); /// ``` /// /// As mentioned in RFC 3986, the format "user:password" is deprecated. @@ -469,6 +469,34 @@ class SOURCEMETA_CORE_URI_EXPORT URI { /// ``` static auto canonicalize(std::string_view input) -> std::string; + /// Check if the given string is a valid absolute URI (has a scheme) per + /// RFC 3986 without constructing a full URI object. 
For example: + /// + /// ```cpp + /// #include + /// #include + /// + /// assert(sourcemeta::core::URI::is_uri("https://example.com/path")); + /// assert(!sourcemeta::core::URI::is_uri("://bad")); + /// assert(!sourcemeta::core::URI::is_uri("relative/path")); + /// ``` + [[nodiscard]] static auto is_uri(std::string_view input) noexcept -> bool; + + /// Check if the given string is a valid URI reference per RFC 3986 + /// (absolute or relative) without constructing a full URI object. + /// For example: + /// + /// ```cpp + /// #include + /// #include + /// + /// assert(sourcemeta::core::URI::is_uri_reference("https://example.com")); + /// assert(sourcemeta::core::URI::is_uri_reference("relative/path")); + /// assert(!sourcemeta::core::URI::is_uri_reference("://bad")); + /// ``` + [[nodiscard]] static auto is_uri_reference(std::string_view input) noexcept + -> bool; + private: auto parse(std::string_view input) -> void; diff --git a/vendor/core/src/core/uri/parse.cc b/vendor/core/src/core/uri/parse.cc index fa95685b..bb1f8bec 100644 --- a/vendor/core/src/core/uri/parse.cc +++ b/vendor/core/src/core/uri/parse.cc @@ -3,12 +3,15 @@ #include "escaping.h" #include "grammar.h" -#include // assert -#include // std::isalnum, std::isxdigit, std::isalpha, std::isdigit -#include // std::uint64_t -#include // std::optional -#include // std::string, std::stoul +#include // assert +#include // std::isalnum, std::isxdigit, std::isalpha, std::isdigit +#include // std::uint64_t +#include // std::numeric_limits +#include // std::optional +#include // std::out_of_range +#include // std::string, std::stoul #include // std::string_view +#include // std::conditional_t namespace { @@ -86,12 +89,17 @@ auto validate_percent_encoded_utf8(const std::string_view input, return 3 * (1 + continuation_count); } +template auto parse_scheme(const std::string_view input, std::string_view::size_type &position) - -> std::optional { + -> std::conditional_t> { if (position >= input.size() || 
!std::isalpha(static_cast(input[position]))) { - return std::nullopt; + if constexpr (CheckOnly) { + return false; + } else { + return std::nullopt; + } } const auto start = position; @@ -102,21 +110,35 @@ auto parse_scheme(const std::string_view input, } if (position < input.size() && input[position] == URI_COLON) { - std::string scheme{input.substr(start, position - start)}; - position += 1; - return scheme; + if constexpr (CheckOnly) { + position += 1; + return true; + } else { + std::string scheme{input.substr(start, position - start)}; + position += 1; + return scheme; + } } position = start; - return std::nullopt; + if constexpr (CheckOnly) { + return false; + } else { + return std::nullopt; + } } +template auto parse_port(const std::string_view input, std::string_view::size_type &position) - -> std::optional { + -> std::conditional_t> { if (position >= input.size() || !std::isdigit(static_cast(input[position]))) { - return std::nullopt; + if constexpr (CheckOnly) { + return false; + } else { + return std::nullopt; + } } const auto start = position; @@ -125,19 +147,77 @@ auto parse_port(const std::string_view input, position += 1; } - const std::string port_string{input.substr(start, position - start)}; - return std::stoul(port_string); + if constexpr (CheckOnly) { + return true; + } else { + try { + const std::string port_string{input.substr(start, position - start)}; + return std::stoul(port_string); + } catch (const std::out_of_range &) { + throw sourcemeta::core::URIParseError{ + static_cast(start + 1)}; + } + } } +template auto parse_ipv6(const std::string_view input, - std::string_view::size_type &position) -> std::string { + std::string_view::size_type &position) + -> std::conditional_t { assert(input[position] == URI_OPEN_BRACKET); const auto start = position; position += 1; - while (position < input.size() && input[position] != URI_CLOSE_BRACKET) { + // RFC 3986: IP-literal = "[" ( IPv6address / IPvFuture ) "]" + if (position < input.size() && + 
(input[position] == 'v' || input[position] == 'V')) { + // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) position += 1; + + // Require 1*HEXDIG for the version + if (position >= input.size() || input[position] == URI_CLOSE_BRACKET || + !std::isxdigit(static_cast(input[position]))) { + throw sourcemeta::core::URIParseError{ + static_cast(position + 1)}; + } + while (position < input.size() && input[position] != URI_CLOSE_BRACKET && + std::isxdigit(static_cast(input[position]))) { + position += 1; + } + + // Require "." separator + if (position >= input.size() || input[position] != URI_DOT) { + throw sourcemeta::core::URIParseError{ + static_cast(position + 1)}; + } + position += 1; + + // Require 1*( unreserved / sub-delims / ":" ) + if (position >= input.size() || input[position] == URI_CLOSE_BRACKET) { + throw sourcemeta::core::URIParseError{ + static_cast(position + 1)}; + } + while (position < input.size() && input[position] != URI_CLOSE_BRACKET) { + const auto current = input[position]; + if (!uri_is_unreserved(current) && !uri_is_sub_delim(current) && + current != URI_COLON) { + throw sourcemeta::core::URIParseError{ + static_cast(position + 1)}; + } + position += 1; + } + } else { + // IPv6address: only HEXDIG, ":", and "." 
are valid + while (position < input.size() && input[position] != URI_CLOSE_BRACKET) { + const auto current = input[position]; + if (!std::isxdigit(static_cast(current)) && + current != URI_COLON && current != URI_DOT) { + throw sourcemeta::core::URIParseError{ + static_cast(position + 1)}; + } + position += 1; + } } if (position >= input.size()) { @@ -145,19 +225,34 @@ auto parse_ipv6(const std::string_view input, static_cast(start + 1)}; } - std::string ipv6{input.substr(start + 1, position - start - 1)}; - position += 1; - return ipv6; + if constexpr (CheckOnly) { + position += 1; + } else { + std::string ipv6{input.substr(start + 1, position - start - 1)}; + position += 1; + return ipv6; + } } +template auto parse_host(const std::string_view input, - std::string_view::size_type &position) -> std::string { + std::string_view::size_type &position) + -> std::conditional_t { if (position >= input.size()) { - return std::string{}; + if constexpr (!CheckOnly) { + return std::string{}; + } else { + return; + } } if (input[position] == URI_OPEN_BRACKET) { - return parse_ipv6(input, position); + if constexpr (CheckOnly) { + parse_ipv6(input, position); + return; + } else { + return parse_ipv6(input, position); + } } const auto start = position; @@ -179,23 +274,31 @@ auto parse_host(const std::string_view input, } } - if (position == start) { - return std::string{}; - } + if constexpr (!CheckOnly) { + if (position == start) { + return std::string{}; + } - return std::string{input.substr(start, position - start)}; + return std::string{input.substr(start, position - start)}; + } } +template auto parse_userinfo(const std::string_view input, std::string_view::size_type &position) - -> std::optional { + -> std::conditional_t> { const auto start = position; while (position < input.size()) { const auto current = input[position]; if (current == URI_AT) { - std::string userinfo{input.substr(start, position - start)}; - position += 1; - return userinfo; + if constexpr (CheckOnly) { 
+ position += 1; + return true; + } else { + std::string userinfo{input.substr(start, position - start)}; + position += 1; + return userinfo; + } } if (current == URI_PERCENT) { @@ -210,19 +313,32 @@ auto parse_userinfo(const std::string_view input, } position = start; - return std::nullopt; + if constexpr (CheckOnly) { + return false; + } else { + return std::nullopt; + } } +template auto parse_path(const std::string_view input, std::string_view::size_type &position) - -> std::optional { + -> std::conditional_t> { if (position >= input.size()) { - return std::nullopt; + if constexpr (CheckOnly) { + return false; + } else { + return std::nullopt; + } } const auto first_char = input[position]; if (first_char == URI_QUESTION || first_char == URI_HASH) { - return std::nullopt; + if constexpr (CheckOnly) { + return false; + } else { + return std::nullopt; + } } const auto start = position; @@ -243,14 +359,23 @@ auto parse_path(const std::string_view input, } } - return std::string{input.substr(start, position - start)}; + if constexpr (CheckOnly) { + return true; + } else { + return std::string{input.substr(start, position - start)}; + } } +template auto parse_query(const std::string_view input, std::string_view::size_type &position) - -> std::optional { + -> std::conditional_t> { if (position >= input.size() || input[position] != URI_QUESTION) { - return std::nullopt; + if constexpr (CheckOnly) { + return false; + } else { + return std::nullopt; + } } position += 1; @@ -274,14 +399,23 @@ auto parse_query(const std::string_view input, } } - return std::string{input.substr(start, position - start)}; + if constexpr (CheckOnly) { + return true; + } else { + return std::string{input.substr(start, position - start)}; + } } +template auto parse_fragment(const std::string_view input, std::string_view::size_type &position) - -> std::optional { + -> std::conditional_t> { if (position >= input.size() || input[position] != URI_HASH) { - return std::nullopt; + if constexpr 
(CheckOnly) { + return false; + } else { + return std::nullopt; + } } position += 1; @@ -302,60 +436,83 @@ auto parse_fragment(const std::string_view input, } } - return std::string{input.substr(start, position - start)}; + if constexpr (CheckOnly) { + return true; + } else { + return std::string{input.substr(start, position - start)}; + } } -} // namespace - -namespace sourcemeta::core { - +template auto parse_authority(const std::string_view input, std::string_view::size_type &position, - std::optional &userinfo, - std::optional &host, - std::optional &port) -> void { - auto userinfo_raw = parse_userinfo(input, position); - if (userinfo_raw.has_value()) { - uri_unescape_selective_inplace(userinfo_raw.value()); - userinfo = std::move(userinfo_raw.value()); - } + [[maybe_unused]] std::optional &userinfo, + [[maybe_unused]] std::optional &host, + [[maybe_unused]] std::optional &port) + -> void { + if constexpr (CheckOnly) { + parse_userinfo(input, position); + parse_host(input, position); + } else { + auto userinfo_raw = parse_userinfo(input, position); + if (userinfo_raw.has_value()) { + uri_unescape_selective_inplace(userinfo_raw.value()); + userinfo = std::move(userinfo_raw.value()); + } - auto host_raw = parse_host(input, position); - uri_unescape_selective_inplace(host_raw); - host = std::move(host_raw); + auto host_raw = parse_host(input, position); + uri_unescape_selective_inplace(host_raw); + host = std::move(host_raw); + } + // RFC 3986: authority = [ userinfo "@" ] host [ ":" port ] + // port = *DIGIT (empty port after colon is valid) if (position < input.size() && input[position] == URI_COLON) { - const auto colon_position = position; position += 1; - const auto port_value = parse_port(input, position); - if (port_value.has_value()) { - port = port_value.value(); + if constexpr (CheckOnly) { + parse_port(input, position); } else { - position = colon_position; + const auto port_start = position; + const auto port_value = parse_port(input, position); + if 
(port_value.has_value()) { + if (port_value.value() > std::numeric_limits::max()) { + throw sourcemeta::core::URIParseError{ + static_cast(port_start + 1)}; + } + + port = static_cast(port_value.value()); + } } } if (position < input.size() && input[position] == URI_AT) { - throw URIParseError{static_cast(position + 1)}; + throw sourcemeta::core::URIParseError{ + static_cast(position + 1)}; } } -auto URI::parse(const std::string_view input) -> void { - assert(!this->scheme_.has_value()); - assert(!this->userinfo_.has_value()); - assert(!this->host_.has_value()); - assert(!this->port_.has_value()); - assert(!this->path_.has_value()); - assert(!this->query_.has_value()); - assert(!this->fragment_.has_value()); - +template +auto do_parse(const std::string_view input, + [[maybe_unused]] std::optional &scheme, + [[maybe_unused]] std::optional &userinfo, + [[maybe_unused]] std::optional &host, + [[maybe_unused]] std::optional &port, + [[maybe_unused]] std::optional &path, + [[maybe_unused]] std::optional &query, + [[maybe_unused]] std::optional &fragment) -> bool { if (input.empty()) { - return; + return false; } std::string_view::size_type position{0}; - this->scheme_ = parse_scheme(input, position); + bool has_scheme; + if constexpr (CheckOnly) { + has_scheme = parse_scheme(input, position); + } else { + scheme = parse_scheme(input, position); + has_scheme = scheme.has_value(); + } const auto has_authority = position + 1 < input.size() && input[position] == URI_SLASH && @@ -363,34 +520,127 @@ auto URI::parse(const std::string_view input) -> void { if (has_authority) { position += 2; - parse_authority(input, position, this->userinfo_, this->host_, this->port_); - } + parse_authority(input, position, userinfo, host, port); - auto path = parse_path(input, position); + // RFC 3986: hier-part = "//" authority path-abempty + // path-abempty = *( "/" segment ), so after authority the next character + // must be "/", "?", "#", or end-of-input + if (position < input.size() && 
input[position] != URI_SLASH && + input[position] != URI_QUESTION && input[position] != URI_HASH) { + throw sourcemeta::core::URIParseError{ + static_cast(position + 1)}; + } + } - if (path.has_value()) { - uri_unescape_selective_inplace(path.value()); - this->path_ = std::move(path.value()); - } else if (has_authority || this->scheme_.has_value()) { - if (input.ends_with(URI_SLASH) || input == "/") { - this->path_ = "/"; + const auto path_start = position; + bool has_path; + if constexpr (CheckOnly) { + has_path = parse_path(input, position); + } else { + auto parsed_path = parse_path(input, position); + has_path = parsed_path.has_value(); + + if (has_path) { + // RFC 3986: relative-ref without authority uses path-noscheme, + // where the first segment must not contain a colon + if (!has_scheme && !has_authority) { + const auto &path_value = parsed_path.value(); + if (!path_value.empty() && path_value[0] != URI_SLASH) { + const auto first_slash = path_value.find(URI_SLASH); + const auto colon_pos = path_value.find(URI_COLON); + if (colon_pos != std::string::npos && + (first_slash == std::string::npos || colon_pos < first_slash)) { + throw sourcemeta::core::URIParseError{ + static_cast(colon_pos + 1)}; + } + } + } + + uri_unescape_selective_inplace(parsed_path.value()); + path = std::move(parsed_path.value()); + } else if (has_authority || has_scheme) { + if (input.ends_with(URI_SLASH) || input == "/") { + path = "/"; + } } } - auto query = parse_query(input, position); - if (query.has_value()) { - uri_unescape_selective_inplace(query.value()); - this->query_ = std::move(query.value()); + if constexpr (CheckOnly) { + if (has_path && !has_scheme && !has_authority) { + if (input[path_start] != URI_SLASH) { + const auto path_view = input.substr(path_start, position - path_start); + const auto first_slash = path_view.find(URI_SLASH); + const auto colon_pos = path_view.find(URI_COLON); + if (colon_pos != std::string_view::npos && + (first_slash == std::string_view::npos 
|| + colon_pos < first_slash)) { + throw sourcemeta::core::URIParseError{ + static_cast(path_start + colon_pos + 1)}; + } + } + } } - auto fragment = parse_fragment(input, position); - if (fragment.has_value()) { - uri_unescape_selective_inplace(fragment.value()); - this->fragment_ = std::move(fragment.value()); + if constexpr (CheckOnly) { + parse_query(input, position); + parse_fragment(input, position); + } else { + auto parsed_query = parse_query(input, position); + if (parsed_query.has_value()) { + uri_unescape_selective_inplace(parsed_query.value()); + query = std::move(parsed_query.value()); + } + + auto parsed_fragment = parse_fragment(input, position); + if (parsed_fragment.has_value()) { + uri_unescape_selective_inplace(parsed_fragment.value()); + fragment = std::move(parsed_fragment.value()); + } } if (position < input.size()) { - throw URIParseError{static_cast(position + 1)}; + throw sourcemeta::core::URIParseError{ + static_cast(position + 1)}; + } + + return has_scheme; +} + +} // namespace + +namespace sourcemeta::core { + +auto URI::parse(const std::string_view input) -> void { + assert(!this->scheme_.has_value()); + assert(!this->userinfo_.has_value()); + assert(!this->host_.has_value()); + assert(!this->port_.has_value()); + assert(!this->path_.has_value()); + assert(!this->query_.has_value()); + assert(!this->fragment_.has_value()); + do_parse(input, this->scheme_, this->userinfo_, this->host_, + this->port_, this->path_, this->query_, this->fragment_); +} + +auto URI::is_uri(const std::string_view input) noexcept -> bool { + try { + std::optional scheme, userinfo, host, path, query, fragment; + std::optional port; + return do_parse(input, scheme, userinfo, host, port, path, query, + fragment); + } catch (...) 
{ + return false; + } +} + +auto URI::is_uri_reference(const std::string_view input) noexcept -> bool { + try { + std::optional scheme, userinfo, host, path, query, fragment; + std::optional port; + do_parse(input, scheme, userinfo, host, port, path, query, fragment); + return true; + } catch (...) { + return false; } } diff --git a/vendor/core/src/core/uri/resolution.cc b/vendor/core/src/core/uri/resolution.cc index cd5b9261..91902311 100644 --- a/vendor/core/src/core/uri/resolution.cc +++ b/vendor/core/src/core/uri/resolution.cc @@ -202,11 +202,19 @@ auto URI::relative_to(const URI &base) -> URI & { return *this; } - // Hosts must match (but both can be null for URNs) + // The full authority must match (but components can be null for URNs) + if (this->userinfo_ != base.userinfo_) { + return *this; + } + if (this->host_ != base.host_) { return *this; } + if (this->port_ != base.port_) { + return *this; + } + // Special case: both URIs are exactly the same if (this->path_ == base.path_ && this->query_ == base.query_ && this->fragment_ == base.fragment_) { diff --git a/vendor/core/src/core/uritemplate/CMakeLists.txt b/vendor/core/src/core/uritemplate/CMakeLists.txt index 427d7955..cf03111b 100644 --- a/vendor/core/src/core/uritemplate/CMakeLists.txt +++ b/vendor/core/src/core/uritemplate/CMakeLists.txt @@ -5,5 +5,3 @@ sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME uritemplate if(SOURCEMETA_CORE_INSTALL) sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME uritemplate) endif() - -target_link_libraries(sourcemeta_core_uritemplate PUBLIC sourcemeta::core::io) diff --git a/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_error.h b/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_error.h index 0898a39c..9a7b42d0 100644 --- a/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_error.h +++ b/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_error.h @@ -123,6 +123,26 @@ class 
SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateRouterSaveError const char *message_; }; +/// @ingroup uritemplate +/// An error that represents a failure to read the router from disk +class SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateRouterReadError + : public std::exception { +public: + URITemplateRouterReadError(std::filesystem::path path) + : path_{std::move(path)} {} + + [[nodiscard]] auto what() const noexcept -> const char * override { + return "Failed to open router file for reading"; + } + + [[nodiscard]] auto path() const noexcept -> const std::filesystem::path & { + return this->path_; + } + +private: + std::filesystem::path path_; +}; + #if defined(_MSC_VER) #pragma warning(default : 4251 4275) #endif diff --git a/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_router.h b/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_router.h index 81d80103..763a7f8b 100644 --- a/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_router.h +++ b/vendor/core/src/core/uritemplate/include/sourcemeta/core/uritemplate_router.h @@ -5,8 +5,7 @@ #include #endif -#include - +#include // std::size_t #include // std::uint16_t, std::uint32_t, std::uint8_t #include // std::filesystem::path #include // std::function @@ -85,7 +84,7 @@ class SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateRouter { }; /// @ingroup uritemplate -/// A read-only memory-mapped view of a serialized URI Template router +/// A read-only view of a serialized URI Template router class SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateRouterView { public: /// A serialized node in the binary format @@ -112,6 +111,7 @@ class SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateRouterView { const std::filesystem::path &path) -> void; URITemplateRouterView(const std::filesystem::path &path); + URITemplateRouterView(const std::uint8_t *data, std::size_t size); // To avoid mistakes URITemplateRouterView(const URITemplateRouterView &) = delete; @@ -127,7 +127,7 @@ class 
SOURCEMETA_CORE_URITEMPLATE_EXPORT URITemplateRouterView { -> URITemplateRouter::Identifier; private: - FileView file_view_; + std::vector data_; }; #if defined(_MSC_VER) diff --git a/vendor/core/src/core/uritemplate/uritemplate_router_view.cc b/vendor/core/src/core/uritemplate/uritemplate_router_view.cc index 425e0f34..b5190295 100644 --- a/vendor/core/src/core/uritemplate/uritemplate_router_view.cc +++ b/vendor/core/src/core/uritemplate/uritemplate_router_view.cc @@ -1,8 +1,7 @@ #include -#include // assert #include // std::memcmp -#include // std::ofstream +#include // std::ofstream, std::ifstream #include // std::numeric_limits #include // std::queue #include // std::string @@ -27,9 +26,9 @@ struct RouterHeader { // Binary search for a literal child matching the given segment inline auto binary_search_literal_children( const URITemplateRouterView::Node *nodes, const char *string_table, - const std::uint32_t first_child, const std::uint32_t child_count, - const char *segment, const std::uint32_t segment_length) noexcept - -> std::uint32_t { + const std::size_t string_table_size, const std::uint32_t first_child, + const std::uint32_t child_count, const char *segment, + const std::uint32_t segment_length) noexcept -> std::uint32_t { std::uint32_t low = 0; std::uint32_t high = child_count; @@ -38,6 +37,11 @@ inline auto binary_search_literal_children( const auto child_index = first_child + middle; const auto &child = nodes[child_index]; + if (child.string_offset > string_table_size || + child.string_length > string_table_size - child.string_offset) { + return NO_CHILD; + } + // Compare segments lexicographically (content first, then length) const auto min_length = segment_length < child.string_length ? 
segment_length @@ -170,21 +174,65 @@ auto URITemplateRouterView::save(const URITemplateRouter &router, } } -URITemplateRouterView::URITemplateRouterView(const std::filesystem::path &path) - : file_view_{path} {} +URITemplateRouterView::URITemplateRouterView( + const std::filesystem::path &path) { + std::ifstream file(path, std::ios::binary | std::ios::ate); + if (!file) { + throw URITemplateRouterReadError{path}; + } + + const auto position = file.tellg(); + if (position < 0) { + throw URITemplateRouterReadError{path}; + } + + const auto size = static_cast(position); + file.seekg(0, std::ios::beg); + this->data_.resize(size); + file.read(reinterpret_cast(this->data_.data()), + static_cast(size)); + if (!file) { + throw URITemplateRouterReadError{path}; + } +} + +URITemplateRouterView::URITemplateRouterView(const std::uint8_t *data, + const std::size_t size) + : data_{data, data + size} {} auto URITemplateRouterView::match(const std::string_view path, const URITemplateRouter::Callback &callback) const -> URITemplateRouter::Identifier { - const auto *header = this->file_view_.as(); - assert(header->magic == ROUTER_MAGIC); - assert(header->version == ROUTER_VERSION); + if (this->data_.size() < sizeof(RouterHeader)) { + return 0; + } - const auto *nodes = this->file_view_.as(sizeof(RouterHeader)); - const auto *string_table = - header->string_table_offset < this->file_view_.size() - ? 
this->file_view_.as(header->string_table_offset) - : nullptr; + const auto *header = + reinterpret_cast(this->data_.data()); + if (header->magic != ROUTER_MAGIC || header->version != ROUTER_VERSION) { + return 0; + } + + if (header->node_count == 0 || + header->node_count > + (this->data_.size() - sizeof(RouterHeader)) / sizeof(Node)) { + return 0; + } + + const auto *nodes = + reinterpret_cast(this->data_.data() + sizeof(RouterHeader)); + const auto nodes_size = + static_cast(header->node_count) * sizeof(Node); + const auto expected_string_table_offset = sizeof(RouterHeader) + nodes_size; + if (header->string_table_offset < expected_string_table_offset || + header->string_table_offset > this->data_.size()) { + return 0; + } + + const auto *string_table = reinterpret_cast( + this->data_.data() + header->string_table_offset); + const auto string_table_size = + this->data_.size() - header->string_table_offset; // Empty path matches empty template if (path.empty()) { @@ -198,9 +246,15 @@ auto URITemplateRouterView::match(const std::string_view path, return 0; } + if (root.first_literal_child >= header->node_count || + root.literal_child_count > + header->node_count - root.first_literal_child) { + return 0; + } + const auto match = binary_search_literal_children( - nodes, string_table, root.first_literal_child, root.literal_child_count, - "", 0); + nodes, string_table, string_table_size, root.first_literal_child, + root.literal_child_count, "", 0); return match != NO_CHILD ? 
nodes[match].identifier : 0; } @@ -232,11 +286,17 @@ auto URITemplateRouterView::match(const std::string_view path, } const auto &node = nodes[current_node]; + const auto node_count = header->node_count; // Try literal children first if (node.first_literal_child != NO_CHILD) { + if (node.first_literal_child >= node_count || + node.literal_child_count > node_count - node.first_literal_child) { + return 0; + } + const auto literal_match = binary_search_literal_children( - nodes, string_table, node.first_literal_child, + nodes, string_table, string_table_size, node.first_literal_child, node.literal_child_count, segment_start, segment_length); if (literal_match != NO_CHILD) { current_node = literal_match; @@ -250,10 +310,20 @@ auto URITemplateRouterView::match(const std::string_view path, // Fall back to variable child if (node.variable_child != NO_CHILD) { - assert(variable_index <= - std::numeric_limits::max()); + if (node.variable_child >= node_count || + variable_index > + std::numeric_limits::max()) { + return 0; + } + const auto &variable_node = nodes[node.variable_child]; + if (variable_node.string_offset > string_table_size || + variable_node.string_length > + string_table_size - variable_node.string_offset) { + return 0; + } + // Check if this is an expansion (catch-all) if (variable_node.type == URITemplateRouter::NodeType::Expansion) { const auto remaining_length = diff --git a/vendor/core/src/core/uuid/include/sourcemeta/core/uuid.h b/vendor/core/src/core/uuid/include/sourcemeta/core/uuid.h deleted file mode 100644 index 4a6601c6..00000000 --- a/vendor/core/src/core/uuid/include/sourcemeta/core/uuid.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef SOURCEMETA_CORE_UUID_H_ -#define SOURCEMETA_CORE_UUID_H_ - -#ifndef SOURCEMETA_CORE_UUID_EXPORT -#include -#endif - -#include // std::string - -/// @defgroup uuid UUID -/// @brief A growing implementation of RFC 9562 UUID. 
-/// -/// This functionality is included as follows: -/// -/// ```cpp -/// #include -/// ``` - -namespace sourcemeta::core { - -/// @ingroup uuid -/// Generate a random UUID v4 string. For example: -/// -/// ```cpp -/// #include -/// #include -/// -/// std::cout << sourcemeta::core::uuidv4() << "\n"; -/// ``` -/// -/// See https://www.rfc-editor.org/rfc/rfc9562#name-uuid-version-4 -SOURCEMETA_CORE_UUID_EXPORT auto uuidv4() -> std::string; - -} // namespace sourcemeta::core - -#endif diff --git a/vendor/core/src/core/uuid/uuid.cc b/vendor/core/src/core/uuid/uuid.cc deleted file mode 100644 index 67d0e16a..00000000 --- a/vendor/core/src/core/uuid/uuid.cc +++ /dev/null @@ -1,34 +0,0 @@ -#include - -#include // std::array -#include // std::uint8_t -#include // std::random_device, std::mt19937, std::uniform_int_distribution -#include // std::string_view - -namespace sourcemeta::core { - -// Adapted from https://stackoverflow.com/a/58467162/1641422 -auto uuidv4() -> std::string { - static std::random_device device; - static std::mt19937 generator{device()}; - static constexpr std::string_view digits = "0123456789abcdef"; - static constexpr std::array dash = { - {false, false, false, false, true, false, true, false, true, false, true, - false, false, false, false, false}}; - std::uniform_int_distribution distribution(0, - 15); - std::string result; - result.reserve(36); - for (bool is_dash : dash) { - if (is_dash) { - result += "-"; - } - - result += digits[distribution(generator)]; - result += digits[distribution(generator)]; - } - - return result; -} - -} // namespace sourcemeta::core diff --git a/vendor/core/src/core/yaml/CMakeLists.txt b/vendor/core/src/core/yaml/CMakeLists.txt index 92a0bac1..d4a4aa2b 100644 --- a/vendor/core/src/core/yaml/CMakeLists.txt +++ b/vendor/core/src/core/yaml/CMakeLists.txt @@ -1,11 +1,12 @@ sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME yaml - PRIVATE_HEADERS error.h - SOURCES yaml.cc) + PRIVATE_HEADERS error.h roundtrip.h + 
SOURCES yaml.cc lexer.h parser.h stringify.h) if(SOURCEMETA_CORE_INSTALL) sourcemeta_library_install(NAMESPACE sourcemeta PROJECT core NAME yaml) endif() -target_link_libraries(sourcemeta_core_yaml PRIVATE yaml) target_link_libraries(sourcemeta_core_yaml PUBLIC sourcemeta::core::json) +target_link_libraries(sourcemeta_core_yaml PUBLIC sourcemeta::core::jsonpointer) target_link_libraries(sourcemeta_core_yaml PRIVATE sourcemeta::core::io) +target_link_libraries(sourcemeta_core_yaml PRIVATE sourcemeta::core::unicode) diff --git a/vendor/core/src/core/yaml/include/sourcemeta/core/yaml.h b/vendor/core/src/core/yaml/include/sourcemeta/core/yaml.h index 2b0e9762..e806db08 100644 --- a/vendor/core/src/core/yaml/include/sourcemeta/core/yaml.h +++ b/vendor/core/src/core/yaml/include/sourcemeta/core/yaml.h @@ -9,13 +9,15 @@ // NOLINTBEGIN(misc-include-cleaner) #include +#include // NOLINTEND(misc-include-cleaner) #include // std::filesystem #include // std::basic_istream +#include // std::basic_ostream /// @defgroup yaml YAML -/// @brief A YAML compatibility library based on `libyaml`. +/// @brief A YAML parser that converts YAML to JSON. 
/// /// This functionality is included as follows: /// @@ -41,8 +43,8 @@ namespace sourcemeta::core { /// assert(document.is_object()); /// ``` SOURCEMETA_CORE_YAML_EXPORT -auto parse_yaml(std::basic_istream &stream, - const JSON::ParseCallback &callback = nullptr) -> JSON; +auto parse_yaml(std::basic_istream &stream) + -> JSON; /// @ingroup yaml /// @@ -62,8 +64,7 @@ auto parse_yaml(std::basic_istream &stream, /// std::cerr << "\n"; /// ``` SOURCEMETA_CORE_YAML_EXPORT -auto parse_yaml(const JSON::String &input, - const JSON::ParseCallback &callback = nullptr) -> JSON; +auto parse_yaml(const JSON::String &input) -> JSON; /// @ingroup yaml /// @@ -84,8 +85,38 @@ auto parse_yaml(const JSON::String &input, /// std::cerr << "\n"; /// ``` SOURCEMETA_CORE_YAML_EXPORT -auto read_yaml(const std::filesystem::path &path, - const JSON::ParseCallback &callback = nullptr) -> JSON; +auto read_yaml(const std::filesystem::path &path) -> JSON; + +/// @ingroup yaml +/// +/// Parse a YAML document from a C++ standard input stream into an existing +/// JSON value, invoking the given callback during parsing. The result is +/// constructed directly into the given reference rather than returned by value +/// to ensure that references passed through the parse callback remain valid +/// after parsing completes. +SOURCEMETA_CORE_YAML_EXPORT +auto parse_yaml(std::basic_istream &stream, + JSON &output, const JSON::ParseCallback &callback) -> void; + +/// @ingroup yaml +/// +/// Parse a YAML string into an existing JSON value, invoking the given +/// callback during parsing. The result is constructed directly into the given +/// reference rather than returned by value to ensure that references passed +/// through the parse callback remain valid after parsing completes. 
+SOURCEMETA_CORE_YAML_EXPORT +auto parse_yaml(const JSON::String &input, JSON &output, + const JSON::ParseCallback &callback) -> void; + +/// @ingroup yaml +/// +/// Read a YAML file into an existing JSON value, invoking the given callback +/// during parsing. The result is constructed directly into the given reference +/// rather than returned by value to ensure that references passed through the +/// parse callback remain valid after parsing completes. +SOURCEMETA_CORE_YAML_EXPORT +auto read_yaml(const std::filesystem::path &path, JSON &output, + const JSON::ParseCallback &callback) -> void; /// @ingroup yaml /// @@ -106,8 +137,87 @@ auto read_yaml(const std::filesystem::path &path, /// std::cerr << "\n"; /// ``` SOURCEMETA_CORE_YAML_EXPORT -auto read_yaml_or_json(const std::filesystem::path &path, - const JSON::ParseCallback &callback = nullptr) -> JSON; +auto read_yaml_or_json(const std::filesystem::path &path) -> JSON; + +/// @ingroup yaml +/// +/// Read a JSON document from a file that represents YAML or JSON, constructing +/// into the given reference and invoking the callback during parsing. The +/// result is constructed directly into the given reference rather than returned +/// by value to ensure that references passed through the parse callback (such +/// as object property names) remain valid after parsing completes. +SOURCEMETA_CORE_YAML_EXPORT +auto read_yaml_or_json(const std::filesystem::path &path, JSON &output, + const JSON::ParseCallback &callback) -> void; + +/// @ingroup yaml +/// +/// Create a JSON document from a YAML string, collecting round-trip metadata +/// to reproduce the original formatting. 
For example: +/// +/// ```cpp +/// #include +/// #include +/// +/// sourcemeta::core::YAMLRoundTrip roundtrip; +/// const std::string input{"hello: world"}; +/// const sourcemeta::core::JSON document = +/// sourcemeta::core::parse_yaml(input, roundtrip); +/// ``` +SOURCEMETA_CORE_YAML_EXPORT +auto parse_yaml(const JSON::String &input, YAMLRoundTrip &roundtrip) -> JSON; + +/// @ingroup yaml +/// +/// Parse a YAML string with round-trip metadata into an existing JSON value, +/// invoking the given callback during parsing. The result is constructed +/// directly into the given reference rather than returned by value to ensure +/// that references passed through the parse callback remain valid after +/// parsing completes. +SOURCEMETA_CORE_YAML_EXPORT +auto parse_yaml(const JSON::String &input, YAMLRoundTrip &roundtrip, + JSON &output, const JSON::ParseCallback &callback) -> void; + +/// @ingroup yaml +/// +/// Stringify a JSON document as YAML, using round-trip metadata collected +/// during parsing to preserve the original formatting. For example: +/// +/// ```cpp +/// #include +/// #include +/// +/// #include +/// +/// sourcemeta::core::YAMLRoundTrip roundtrip; +/// const std::string input{"hello: world"}; +/// const sourcemeta::core::JSON document = +/// sourcemeta::core::parse_yaml(input, roundtrip); +/// sourcemeta::core::stringify_yaml(document, std::cout, roundtrip); +/// ``` +SOURCEMETA_CORE_YAML_EXPORT +auto stringify_yaml(const JSON &document, + std::basic_ostream &stream, + const YAMLRoundTrip &roundtrip) -> void; + +/// @ingroup yaml +/// +/// Stringify a JSON document as YAML. 
For example: +/// +/// ```cpp +/// #include +/// #include +/// +/// #include +/// +/// const sourcemeta::core::JSON document = +/// sourcemeta::core::parse_json(R"JSON({ "foo": "bar" })JSON"); +/// sourcemeta::core::stringify_yaml(document, std::cout); +/// ``` +SOURCEMETA_CORE_YAML_EXPORT +auto stringify_yaml(const JSON &document, + std::basic_ostream &stream) + -> void; } // namespace sourcemeta::core diff --git a/vendor/core/src/core/yaml/include/sourcemeta/core/yaml_error.h b/vendor/core/src/core/yaml/include/sourcemeta/core/yaml_error.h index 5ac5ff46..5c7a76fe 100644 --- a/vendor/core/src/core/yaml/include/sourcemeta/core/yaml_error.h +++ b/vendor/core/src/core/yaml/include/sourcemeta/core/yaml_error.h @@ -5,15 +5,15 @@ #include #endif +#include // std::uint64_t #include // std::exception #include // std::string #include // std::string_view -#include // std::move namespace sourcemeta::core { // Exporting symbols that depends on the standard C++ library is considered -// safe. +// safe // https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN #if defined(_MSC_VER) #pragma warning(disable : 4251 4275) @@ -37,19 +37,38 @@ class SOURCEMETA_CORE_YAML_EXPORT YAMLError : public std::exception { }; /// @ingroup yaml -/// An error that represents YAML parse error event +/// An error that represents a YAML parse error event class SOURCEMETA_CORE_YAML_EXPORT YAMLParseError : public std::exception { public: - YAMLParseError(const char *message) : message_{message} {} - YAMLParseError(std::string message) = delete; - YAMLParseError(std::string &&message) = delete; - YAMLParseError(std::string_view message) = delete; + YAMLParseError(const std::uint64_t line, const std::uint64_t column) + : line_{line}, column_{column}, + message_{"Failed to parse the YAML document"} {} + + YAMLParseError(const std::uint64_t line, const std::uint64_t column, + const char *message) + : line_{line}, 
column_{column}, message_{message} {} + YAMLParseError(const std::uint64_t line, const std::uint64_t column, + std::string message) = delete; + YAMLParseError(const std::uint64_t line, const std::uint64_t column, + std::string &&message) = delete; + YAMLParseError(const std::uint64_t line, const std::uint64_t column, + std::string_view message) = delete; [[nodiscard]] auto what() const noexcept -> const char * override { return this->message_; } + [[nodiscard]] auto line() const noexcept -> std::uint64_t { + return this->line_; + } + + [[nodiscard]] auto column() const noexcept -> std::uint64_t { + return this->column_; + } + private: + std::uint64_t line_; + std::uint64_t column_; const char *message_; }; @@ -58,8 +77,9 @@ class SOURCEMETA_CORE_YAML_EXPORT YAMLParseError : public std::exception { class SOURCEMETA_CORE_YAML_EXPORT YAMLUnknownAnchorError : public YAMLParseError { public: - YAMLUnknownAnchorError(const std::string_view anchor_name) - : YAMLParseError{"YAML alias references undefined anchor"}, + YAMLUnknownAnchorError(const std::string_view anchor_name, + const std::uint64_t line, const std::uint64_t column) + : YAMLParseError{line, column, "YAML alias references undefined anchor"}, anchor_name_{anchor_name} {} [[nodiscard]] auto anchor() const noexcept -> std::string_view { @@ -70,6 +90,30 @@ class SOURCEMETA_CORE_YAML_EXPORT YAMLUnknownAnchorError std::string anchor_name_; }; +/// @ingroup yaml +/// An error that represents a duplicate key in a YAML mapping +/// YAML 1.2.2 requires unique keys in mappings, unlike JSON where duplicate +/// keys are undefined behavior. 
See https://yaml.org/spec/1.2.2/#mapping +class SOURCEMETA_CORE_YAML_EXPORT YAMLDuplicateKeyError + : public YAMLParseError { +public: + YAMLDuplicateKeyError(const std::string_view key_name, + const std::uint64_t line, const std::uint64_t column) + : YAMLParseError{line, column, "Duplicate key in YAML mapping"}, + key_name_{key_name} {} + + [[nodiscard]] auto key() const noexcept -> std::string_view { + return this->key_name_; + } + +private: + std::string key_name_; +}; + +#if defined(_MSC_VER) +#pragma warning(default : 4251 4275) +#endif + } // namespace sourcemeta::core #endif diff --git a/vendor/core/src/core/yaml/include/sourcemeta/core/yaml_roundtrip.h b/vendor/core/src/core/yaml/include/sourcemeta/core/yaml_roundtrip.h new file mode 100644 index 00000000..c22f0516 --- /dev/null +++ b/vendor/core/src/core/yaml/include/sourcemeta/core/yaml_roundtrip.h @@ -0,0 +1,80 @@ +#ifndef SOURCEMETA_CORE_YAML_ROUNDTRIP_H_ +#define SOURCEMETA_CORE_YAML_ROUNDTRIP_H_ + +#ifndef SOURCEMETA_CORE_YAML_EXPORT +#include +#endif + +#include +#include + +#include // std::uint8_t, std::size_t +#include // std::optional +#include // std::string +#include // std::unordered_map +#include // std::vector + +namespace sourcemeta::core { + +// Exporting symbols that depends on the standard C++ library is considered +// safe +// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-2-c4275?view=msvc-170&redirectedfrom=MSDN +#if defined(_MSC_VER) +#pragma warning(disable : 4251 4275) +#endif + +/// @ingroup yaml +/// Holds per-node metadata collected during YAML parsing to reproduce the +/// original formatting +class SOURCEMETA_CORE_YAML_EXPORT YAMLRoundTrip { +public: + enum class ScalarStyle : std::uint8_t { + Plain, + SingleQuoted, + DoubleQuoted, + Literal, + Folded + }; + + enum class CollectionStyle : std::uint8_t { Block, Flow }; + + enum class Chomping : std::uint8_t { Clip, Strip, Keep }; + + struct NodeStyle { + std::optional scalar; + 
std::optional collection; + std::optional chomping; + std::size_t explicit_indent{0}; + bool indent_before_chomping{false}; + std::optional block_content; + std::optional plain_content; + std::optional quoted_content; + std::optional anchor; + std::vector comments_before; + std::optional comment_inline; + std::optional comment_on_indicator; + bool compact_flow{false}; + }; + + std::unordered_map styles; + std::unordered_map aliases; + std::unordered_map key_styles; + std::unordered_map key_quoted_contents; + bool explicit_document_start{false}; + bool explicit_document_end{false}; + std::optional document_start_comment; + std::optional document_end_comment; + std::vector leading_comments; + std::vector post_start_comments; + std::vector pre_end_comments; + std::vector trailing_comments; + std::size_t indent_width{2}; +}; + +#if defined(_MSC_VER) +#pragma warning(default : 4251 4275) +#endif + +} // namespace sourcemeta::core + +#endif diff --git a/vendor/core/src/core/yaml/lexer.h b/vendor/core/src/core/yaml/lexer.h new file mode 100644 index 00000000..3e9a6aea --- /dev/null +++ b/vendor/core/src/core/yaml/lexer.h @@ -0,0 +1,1496 @@ +#ifndef SOURCEMETA_CORE_YAML_LEXER_H_ +#define SOURCEMETA_CORE_YAML_LEXER_H_ + +#include +#include + +#include // std::uint8_t, std::uint64_t +#include // std::deque +#include // std::optional +#include // std::string +#include // std::string_view +#include // std::vector + +namespace sourcemeta::core::yaml { + +enum class TokenType : std::uint8_t { + StreamStart, + StreamEnd, + DocumentStart, + DocumentEnd, + MappingStart, + MappingEnd, + SequenceStart, + SequenceEnd, + BlockMappingKey, + BlockMappingValue, + BlockSequenceEntry, + FlowEntry, + Scalar, + Anchor, + Alias, + Tag, + DirectiveYAML, + DirectiveTag, + DirectiveReserved +}; + +enum class ScalarStyle : std::uint8_t { + Plain, + SingleQuoted, + DoubleQuoted, + Literal, + Folded +}; + +enum class BlockChomping : std::uint8_t { Clip, Strip, Keep }; + +struct Token { + TokenType 
type; + std::string_view value; + std::uint64_t line; + std::uint64_t column; + std::size_t position{0}; + ScalarStyle scalar_style{ScalarStyle::Plain}; + BlockChomping chomping{BlockChomping::Clip}; + bool multiline{false}; + std::string_view block_original{}; + std::string_view quoted_original{}; + std::size_t explicit_indent{0}; + bool indent_before_chomping{false}; + bool compact_separator{false}; +}; + +class Lexer { +public: + Lexer(const std::string_view input, const bool roundtrip_mode = false) + : input_{input}, roundtrip_{roundtrip_mode} {} + + auto next() -> std::optional { + if (this->roundtrip_) { + this->inline_comment_buffer_.reset(); + } + this->skip_whitespace_and_comments(); + if (this->roundtrip_) { + this->comment_reference_line_ = this->line_; + } + + if (this->position_ >= this->input_.size()) { + if (!this->stream_started_) { + this->stream_started_ = true; + return Token{.type = TokenType::StreamStart, + .value = {}, + .line = this->line_, + .column = this->column_}; + } + if (!this->stream_ended_) { + this->stream_ended_ = true; + const auto end_line{this->column_ > 0 ? 
this->line_ + 1 : this->line_}; + const std::uint64_t end_column{0}; + return Token{.type = TokenType::StreamEnd, + .value = {}, + .line = end_line, + .column = end_column}; + } + return std::nullopt; + } + + if (!this->stream_started_) { + this->stream_started_ = true; + return Token{.type = TokenType::StreamStart, + .value = {}, + .line = this->line_, + .column = this->column_}; + } + + const auto current_line{this->line_}; + const auto current_column{this->column_}; + const auto current_position{this->position_}; + + if (this->tab_at_line_start_) { + this->tab_at_line_start_ = false; + const char next_char{this->peek()}; + if (next_char != '{' && next_char != '[') { + throw YAMLParseError{current_line, current_column, + "Tab characters cannot be used for indentation"}; + } + } + + if (this->column_ == 1 && this->check_document_marker('-')) { + this->advance(3); + return Token{.type = TokenType::DocumentStart, + .value = "---", + .line = current_line, + .column = current_column, + .position = current_position}; + } + + if (this->column_ == 1 && this->check_document_marker('.')) { + this->advance(3); + this->validate_trailing_content(); + return Token{.type = TokenType::DocumentEnd, + .value = "...", + .line = current_line, + .column = current_column, + .position = current_position}; + } + + const char current{this->peek()}; + + if (current == '{') { + this->advance(1); + this->flow_level_++; + return Token{.type = TokenType::MappingStart, + .value = "{", + .line = current_line, + .column = current_column}; + } + + if (current == '[') { + this->advance(1); + this->flow_level_++; + return Token{.type = TokenType::SequenceStart, + .value = "[", + .line = current_line, + .column = current_column}; + } + + if (this->flow_level_ > 0) { + if (current == '}') { + this->advance(1); + this->flow_level_--; + if (this->flow_level_ == 0) { + this->validate_trailing_content(); + } + return Token{.type = TokenType::MappingEnd, + .value = "}", + .line = current_line, + .column = 
current_column}; + } + if (current == ']') { + this->advance(1); + this->flow_level_--; + if (this->flow_level_ == 0) { + this->validate_trailing_content(); + } + return Token{.type = TokenType::SequenceEnd, + .value = "]", + .line = current_line, + .column = current_column}; + } + if (current == ',') { + this->advance(1); + const bool compact{this->roundtrip_ && + this->position_ < this->input_.size() && + this->peek() != ' ' && this->peek() != '\n' && + this->peek() != '\r'}; + return Token{.type = TokenType::FlowEntry, + .value = ",", + .line = current_line, + .column = current_column, + .compact_separator = compact}; + } + } else if (current == '-' && this->is_followed_by_whitespace()) { + this->advance(1); + return Token{.type = TokenType::BlockSequenceEntry, + .value = "-", + .line = current_line, + .column = current_column}; + } + + if (current == '?' && this->is_followed_by_whitespace()) { + this->advance(1); + return Token{.type = TokenType::BlockMappingKey, + .value = "?", + .line = current_line, + .column = current_column}; + } + + if (current == ':' && this->is_value_indicator()) { + this->advance(1); + this->last_was_quoted_scalar_ = false; + return Token{.type = TokenType::BlockMappingValue, + .value = ":", + .line = current_line, + .column = current_column}; + } + + if (current == '&') { + return this->scan_anchor_or_alias(TokenType::Anchor); + } + + if (current == '*') { + return this->scan_anchor_or_alias(TokenType::Alias); + } + + if (current == '!') { + return this->scan_tag(); + } + + if (current == '%') { + return this->scan_directive(); + } + + if (current == '\'') { + this->last_was_quoted_scalar_ = true; + return this->scan_single_quoted_scalar(); + } + + if (current == '"') { + this->last_was_quoted_scalar_ = true; + return this->scan_double_quoted_scalar(); + } + + if (current == '|') { + this->last_was_quoted_scalar_ = false; + return this->scan_block_scalar(ScalarStyle::Literal); + } + + if (current == '>') { + 
this->last_was_quoted_scalar_ = false; + return this->scan_block_scalar(ScalarStyle::Folded); + } + + if (current == '#') { + throw YAMLParseError{current_line, current_column, + "Unexpected '#' character"}; + } + + this->last_was_quoted_scalar_ = false; + return this->scan_plain_scalar(); + } + +public: + [[nodiscard]] auto line() const noexcept -> std::uint64_t { + if (this->position_ >= this->input_.size() && this->column_ > 1) { + return this->line_ + 1; + } + return this->line_; + } + + [[nodiscard]] auto column() const noexcept -> std::uint64_t { + if (this->position_ >= this->input_.size()) { + return 0; + } + return this->column_; + } + + [[nodiscard]] auto flow_level() const noexcept -> std::size_t { + return this->flow_level_; + } + + auto set_block_indent(const std::size_t indent) noexcept -> void { + this->block_indent_ = indent; + } + + [[nodiscard]] auto block_indent() const noexcept -> std::size_t { + return this->block_indent_; + } + + [[nodiscard]] auto position() const noexcept -> std::size_t { + return this->position_; + } + + auto take_inline_comment() -> std::optional { + auto result{std::move(this->inline_comment_buffer_)}; + this->inline_comment_buffer_.reset(); + return result; + } + + auto take_preceding_comments() -> std::vector { + auto result{std::move(this->preceding_comments_buffer_)}; + this->preceding_comments_buffer_.clear(); + return result; + } + + auto take_block_scalar_comment() -> std::optional { + auto result{std::move(this->block_scalar_comment_)}; + this->block_scalar_comment_.reset(); + return result; + } + +private: + [[nodiscard]] static auto is_whitespace(const char character) noexcept + -> bool { + return character == ' ' || character == '\t' || character == '\n' || + character == '\r'; + } + + [[nodiscard]] static auto is_flow_indicator(const char character) noexcept + -> bool { + return character == ',' || character == '[' || character == ']' || + character == '{' || character == '}'; + } + + [[nodiscard]] auto 
peek(const std::size_t offset = 0) const noexcept -> char { + const auto index{this->position_ + offset}; + if (index >= this->input_.size()) { + return '\0'; + } + return this->input_[index]; + } + + auto advance(const std::size_t count) noexcept -> void { + for (std::size_t index = 0; index < count; ++index) { + if (this->position_ >= this->input_.size()) { + break; + } + if (this->input_[this->position_] == '\n') { + this->line_++; + this->column_ = 1; + } else { + this->column_++; + } + this->position_++; + } + } + + auto skip_whitespace_and_comments() -> void { + bool preceded_by_whitespace{ + this->column_ == 1 || + (this->position_ > 0 && + is_whitespace(this->input_[this->position_ - 1]))}; + bool at_line_start{this->column_ == 1}; + bool blank_line{at_line_start}; + this->tab_at_line_start_ = false; + while (this->position_ < this->input_.size()) { + const char current{this->peek()}; + + if (current == ' ') { + preceded_by_whitespace = true; + this->advance(1); + continue; + } + + if (current == '\t') { + if (this->flow_level_ == 0 && at_line_start) { + this->tab_at_line_start_ = true; + } + preceded_by_whitespace = true; + this->advance(1); + continue; + } + + if (current == '\n' || current == '\r') { + if (this->roundtrip_ && blank_line) { + this->preceding_comments_buffer_.emplace_back(); + } + this->advance(1); + if (current == '\r' && this->peek() == '\n') { + this->advance(1); + } + preceded_by_whitespace = true; + at_line_start = true; + blank_line = true; + this->tab_at_line_start_ = false; + continue; + } + + if (current == '#' && preceded_by_whitespace) { + blank_line = false; + const auto comment_line{this->line_}; + const auto comment_start{this->position_}; + while (this->position_ < this->input_.size() && this->peek() != '\n') { + this->advance(1); + } + if (this->roundtrip_) { + std::string text{this->input_.substr( + comment_start, this->position_ - comment_start)}; + if (comment_line == this->comment_reference_line_ && + 
this->comment_reference_line_ > 0 && + !this->inline_comment_buffer_.has_value()) { + this->inline_comment_buffer_ = std::move(text); + } else { + this->preceding_comments_buffer_.push_back(std::move(text)); + } + } + continue; + } + + break; + } + } + + [[nodiscard]] auto check_document_marker(const char marker) const noexcept + -> bool { + if (this->position_ + 2 >= this->input_.size()) { + return false; + } + if (this->input_[this->position_] != marker || + this->input_[this->position_ + 1] != marker || + this->input_[this->position_ + 2] != marker) { + return false; + } + if (this->position_ + 3 < this->input_.size()) { + const char after{this->input_[this->position_ + 3]}; + return is_whitespace(after) || after == '\0'; + } + return true; + } + + [[nodiscard]] auto is_followed_by_whitespace() const noexcept -> bool { + if (this->position_ + 1 >= this->input_.size()) { + return true; + } + return is_whitespace(this->input_[this->position_ + 1]); + } + + [[nodiscard]] auto is_value_indicator() const noexcept -> bool { + if (this->flow_level_ > 0) { + if (this->last_was_quoted_scalar_) { + return true; + } + if (this->position_ + 1 >= this->input_.size()) { + return true; + } + const char after{this->input_[this->position_ + 1]}; + return is_whitespace(after) || is_flow_indicator(after); + } + return this->is_followed_by_whitespace(); + } + + [[nodiscard]] auto line_contains_mapping_key() const noexcept -> bool { + auto scan_position{this->position_}; + while (scan_position < this->input_.size()) { + const char character{this->input_[scan_position]}; + if (character == '\n' || character == '\r') { + return false; + } + if (character == ':') { + if (scan_position + 1 >= this->input_.size()) { + return true; + } + const char after{this->input_[scan_position + 1]}; + if (is_whitespace(after)) { + return true; + } + } + scan_position++; + } + return false; + } + + auto scan_anchor_or_alias(const TokenType type) -> Token { + const auto start_line{this->line_}; + const 
auto start_column{this->column_}; + const auto start_position{this->position_}; + this->advance(1); + while (this->position_ < this->input_.size()) { + const char current{this->peek()}; + if (is_whitespace(current) || is_flow_indicator(current)) { + break; + } + this->advance(1); + } + const auto length{this->position_ - start_position}; + return Token{.type = type, + .value = this->input_.substr(start_position + 1, length - 1), + .line = start_line, + .column = start_column}; + } + + auto scan_tag() -> Token { + const auto start_line{this->line_}; + const auto start_column{this->column_}; + const auto start_position{this->position_}; + + this->advance(1); + + if (this->peek() == '<') { + this->advance(1); + while (this->position_ < this->input_.size() && this->peek() != '>') { + this->advance(1); + } + if (this->peek() == '>') { + this->advance(1); + } + } else { + while (this->position_ < this->input_.size()) { + const char current{this->peek()}; + if (is_whitespace(current) || is_flow_indicator(current)) { + break; + } + this->advance(1); + } + } + + const auto length{this->position_ - start_position}; + + if (this->position_ < this->input_.size() && this->flow_level_ == 0) { + const char after_tag{this->peek()}; + if (after_tag == ',') { + throw YAMLParseError{this->line_, this->column_, + "Invalid character after tag in block context"}; + } + } + + return Token{.type = TokenType::Tag, + .value = this->input_.substr(start_position, length), + .line = start_line, + .column = start_column}; + } + + auto scan_directive() -> Token { + const auto start_line{this->line_}; + const auto start_column{this->column_}; + const auto start_position{this->position_}; + + this->advance(1); + + while (this->position_ < this->input_.size() && this->peek() != '\n' && + this->peek() != '\r') { + this->advance(1); + } + + const auto length{this->position_ - start_position}; + const auto directive_content{this->input_.substr(start_position, length)}; + + TokenType 
token_type{TokenType::DirectiveReserved}; + if (directive_content.starts_with("%YAML")) { + token_type = TokenType::DirectiveYAML; + } else if (directive_content.starts_with("%TAG")) { + token_type = TokenType::DirectiveTag; + } + + return Token{.type = token_type, + .value = directive_content, + .line = start_line, + .column = start_column}; + } + + auto scan_single_quoted_scalar() -> Token { + const auto start_line{this->line_}; + const auto start_column{this->column_}; + const auto quote_start{this->position_}; + + this->advance(1); + + auto &buffer{this->get_buffer()}; + std::string line_content; + bool first_line{true}; + std::size_t pending_newlines{0}; + bool found_closing_quote{false}; + + while (this->position_ < this->input_.size()) { + const char current{this->peek()}; + + if (current == '\'') { + if (this->peek(1) == '\'') { + line_content += '\''; + this->advance(2); + } else { + this->flush_flow_line(buffer, line_content, pending_newlines, + first_line, true); + this->advance(1); + found_closing_quote = true; + break; + } + } else if (current == '\n' || current == '\r') { + this->flush_flow_line(buffer, line_content, pending_newlines, + first_line); + first_line = false; + this->skip_flow_scalar_line_break(current, pending_newlines); + } else { + line_content += current; + this->advance(1); + } + } + + if (!found_closing_quote) { + throw YAMLParseError{start_line, start_column, + "Missing closing quote in single-quoted scalar"}; + } + + this->validate_trailing_content(); + + const auto quoted_raw{ + this->roundtrip_ + ? 
this->input_.substr(quote_start + 1, + this->position_ - quote_start - 2) + : std::string_view{}}; + return Token{.type = TokenType::Scalar, + .value = buffer, + .line = start_line, + .column = start_column, + .scalar_style = ScalarStyle::SingleQuoted, + .multiline = !first_line, + .quoted_original = quoted_raw}; + } + + auto flush_flow_line(std::string &buffer, std::string &line_content, + std::size_t &pending_newlines, const bool first_line, + const bool is_final = false) -> void { + if (!is_final) { + while (!line_content.empty() && + (line_content.back() == ' ' || line_content.back() == '\t')) { + line_content.pop_back(); + } + } + + if (pending_newlines > 0 && !first_line) { + if (pending_newlines == 1) { + buffer += ' '; + } else { + for (std::size_t count = 1; count < pending_newlines; ++count) { + buffer += '\n'; + } + } + pending_newlines = 0; + } + + if (!line_content.empty()) { + buffer += line_content; + } + line_content.clear(); + } + + auto skip_flow_scalar_line_break(const char current, + std::size_t &pending_newlines) -> void { + this->advance(1); + if (current == '\r' && this->peek() == '\n') { + this->advance(1); + } + pending_newlines++; + while (this->position_ < this->input_.size()) { + const char character{this->peek()}; + if (character == ' ' || character == '\t') { + this->advance(1); + } else if (character == '\n') { + pending_newlines++; + this->advance(1); + } else if (character == '\r') { + pending_newlines++; + this->advance(1); + if (this->peek() == '\n') { + this->advance(1); + } + } else { + break; + } + } + this->validate_flow_scalar_continuation(); + } + + auto validate_flow_scalar_continuation() -> void { + if (this->position_ >= this->input_.size()) { + return; + } + if (this->column_ == 1 && (this->check_document_marker('-') || + this->check_document_marker('.'))) { + throw YAMLParseError{this->line_, this->column_, + "Document marker inside flow scalar"}; + } + if (this->flow_level_ == 0 && this->block_indent_ != SIZE_MAX) { + 
const auto current_indent{static_cast(this->column_ - 1)}; + if (current_indent <= this->block_indent_) { + throw YAMLParseError{this->line_, this->column_, + "Insufficient indentation in flow scalar"}; + } + } + } + + auto validate_trailing_content() -> void { + auto lookahead{this->position_}; + bool seen_whitespace{false}; + while (lookahead < this->input_.size()) { + const char character{this->input_[lookahead]}; + if (character == ' ' || character == '\t') { + seen_whitespace = true; + lookahead++; + continue; + } + if (character == '\n' || character == '\r') { + return; + } + if (character == '#') { + if (seen_whitespace) { + return; + } + throw YAMLParseError{this->line_, this->column_, + "Invalid trailing content"}; + } + if (character == ':') { + return; + } + if (this->flow_level_ > 0 && is_flow_indicator(character)) { + return; + } + throw YAMLParseError{this->line_, this->column_, + "Invalid trailing content"}; + } + } + + auto scan_double_quoted_scalar() -> Token { + const auto start_line{this->line_}; + const auto start_column{this->column_}; + const auto quote_start{this->position_}; + + this->advance(1); + + auto &buffer{this->get_buffer()}; + std::string line_content; + bool first_line{true}; + std::size_t pending_newlines{0}; + bool found_closing_quote{false}; + + while (this->position_ < this->input_.size()) { + const char current{this->peek()}; + + if (current == '"') { + this->flush_flow_line(buffer, line_content, pending_newlines, + first_line, true); + this->advance(1); + found_closing_quote = true; + break; + } + + if (current == '\\') { + this->advance(1); + if (this->position_ < this->input_.size()) { + const char escaped{this->peek()}; + switch (escaped) { + case '0': + line_content += '\0'; + break; + case 'a': + line_content += '\a'; + break; + case 'b': + line_content += '\b'; + break; + case 't': + case '\t': + line_content += '\t'; + break; + case 'n': + line_content += '\n'; + break; + case 'v': + line_content += '\v'; + break; + 
case 'f': + line_content += '\f'; + break; + case 'r': + line_content += '\r'; + break; + case 'e': + line_content += '\x1b'; + break; + case ' ': + line_content += ' '; + break; + case '"': + line_content += '"'; + break; + case '/': + line_content += '/'; + break; + case '\\': + line_content += '\\'; + break; + case 'N': + line_content += "\xc2\x85"; + break; + case '_': + line_content += "\xc2\xa0"; + break; + case 'L': + line_content += "\xe2\x80\xa8"; + break; + case 'P': + line_content += "\xe2\x80\xa9"; + break; + case 'x': + this->advance(1); + line_content += this->parse_hex_escape(2); + continue; + case 'u': + this->advance(1); + line_content += this->parse_hex_escape(4); + continue; + case 'U': + this->advance(1); + line_content += this->parse_hex_escape(8); + continue; + case '\n': + case '\r': + if (escaped == '\r' && this->peek(1) == '\n') { + this->advance(1); + } + this->advance(1); + while (this->position_ < this->input_.size() && + (this->peek() == ' ' || this->peek() == '\t')) { + this->advance(1); + } + continue; + default: + throw YAMLParseError{this->line_, this->column_, + "Invalid escape sequence in " + "double-quoted scalar"}; + } + this->advance(1); + } + } else if (current == '\n' || current == '\r') { + this->flush_flow_line(buffer, line_content, pending_newlines, + first_line); + first_line = false; + this->skip_flow_scalar_line_break(current, pending_newlines); + } else { + line_content += current; + this->advance(1); + } + } + + if (!found_closing_quote) { + throw YAMLParseError{start_line, start_column, + "Missing closing quote in double-quoted scalar"}; + } + + this->validate_trailing_content(); + + const auto quoted_raw{ + this->roundtrip_ + ? 
this->input_.substr(quote_start + 1, + this->position_ - quote_start - 2) + : std::string_view{}}; + return Token{.type = TokenType::Scalar, + .value = buffer, + .line = start_line, + .column = start_column, + .scalar_style = ScalarStyle::DoubleQuoted, + .multiline = !first_line, + .quoted_original = quoted_raw}; + } + + auto parse_hex_escape(const std::size_t digits) -> std::string { + std::string hex; + for (std::size_t index = 0; + index < digits && this->position_ < this->input_.size(); ++index) { + hex += this->peek(); + this->advance(1); + } + + if (hex.size() != digits) { + throw YAMLParseError{this->line_, this->column_, + "Truncated hex escape sequence"}; + } + + std::size_t parsed{0}; + unsigned long codepoint{}; + try { + codepoint = std::stoul(hex, &parsed, 16); + } catch (...) { + throw YAMLParseError{this->line_, this->column_, + "Invalid hex escape sequence"}; + } + + if (parsed != hex.size()) { + throw YAMLParseError{this->line_, this->column_, + "Invalid hex escape sequence"}; + } + + return codepoint_to_utf8(static_cast(codepoint)); + } + + [[nodiscard]] auto calculate_parent_indentation( + const std::size_t indicator_position) const noexcept -> std::size_t { + std::size_t line_start{indicator_position}; + while (line_start > 0 && this->input_[line_start - 1] != '\n' && + this->input_[line_start - 1] != '\r') { + line_start--; + } + + std::size_t leading_spaces{0}; + std::size_t scan_position{line_start}; + while (scan_position < this->input_.size() && + this->input_[scan_position] == ' ') { + leading_spaces++; + scan_position++; + } + + bool in_sequence_entry{false}; + if (scan_position < this->input_.size() - 1 && + this->input_[scan_position] == '-' && + this->input_[scan_position + 1] == ' ') { + in_sequence_entry = true; + } + + bool is_mapping_value_same_line{false}; + for (std::size_t index = line_start; index < indicator_position; ++index) { + if (this->input_[index] == ':') { + is_mapping_value_same_line = true; + break; + } + } + + if 
(in_sequence_entry && is_mapping_value_same_line) { + return leading_spaces + 2; + } + + if (is_mapping_value_same_line) { + return leading_spaces; + } + + return 0; + } + + auto detect_block_scalar_indent(const std::size_t explicit_indent, + const std::size_t indicator_position, + const std::uint64_t start_line, + const std::uint64_t start_column) + -> std::size_t { + std::size_t content_indent{0}; + + if (explicit_indent > 0) { + const auto parent_indent{ + this->calculate_parent_indentation(indicator_position)}; + content_indent = parent_indent + explicit_indent; + } else { + const auto saved_position{this->position_}; + const auto saved_line{this->line_}; + const auto saved_column{this->column_}; + + std::size_t max_leading_empty_indent{0}; + std::size_t current_empty_indent{0}; + while (this->position_ < this->input_.size()) { + if (this->peek() == ' ') { + content_indent++; + current_empty_indent++; + this->advance(1); + } else if (this->peek() == '\n' || this->peek() == '\r') { + if (current_empty_indent > max_leading_empty_indent) { + max_leading_empty_indent = current_empty_indent; + } + content_indent = 0; + current_empty_indent = 0; + this->advance(1); + } else { + break; + } + } + + this->position_ = saved_position; + this->line_ = saved_line; + this->column_ = saved_column; + + if (max_leading_empty_indent > content_indent && content_indent > 0) { + throw YAMLParseError{ + start_line, start_column, + "Leading empty line has more spaces than content indentation"}; + } + } + + if (content_indent == 0 && start_column > 5) { + content_indent = 1; + } + + return content_indent; + } + + auto scan_block_scalar(const ScalarStyle style) -> Token { + const auto start_line{this->line_}; + const auto start_column{this->column_}; + const auto indicator_position{this->position_}; + + this->advance(1); + + char chomping{'c'}; + std::size_t explicit_indent{0}; + bool indent_first{false}; + + bool seen_header_whitespace{false}; + while (this->position_ < 
this->input_.size()) { + const char current{this->peek()}; + if (current == '-') { + chomping = '-'; + this->advance(1); + } else if (current == '+') { + chomping = '+'; + this->advance(1); + } else if (current >= '1' && current <= '9') { + explicit_indent = static_cast(current - '0'); + if (chomping == 'c') { + indent_first = true; + } + this->advance(1); + } else if (current == ' ' || current == '\t') { + seen_header_whitespace = true; + this->advance(1); + } else if (current == '#' && seen_header_whitespace) { + const auto comment_start{this->position_}; + while (this->position_ < this->input_.size() && this->peek() != '\n') { + this->advance(1); + } + if (this->roundtrip_) { + this->block_scalar_comment_ = std::string{this->input_.substr( + comment_start, this->position_ - comment_start)}; + } + } else if (current == '\n' || current == '\r') { + break; + } else { + throw YAMLParseError{this->line_, this->column_, + "Invalid content in block scalar header"}; + } + } + + if (this->peek() == '\n' || this->peek() == '\r') { + this->advance(1); + if (this->input_[this->position_ - 1] == '\r' && this->peek() == '\n') { + this->advance(1); + } + } + + auto &buffer{this->get_buffer()}; + + // For folded scalars in roundtrip mode, build a parallel buffer that + // preserves original line breaks (literal-style) for round-trip output + const bool build_original{style == ScalarStyle::Folded && this->roundtrip_}; + std::string *original{nullptr}; + std::string original_trailing; + if (build_original) { + original = &this->get_buffer(); + } + + const auto content_indent{this->detect_block_scalar_indent( + explicit_indent, indicator_position, start_line, start_column)}; + + std::size_t blank_line_count{0}; + bool previous_was_more_indented{false}; + bool previous_started_with_whitespace{false}; + bool had_line_break{false}; + std::string trailing_newlines; + + while (this->position_ < this->input_.size()) { + std::size_t line_indent{0}; + while (this->position_ < 
this->input_.size() && this->peek() == ' ') { + line_indent++; + this->advance(1); + } + + if (this->peek() == '\n' || this->peek() == '\r') { + if (style == ScalarStyle::Literal) { + if (line_indent > content_indent) { + buffer += trailing_newlines; + trailing_newlines.clear(); + for (std::size_t index = content_indent; index < line_indent; + ++index) { + buffer += ' '; + } + } + trailing_newlines += '\n'; + } else { + blank_line_count++; + if (original) { + if (line_indent > content_indent) { + *original += original_trailing; + original_trailing.clear(); + for (std::size_t index = content_indent; index < line_indent; + ++index) { + *original += ' '; + } + } + original_trailing += '\n'; + } + } + this->advance(1); + if (this->input_[this->position_ - 1] == '\r' && this->peek() == '\n') { + this->advance(1); + } + continue; + } + + if (line_indent < content_indent) { + for (std::size_t index = 0; index < line_indent; ++index) { + this->position_--; + this->column_--; + } + break; + } + + if (line_indent == 0 && this->position_ + 2 < this->input_.size()) { + if ((this->peek() == '-' && this->peek(1) == '-' && + this->peek(2) == '-') || + (this->peek() == '.' && this->peek(1) == '.' 
&& + this->peek(2) == '.')) { + break; + } + } + + if (style == ScalarStyle::Literal) { + buffer += trailing_newlines; + trailing_newlines.clear(); + } else { + const bool starts_with_whitespace{this->peek() == '\t'}; + if (had_line_break) { + const bool preserve_line_break{ + previous_was_more_indented || previous_started_with_whitespace || + line_indent > content_indent || starts_with_whitespace}; + if (blank_line_count == 0 && !preserve_line_break) { + buffer += ' '; + } else { + if (preserve_line_break) { + buffer += '\n'; + } + for (std::size_t count = 0; count < blank_line_count; ++count) { + buffer += '\n'; + } + } + } else if (blank_line_count > 0) { + for (std::size_t count = 0; count < blank_line_count; ++count) { + buffer += '\n'; + } + } + blank_line_count = 0; + had_line_break = false; + previous_started_with_whitespace = starts_with_whitespace; + + if (original) { + *original += original_trailing; + original_trailing.clear(); + } + } + + for (std::size_t index = content_indent; index < line_indent; ++index) { + buffer += ' '; + if (original) { + *original += ' '; + } + } + + while (this->position_ < this->input_.size() && this->peek() != '\n' && + this->peek() != '\r') { + const auto character{this->peek()}; + buffer += character; + if (original) { + *original += character; + } + this->advance(1); + } + + if (style == ScalarStyle::Folded) { + previous_was_more_indented = (line_indent > content_indent); + } + + if (this->peek() == '\n' || this->peek() == '\r') { + if (style == ScalarStyle::Literal) { + trailing_newlines += '\n'; + } else { + had_line_break = true; + if (original) { + original_trailing += '\n'; + } + } + this->advance(1); + if (this->input_[this->position_ - 1] == '\r' && this->peek() == '\n') { + this->advance(1); + } + } + } + + if (chomping == '+') { + if (style == ScalarStyle::Literal) { + buffer += trailing_newlines; + } else { + if (had_line_break) { + buffer += '\n'; + } + for (std::size_t count = 0; count < blank_line_count; 
++count) { + buffer += '\n'; + } + if (original) { + *original += original_trailing; + } + } + } else if (chomping == 'c' && !buffer.empty()) { + if (style == ScalarStyle::Literal) { + if (!trailing_newlines.empty()) { + buffer += '\n'; + } + } else if (had_line_break || blank_line_count > 0) { + buffer += '\n'; + if (original && !original_trailing.empty()) { + *original += '\n'; + } + } + } + + BlockChomping block_chomping{BlockChomping::Clip}; + if (chomping == '-') { + block_chomping = BlockChomping::Strip; + } else if (chomping == '+') { + block_chomping = BlockChomping::Keep; + } + + return Token{.type = TokenType::Scalar, + .value = buffer, + .line = start_line, + .column = start_column, + .scalar_style = style, + .chomping = block_chomping, + .block_original = original ? std::string_view{*original} + : std::string_view{}, + .explicit_indent = explicit_indent, + .indent_before_chomping = indent_first}; + } + + auto scan_plain_scalar() -> Token { + const auto start_line{this->line_}; + const auto start_column{this->column_}; + const auto start_position{this->position_}; + const bool in_flow{this->flow_level_ > 0}; + + if (in_flow) { + const char first{this->peek()}; + if (first == '-' || first == '?' || first == ':') { + const char after{this->peek(1)}; + if (after == '\0' || is_whitespace(after) || is_flow_indicator(after)) { + throw YAMLParseError{start_line, start_column, + "Invalid plain scalar start in flow context"}; + } + } + } + + const std::size_t min_indent{ + in_flow + ? 0 + : (this->block_indent_ == SIZE_MAX ? 
0 : this->block_indent_ + 1)}; + bool used_multiline{false}; + std::string pending_whitespace; + std::string *buffer{nullptr}; + + while (this->position_ < this->input_.size()) { + const auto line_start{this->position_}; + + while (this->position_ < this->input_.size()) { + const char current{this->peek()}; + + if (current == ':') { + const char after{this->peek(1)}; + if (after == '\0' || is_whitespace(after)) { + break; + } + if (in_flow && is_flow_indicator(after)) { + break; + } + } + + if (current == '#') { + if (this->position_ > line_start) { + const char before{this->input_[this->position_ - 1]}; + if (before == ' ' || before == '\t') { + break; + } + } + } + + if (in_flow && is_flow_indicator(current)) { + break; + } + + if (current == '\n' || current == '\r') { + break; + } + + this->advance(1); + } + + auto segment_end{this->position_}; + while (segment_end > line_start && + (this->input_[segment_end - 1] == ' ' || + this->input_[segment_end - 1] == '\t')) { + segment_end--; + } + + const auto segment{ + this->input_.substr(line_start, segment_end - line_start)}; + + if (!segment.empty()) { + if (used_multiline) { + *buffer += pending_whitespace; + } + if (buffer != nullptr) { + *buffer += segment; + } + pending_whitespace.clear(); + } + + if (this->position_ >= this->input_.size()) { + break; + } + + const char current{this->peek()}; + if (current != '\n' && current != '\r') { + break; + } + + const auto saved_position{this->position_}; + const auto saved_line{this->line_}; + const auto saved_column{this->column_}; + + std::size_t newline_count{0}; + std::size_t next_line_indent{0}; + + while (this->position_ < this->input_.size()) { + const char character{this->peek()}; + if (character == '\n') { + newline_count++; + this->advance(1); + next_line_indent = 0; + } else if (character == '\r') { + newline_count++; + this->advance(1); + if (this->peek() == '\n') { + this->advance(1); + } + next_line_indent = 0; + } else if (character == ' ' || character == 
'\t') { + next_line_indent++; + this->advance(1); + } else { + break; + } + } + + if (this->position_ >= this->input_.size()) { + this->position_ = saved_position; + this->line_ = saved_line; + this->column_ = saved_column; + break; + } + + if (next_line_indent < min_indent) { + this->position_ = saved_position; + this->line_ = saved_line; + this->column_ = saved_column; + break; + } + + const char next_char{this->peek()}; + + if (in_flow && is_flow_indicator(next_char)) { + this->position_ = saved_position; + this->line_ = saved_line; + this->column_ = saved_column; + break; + } + + if (next_char == '-' || next_char == '?' || next_char == ':') { + const char after{this->peek(1)}; + if (after == '\0' || is_whitespace(after)) { + if (next_line_indent == 0 || start_column < 3 || + next_line_indent <= start_column - 3) { + this->position_ = saved_position; + this->line_ = saved_line; + this->column_ = saved_column; + break; + } + } + if (in_flow && next_char == ':') { + if (is_flow_indicator(after)) { + this->position_ = saved_position; + this->line_ = saved_line; + this->column_ = saved_column; + break; + } + } + } + + if (!in_flow && this->line_contains_mapping_key()) { + this->position_ = saved_position; + this->line_ = saved_line; + this->column_ = saved_column; + break; + } + + if (next_line_indent == 0) { + if ((next_char == '-' && this->peek(1) == '-' && + this->peek(2) == '-') || + (next_char == '.' && this->peek(1) == '.' 
&& + this->peek(2) == '.')) { + this->position_ = saved_position; + this->line_ = saved_line; + this->column_ = saved_column; + break; + } + } + + if (next_char == '#') { + this->position_ = saved_position; + this->line_ = saved_line; + this->column_ = saved_column; + break; + } + + if (!used_multiline) { + buffer = &this->get_buffer(); + *buffer = + this->input_.substr(start_position, segment_end - start_position); + } + used_multiline = true; + if (newline_count == 1) { + pending_whitespace = " "; + } else { + pending_whitespace = std::string(newline_count - 1, '\n'); + } + } + + if (used_multiline && buffer != nullptr) { + auto raw_end{this->position_}; + while (raw_end > start_position && (this->input_[raw_end - 1] == ' ' || + this->input_[raw_end - 1] == '\t')) { + raw_end--; + } + return Token{.type = TokenType::Scalar, + .value = *buffer, + .line = start_line, + .column = start_column, + .scalar_style = ScalarStyle::Plain, + .multiline = true, + .block_original = + this->roundtrip_ + ? 
this->input_.substr(start_position, + raw_end - start_position) + : std::string_view{}}; + } + + auto length{this->position_ - start_position}; + while (length > 0 && (this->input_[start_position + length - 1] == ' ' || + this->input_[start_position + length - 1] == '\t')) { + length--; + } + + return Token{.type = TokenType::Scalar, + .value = this->input_.substr(start_position, length), + .line = start_line, + .column = start_column, + .scalar_style = ScalarStyle::Plain}; + } + + auto get_buffer() -> std::string & { + this->scalar_buffers_.emplace_back(); + return this->scalar_buffers_.back(); + } + + std::string_view input_; + std::size_t position_{0}; + std::uint64_t line_{1}; + std::uint64_t column_{1}; + std::size_t flow_level_{0}; + bool stream_started_{false}; + bool stream_ended_{false}; + bool last_was_quoted_scalar_{false}; + bool tab_at_line_start_{false}; + bool roundtrip_{false}; + std::uint64_t comment_reference_line_{0}; + std::optional inline_comment_buffer_; + std::optional block_scalar_comment_; + std::vector preceding_comments_buffer_; + // SIZE_MAX means "not set" (top-level), 0 means parent at indent 0 + std::size_t block_indent_{SIZE_MAX}; + std::deque scalar_buffers_; +}; + +} // namespace sourcemeta::core::yaml + +#endif diff --git a/vendor/core/src/core/yaml/parser.h b/vendor/core/src/core/yaml/parser.h new file mode 100644 index 00000000..34daea52 --- /dev/null +++ b/vendor/core/src/core/yaml/parser.h @@ -0,0 +1,1961 @@ +#ifndef SOURCEMETA_CORE_YAML_PARSER_H_ +#define SOURCEMETA_CORE_YAML_PARSER_H_ + +#include "lexer.h" + +#include +#include +#include +#include +#include + +#include // assert +#include // std::uint64_t +#include // std::optional +#include // std::ostringstream +#include // std::string +#include // std::string_view +#include // std::unordered_map +#include // std::unordered_set +#include // std::move +#include // std::vector + +namespace sourcemeta::core::yaml { + +struct CallbackRecord { + JSON::ParsePhase phase; + 
JSON::Type type; + std::uint64_t line; + std::uint64_t column; + JSON::ParseContext context; + std::size_t index; + std::string property; +}; + +struct AnchoredValue { + JSON value; + std::vector callbacks; +}; + +class Parser { +public: + Parser(Lexer *lexer, const JSON::ParseCallback *callback, + YAMLRoundTrip *roundtrip = nullptr) + : lexer_{lexer}, callback_{callback}, roundtrip_{roundtrip} {} + + auto parse() -> JSON { + std::optional token; + + if (!this->pending_tokens_.empty()) { + token = this->pending_tokens_.front(); + this->pending_tokens_.pop_front(); + if (this->pending_tokens_.empty()) { + this->pending_token_position_.reset(); + } + } else { + token = this->lexer_->next(); + if (!token.has_value() || token->type != TokenType::StreamStart) { + throw YAMLParseError{this->lexer_->line(), this->lexer_->column(), + "Expected stream start"}; + } + token = this->lexer_->next(); + } + + if (!token.has_value() || token->type == TokenType::StreamEnd) { + throw YAMLParseError{1, 1, "Empty YAML document"}; + } + + if (token->type == TokenType::DirectiveYAML || + token->type == TokenType::DirectiveTag || + token->type == TokenType::DirectiveReserved) { + this->process_directives(token.value()); + } + + if (token->type == TokenType::DocumentStart) { + if (this->roundtrip_) { + this->roundtrip_->leading_comments = + this->lexer_->take_preceding_comments(); + this->roundtrip_->explicit_document_start = true; + } + this->document_start_line_ = token->line; + const auto pos_before_next{this->lexer_->position()}; + token = this->lexer_->next(); + if (this->roundtrip_) { + this->roundtrip_->document_start_comment = + this->lexer_->take_inline_comment(); + } + + if (!token.has_value() || token->type == TokenType::StreamEnd || + token->type == TokenType::DocumentEnd || + token->type == TokenType::DocumentStart) { + if (token.has_value() && token->type == TokenType::DocumentStart) { + this->pending_tokens_.push_back(token.value()); + this->pending_token_position_ = 
pos_before_next; + } + return JSON{nullptr}; + } + } else if (!token.has_value() || token->type == TokenType::StreamEnd) { + throw YAMLParseError{1, 1, "Empty YAML document"}; + } else if (token->type == TokenType::DocumentEnd) { + while (token.has_value() && token->type == TokenType::DocumentEnd) { + token = this->lexer_->next(); + } + if (!token.has_value() || token->type == TokenType::StreamEnd) { + throw YAMLParseError{1, 1, "Empty YAML document"}; + } + this->pending_tokens_.push_back(token.value()); + return JSON{nullptr}; + } + + if (this->roundtrip_) { + auto comments{this->lexer_->take_preceding_comments()}; + this->lexer_->take_inline_comment(); + if (this->roundtrip_->explicit_document_start) { + this->roundtrip_->post_start_comments = std::move(comments); + } else { + this->roundtrip_->leading_comments = std::move(comments); + } + } + + auto result{this->parse_value(token.value(), JSON::ParseContext::Root, 0, + empty_property_)}; + + auto pos_before_token{this->lexer_->position()}; + token = this->next_token(); + if (this->roundtrip_) { + auto root_inline{this->lexer_->take_inline_comment()}; + if (root_inline.has_value()) { + this->roundtrip_->styles[this->pointer_stack_].comment_inline = + std::move(root_inline); + } + } + while (token.has_value() && token->type == TokenType::DocumentEnd) { + if (this->roundtrip_) { + this->roundtrip_->pre_end_comments = + this->lexer_->take_preceding_comments(); + this->roundtrip_->explicit_document_end = true; + } + pos_before_token = this->lexer_->position(); + token = this->next_token(); + if (this->roundtrip_) { + this->roundtrip_->document_end_comment = + this->lexer_->take_inline_comment(); + } + } + + if (this->roundtrip_) { + auto trailing{this->lexer_->take_preceding_comments()}; + if (!trailing.empty()) { + this->roundtrip_->trailing_comments = std::move(trailing); + } + } + + if (token.has_value() && token->type != TokenType::StreamEnd) { + this->pending_tokens_.push_back(token.value()); + if (token->type 
== TokenType::DocumentStart) { + this->pending_token_position_ = token->position; + } else { + this->pending_token_position_ = pos_before_token; + } + } + + return result; + } + + [[nodiscard]] auto position() const noexcept -> std::size_t { + if (this->pending_token_position_.has_value()) { + return *this->pending_token_position_; + } + return this->lexer_->position(); + } + + auto validate_end_of_stream() -> void { + auto token{this->next_token()}; + bool saw_document_end{false}; + while (token.has_value() && token->type == TokenType::DocumentEnd) { + saw_document_end = true; + token = this->next_token(); + } + if (!token.has_value() || token->type == TokenType::StreamEnd) { + return; + } + while (token.has_value() && token->type != TokenType::StreamEnd) { + if (token->type == TokenType::DocumentStart) { + this->tag_directives_.clear(); + token = this->next_token(); + if (!token.has_value() || token->type == TokenType::StreamEnd) { + return; + } + if (token->type == TokenType::DocumentEnd || + token->type == TokenType::DocumentStart) { + continue; + } + } + if (token->type == TokenType::DirectiveYAML || + token->type == TokenType::DirectiveTag || + token->type == TokenType::DirectiveReserved) { + if (!saw_document_end) { + throw YAMLParseError{token->line, token->column, + "Directive not allowed without preceding " + "document end marker"}; + } + this->process_directives(token.value()); + continue; + } + if (!saw_document_end && token->type != TokenType::DocumentStart) { + throw YAMLParseError{token->line, token->column, + "Unexpected content after document"}; + } + this->parse_value(token.value(), JSON::ParseContext::Root, 0, + empty_property_); + saw_document_end = false; + token = this->next_token(); + while (token.has_value() && token->type == TokenType::DocumentEnd) { + saw_document_end = true; + token = this->next_token(); + } + } + } + +private: + auto process_directives(Token &token) -> void { + bool seen_yaml_directive{false}; + while (token.type == 
TokenType::DirectiveYAML || + token.type == TokenType::DirectiveTag || + token.type == TokenType::DirectiveReserved) { + if (token.type == TokenType::DirectiveYAML) { + if (seen_yaml_directive) { + throw YAMLParseError{token.line, token.column, + "Duplicate %YAML directive"}; + } + seen_yaml_directive = true; + const auto content{token.value}; + auto cursor{static_cast(5)}; + while (cursor < content.size() && + (content[cursor] == ' ' || content[cursor] == '\t')) { + cursor++; + } + while (cursor < content.size() && content[cursor] != ' ' && + content[cursor] != '\t' && content[cursor] != '#') { + cursor++; + } + while (cursor < content.size() && + (content[cursor] == ' ' || content[cursor] == '\t')) { + cursor++; + } + if (cursor < content.size() && content[cursor] != '#') { + throw YAMLParseError{token.line, token.column, + "Invalid content in %YAML directive"}; + } + } else if (token.type == TokenType::DirectiveTag) { + const auto content{token.value}; + auto cursor{static_cast(4)}; + while (cursor < content.size() && + (content[cursor] == ' ' || content[cursor] == '\t')) { + cursor++; + } + const auto handle_start{cursor}; + while (cursor < content.size() && content[cursor] != ' ' && + content[cursor] != '\t') { + cursor++; + } + const auto handle{ + std::string{content.substr(handle_start, cursor - handle_start)}}; + while (cursor < content.size() && + (content[cursor] == ' ' || content[cursor] == '\t')) { + cursor++; + } + const auto prefix_start{cursor}; + while (cursor < content.size() && content[cursor] != ' ' && + content[cursor] != '\t' && content[cursor] != '\n' && + content[cursor] != '\r') { + cursor++; + } + const auto prefix{ + std::string{content.substr(prefix_start, cursor - prefix_start)}}; + if (!handle.empty() && !prefix.empty()) { + this->tag_directives_.insert_or_assign(handle, prefix); + } + } + auto next{this->lexer_->next()}; + if (!next.has_value()) { + break; + } + token = next.value(); + } + } + + auto resolve_tag(const std::string_view 
raw_tag) -> std::string { + if (raw_tag.size() > 2 && raw_tag[0] == '!' && raw_tag[1] == '<' && + raw_tag.back() == '>') { + return std::string{raw_tag.substr(2, raw_tag.size() - 3)}; + } + + if (raw_tag.starts_with("!!")) { + const auto iterator{this->tag_directives_.find("!!")}; + if (iterator != this->tag_directives_.end()) { + return iterator->second + std::string{raw_tag.substr(2)}; + } + return "tag:yaml.org,2002:" + std::string{raw_tag.substr(2)}; + } + + if (raw_tag.size() > 1 && raw_tag[0] == '!') { + const auto second_bang{raw_tag.find('!', 1)}; + if (second_bang != std::string_view::npos && + second_bang < raw_tag.size() - 1) { + const auto handle{std::string{raw_tag.substr(0, second_bang + 1)}}; + const auto iterator{this->tag_directives_.find(handle)}; + if (iterator != this->tag_directives_.end()) { + return iterator->second + + std::string{raw_tag.substr(second_bang + 1)}; + } + } + } + + return std::string{raw_tag}; + } + + auto invoke_callback(const JSON::ParsePhase phase, const JSON::Type type, + const std::uint64_t line, const std::uint64_t column, + const JSON::ParseContext context, + const std::size_t index, const std::string &property) + -> void { + if (this->callback_ && *this->callback_) { + (*this->callback_)(phase, type, line, column, context, index, property); + } + + if (this->recording_anchor_) { + this->current_anchor_callbacks_.push_back( + {phase, type, line, column, context, index, std::string{property}}); + } + } + + [[nodiscard]] auto effective_line(const Token &token, + const JSON::ParseContext context, + const std::uint64_t key_line) const + -> std::uint64_t { + return (context == JSON::ParseContext::Property && key_line > 0) + ? key_line + : token.line; + } + + [[nodiscard]] auto effective_column(const Token &token, + const JSON::ParseContext context, + const std::uint64_t key_column) const + -> std::uint64_t { + return (context == JSON::ParseContext::Property && key_column > 0) + ? 
key_column + : token.column; + } + + [[nodiscard]] auto json_to_key_string(const JSON &value) const + -> std::string { + if (value.is_string()) { + return value.to_string(); + } + if (value.is_null()) { + return ""; + } + std::ostringstream stream; + stream << value; + return stream.str(); + } + + auto parse_value(const Token &token, const JSON::ParseContext context, + const std::size_t index, const std::string &property, + const std::uint64_t key_line = 0, + const std::uint64_t key_column = 0) -> JSON { + if (this->roundtrip_) { + if (context == JSON::ParseContext::Property) { + this->pointer_stack_.push_back(std::string{property}); + } else if (context == JSON::ParseContext::Index) { + this->pointer_stack_.push_back(index); + } + } + + std::optional anchor_name; + std::uint64_t anchor_line{0}; + std::optional tag; + std::size_t anchor_count{0}; + std::optional anchor_inline_comment; + Token current_token{token}; + std::uint64_t node_start_column{token.column}; + std::uint64_t prefix_line{token.line}; + + while (current_token.type == TokenType::Anchor || + current_token.type == TokenType::Tag) { + if (this->lexer_->flow_level() == 0 && + context == JSON::ParseContext::Property && key_line > 0 && + current_token.line != key_line) { + const auto value_indent{ + current_token.column > 0 + ? 
static_cast(current_token.column - 1) + : static_cast(0)}; + const auto parent_indent{this->lexer_->block_indent()}; + if (parent_indent != SIZE_MAX && value_indent <= parent_indent) { + throw YAMLParseError{current_token.line, current_token.column, + "Node property at wrong indentation level"}; + } + } + if (current_token.type == TokenType::Anchor) { + anchor_name = current_token.value; + anchor_line = current_token.line; + anchor_count++; + } else { + tag = this->resolve_tag(current_token.value); + } + + auto next{this->lexer_->next()}; + if (this->roundtrip_ && anchor_name.has_value()) { + anchor_inline_comment = this->lexer_->take_inline_comment(); + } + if (!next.has_value() || next->type == TokenType::StreamEnd || + next->type == TokenType::DocumentEnd || + next->type == TokenType::DocumentStart) { + JSON empty_value{nullptr}; + if (tag.has_value()) { + if (tag.value() == "tag:yaml.org,2002:str") { + empty_value = JSON{std::string{}}; + } + } + if (next.has_value()) { + this->pending_tokens_.push_back(next.value()); + } + if (this->roundtrip_ && anchor_name.has_value()) { + auto &style{this->roundtrip_->styles[this->pointer_stack_]}; + style.anchor = std::string{anchor_name.value()}; + if (anchor_inline_comment.has_value()) { + style.comment_inline = std::move(anchor_inline_comment); + } + } + if (this->roundtrip_ && context != JSON::ParseContext::Root) { + this->pointer_stack_.pop_back(); + } + return empty_value; + } + current_token = next.value(); + + if (current_token.type == TokenType::Scalar && + current_token.column <= key_column && key_column > 0) { + auto after{this->lexer_->next()}; + if (after.has_value() && after->type == TokenType::BlockMappingValue) { + this->pending_tokens_.push_back(current_token); + this->pending_tokens_.push_back(after.value()); + if (anchor_name.has_value()) { + this->register_anchored_null(anchor_name.value(), token, context, + index, property, + anchor_inline_comment); + } + if (this->roundtrip_ && context != 
JSON::ParseContext::Root) { + this->pointer_stack_.pop_back(); + } + return JSON{nullptr}; + } + if (after.has_value()) { + this->pending_tokens_.push_back(after.value()); + } + } + + if (anchor_name.has_value() && context == JSON::ParseContext::Index && + current_token.type == TokenType::BlockSequenceEntry) { + const auto block_indent{this->lexer_->block_indent()}; + const auto entry_indent{ + current_token.column > 0 + ? static_cast(current_token.column - 1) + : static_cast(0)}; + if (block_indent != SIZE_MAX && entry_indent <= block_indent) { + this->pending_tokens_.push_back(current_token); + this->register_anchored_null(anchor_name.value(), token, context, + index, property, anchor_inline_comment); + if (this->roundtrip_ && context != JSON::ParseContext::Root) { + this->pointer_stack_.pop_back(); + } + return JSON{nullptr}; + } + } + } + + if (tag.has_value() && (current_token.type == TokenType::FlowEntry || + current_token.type == TokenType::MappingEnd || + current_token.type == TokenType::SequenceEnd)) { + JSON empty_value{nullptr}; + if (tag.value() == "tag:yaml.org,2002:str") { + empty_value = JSON{std::string{}}; + } + this->pending_tokens_.push_back(current_token); + if (this->roundtrip_ && context != JSON::ParseContext::Root) { + this->pointer_stack_.pop_back(); + } + return empty_value; + } + + if (current_token.line != prefix_line) { + node_start_column = 0; + } + + if ((anchor_name.has_value() || tag.has_value()) && + this->lexer_->flow_level() == 0 && current_token.line == prefix_line && + current_token.type == TokenType::BlockSequenceEntry) { + throw YAMLParseError{current_token.line, current_token.column, + "Block sequence after node property must start " + "on a new line"}; + } + + if (anchor_name.has_value()) { + this->recording_anchor_ = true; + this->current_anchor_callbacks_.clear(); + } + + JSON result{nullptr}; + + switch (current_token.type) { + case TokenType::Scalar: { + auto next{this->next_token()}; + if (next.has_value() && next->type 
== TokenType::BlockMappingValue) { + if (current_token.multiline) { + throw YAMLParseError{current_token.line, current_token.column, + "Multi-line implicit mapping key"}; + } + if (this->lexer_->flow_level() > 0 && + next->line != current_token.line) { + throw YAMLParseError{next->line, next->column, + "Implicit key and value indicator on " + "different lines in flow context"}; + } + if (this->lexer_->flow_level() == 0 && + context == JSON::ParseContext::Property && key_line > 0 && + current_token.line == key_line) { + throw YAMLParseError{current_token.line, current_token.column, + "Implicit mapping key in block value on " + "same line as parent key"}; + } + if (this->lexer_->flow_level() == 0 && + (anchor_name.has_value() || tag.has_value()) && + this->document_start_line_ > 0 && + current_token.line == this->document_start_line_) { + throw YAMLParseError{ + current_token.line, current_token.column, + "Node properties before implicit mapping key on " + "document start line"}; + } + if (anchor_name.has_value() && anchor_line == current_token.line) { + JSON key_value{std::string{current_token.value}}; + this->recording_anchor_ = false; + this->anchors_.insert_or_assign( + std::string{anchor_name.value()}, + AnchoredValue{.value = key_value, + .callbacks = + std::move(this->current_anchor_callbacks_)}); + this->current_anchor_callbacks_.clear(); + anchor_name.reset(); + } + result = this->parse_block_mapping_from_first_key( + current_token, context, index, property, key_line, key_column, + node_start_column); + } else { + if (anchor_count > 1) { + throw YAMLParseError{current_token.line, current_token.column, + "Multiple anchors on a scalar node"}; + } + result = this->parse_scalar(current_token, tag, context, index, + property, key_line, key_column); + if (next.has_value()) { + this->pending_tokens_.push_back(next.value()); + } + } + break; + } + case TokenType::MappingStart: + result = this->parse_flow_mapping(current_token, context, index, + property, key_line, 
key_column); + this->record_collection_style(YAMLRoundTrip::CollectionStyle::Flow); + break; + case TokenType::SequenceStart: + result = this->parse_flow_sequence(current_token, context, index, + property, key_line, key_column); + this->record_collection_style(YAMLRoundTrip::CollectionStyle::Flow); + break; + case TokenType::BlockSequenceEntry: + result = this->parse_block_sequence(current_token, context, index, + property, key_line, key_column); + break; + case TokenType::BlockMappingKey: + case TokenType::BlockMappingValue: + result = this->parse_block_mapping(current_token, context, index, + property, key_line, key_column); + break; + case TokenType::Alias: { + auto next{this->next_token()}; + if (next.has_value() && next->type == TokenType::BlockMappingValue) { + const std::string alias_name{current_token.value}; + const auto iterator{this->anchors_.find(alias_name)}; + if (iterator == this->anchors_.end()) { + throw YAMLUnknownAnchorError{alias_name, current_token.line, + current_token.column}; + } + const auto key_string{ + this->json_to_key_string(iterator->second.value)}; + Token key_token{current_token}; + key_token.type = TokenType::Scalar; + key_token.value = key_string; + result = this->parse_block_mapping_from_first_key( + key_token, context, index, property, key_line, key_column, + node_start_column); + } else { + if (anchor_name.has_value()) { + throw YAMLParseError{current_token.line, current_token.column, + "Cannot anchor an alias node"}; + } + result = this->resolve_alias(current_token, context, index, property, + key_line, key_column); + if (this->roundtrip_) { + this->roundtrip_->aliases[this->pointer_stack_] = + std::string{current_token.value}; + } + if (next.has_value()) { + this->pending_tokens_.push_back(next.value()); + } + } + break; + } + default: + throw YAMLParseError{current_token.line, current_token.column, + "Unexpected token"}; + } + + if (anchor_name.has_value()) { + this->recording_anchor_ = false; + 
this->anchors_.insert_or_assign( + std::string{anchor_name.value()}, + AnchoredValue{.value = result, + .callbacks = + std::move(this->current_anchor_callbacks_)}); + this->current_anchor_callbacks_.clear(); + + if (this->roundtrip_) { + auto &style{this->roundtrip_->styles[this->pointer_stack_]}; + style.anchor = std::string{anchor_name.value()}; + if (anchor_inline_comment.has_value()) { + style.comment_inline = std::move(anchor_inline_comment); + } + } + } + + if (this->roundtrip_ && context != JSON::ParseContext::Root) { + this->pointer_stack_.pop_back(); + } + + return result; + } + + auto parse_scalar(const Token &token, const std::optional &tag, + const JSON::ParseContext context, const std::size_t index, + const std::string &property, + const std::uint64_t key_line = 0, + const std::uint64_t key_column = 0) -> JSON { + JSON result{this->interpret_scalar(token.value, token.scalar_style, tag)}; + this->record_scalar_style(token); + + this->invoke_callback(JSON::ParsePhase::Pre, result.type(), + this->effective_line(token, context, key_line), + this->effective_column(token, context, key_column), + context, index, property); + + auto end_column{token.column}; + if (!token.value.empty()) { + end_column += static_cast(token.value.size()) - 1; + } + if (token.scalar_style == ScalarStyle::SingleQuoted || + token.scalar_style == ScalarStyle::DoubleQuoted) { + end_column += 2; + } + + this->invoke_callback(JSON::ParsePhase::Post, result.type(), token.line, + end_column, JSON::ParseContext::Root, 0, + empty_property_); + + return result; + } + + auto interpret_scalar(const std::string_view value, const ScalarStyle style, + const std::optional &tag) -> JSON { + if (tag.has_value()) { + const auto &tag_value{tag.value()}; + if (tag_value == "!" 
|| tag_value == "tag:yaml.org,2002:str") { + return JSON{std::string{value}}; + } + if (tag_value == "tag:yaml.org,2002:null") { + return JSON{nullptr}; + } + if (tag_value == "tag:yaml.org,2002:bool") { + if (value == "true" || value == "True" || value == "TRUE") { + return JSON{true}; + } + return JSON{false}; + } + if (tag_value == "tag:yaml.org,2002:int") { + return this->parse_integer(value); + } + if (tag_value == "tag:yaml.org,2002:float") { + return this->parse_float(value); + } + return JSON{std::string{value}}; + } + + if (style != ScalarStyle::Plain) { + return JSON{std::string{value}}; + } + + if (value.empty()) { + return JSON{nullptr}; + } + + if (value == "null" || value == "Null" || value == "NULL" || value == "~") { + return JSON{nullptr}; + } + + if (value == "true" || value == "True" || value == "TRUE") { + return JSON{true}; + } + + if (value == "false" || value == "False" || value == "FALSE") { + return JSON{false}; + } + + if (value == ".inf" || value == ".Inf" || value == ".INF" || + value == "+.inf" || value == "+.Inf" || value == "+.INF" || + value == "-.inf" || value == "-.Inf" || value == "-.INF" || + value == ".nan" || value == ".NaN" || value == ".NAN") { + return JSON{std::string{value}}; + } + + if (this->looks_like_number(value)) { + return this->parse_number(value); + } + + return JSON{std::string{value}}; + } + + [[nodiscard]] auto looks_like_number(const std::string_view value) const + -> bool { + if (value.empty()) { + return false; + } + + std::size_t start{0}; + if (value[0] == '-' || value[0] == '+') { + start = 1; + if (start >= value.size()) { + return false; + } + } + + if (value.size() > start + 1 && value[start] == '0') { + if (value[start + 1] == 'x' || value[start + 1] == 'X') { + return true; + } + if (value[start + 1] == 'o' || value[start + 1] == 'O') { + return true; + } + } + + bool has_digit{false}; + bool has_dot{false}; + bool has_exp{false}; + + for (std::size_t index = start; index < value.size(); ++index) { + 
const char current{value[index]}; + if (current >= '0' && current <= '9') { + has_digit = true; + } else if (current == '.') { + if (has_dot || has_exp) { + return false; + } + has_dot = true; + } else if (current == 'e' || current == 'E') { + if (has_exp || !has_digit) { + return false; + } + has_exp = true; + if (index + 1 < value.size() && + (value[index + 1] == '+' || value[index + 1] == '-')) { + ++index; + } + } else { + return false; + } + } + + return has_digit; + } + + auto parse_number(const std::string_view value) -> JSON { + const std::size_t prefix{(value[0] == '-' || value[0] == '+') ? 1u : 0u}; + if (value.size() > prefix + 1 && value[prefix] == '0') { + const char indicator{value[prefix + 1]}; + if (indicator == 'x' || indicator == 'X') { + return this->parse_base_integer(value, 16); + } + if (indicator == 'o' || indicator == 'O') { + return this->parse_base_integer(value, 8); + } + } + + bool has_dot{false}; + bool has_exp{false}; + for (const char character : value) { + if (character == '.') { + has_dot = true; + } + if (character == 'e' || character == 'E') { + has_exp = true; + } + } + + if (has_exp) { + return JSON{Decimal{std::string{value}}}; + } + + if (has_dot) { + return this->parse_float(value); + } + + return this->parse_integer(value); + } + + auto parse_integer(const std::string_view value) -> JSON { + const auto result{to_int64_t(std::string{value})}; + return result.has_value() ? JSON{result.value()} + : JSON{Decimal{std::string{value}}}; + } + + auto parse_base_integer(const std::string_view value, const int base) + -> JSON { + const bool negative{value[0] == '-'}; + const std::size_t start{(value[0] == '-' || value[0] == '+') ? 3u : 2u}; + const auto result{to_int64_t(std::string{value.substr(start)}, base)}; + if (result.has_value()) { + return JSON{negative ? 
-result.value() : result.value()}; + } + return JSON{std::string{value}}; + } + + auto parse_float(const std::string_view value) -> JSON { + std::size_t significant_digits{0}; + bool seen_nonzero{false}; + for (const char character : value) { + if (character >= '0' && character <= '9') { + if (character != '0' || seen_nonzero) { + seen_nonzero = true; + significant_digits++; + } + } + } + + constexpr std::size_t double_precision_limit{15}; + if (significant_digits > double_precision_limit) { + return JSON{Decimal{std::string{value}}}; + } + + const auto result{to_double(std::string{value})}; + if (!result.has_value()) { + return JSON{Decimal{std::string{value}}}; + } + + const auto as_integer{static_cast(result.value())}; + if (result.value() == static_cast(as_integer)) { + return JSON{as_integer}; + } + + return JSON{result.value()}; + } + + auto parse_flow_mapping(const Token &start_token, + const JSON::ParseContext context, + const std::size_t index, const std::string &property, + const std::uint64_t key_line = 0, + const std::uint64_t key_column = 0) -> JSON { + this->invoke_callback( + JSON::ParsePhase::Pre, JSON::Type::Object, + this->effective_line(start_token, context, key_line), + this->effective_column(start_token, context, key_column), context, + index, property); + + JSON result{JSON::make_object()}; + std::unordered_set seen_keys; + bool found_compact_separator{false}; + + auto token{this->next_token()}; + + while (token.has_value() && token->type != TokenType::MappingEnd) { + if (token->type == TokenType::FlowEntry) { + if (token->compact_separator) { + found_compact_separator = true; + } + token = this->next_token(); + continue; + } + + auto key_token{token.value()}; + + std::optional key_tag; + while (key_token.type == TokenType::Anchor || + key_token.type == TokenType::Tag) { + if (key_token.type == TokenType::Tag) { + key_tag = this->resolve_tag(key_token.value); + } + token = this->next_token(); + if (!token.has_value()) { + throw 
YAMLParseError{this->lexer_->line(), this->lexer_->column(), + "Unexpected end of input in flow mapping"}; + } + key_token = token.value(); + } + + std::string key; + if (key_token.type == TokenType::BlockMappingValue) { + if (key_tag.has_value() && key_tag.value() == "tag:yaml.org,2002:str") { + key = ""; + } + } else if (key_token.type == TokenType::Scalar) { + key = std::string{key_token.value}; + this->record_key_scalar_style(key, key_token.scalar_style, + key_token.quoted_original); + } else { + throw YAMLParseError{key_token.line, key_token.column, + "Expected scalar key in mapping"}; + } + + if (seen_keys.contains(key)) { + throw YAMLDuplicateKeyError{key, key_token.line, key_token.column}; + } + seen_keys.insert(key); + + if (key_token.type != TokenType::BlockMappingValue) { + token = this->next_token(); + + if (!token.has_value()) { + throw YAMLParseError{this->lexer_->line(), this->lexer_->column(), + "Unexpected end of input in flow mapping"}; + } + + if (token->type == TokenType::FlowEntry || + token->type == TokenType::MappingEnd) { + if (token->type == TokenType::FlowEntry && token->compact_separator) { + found_compact_separator = true; + } + result.assign(key, JSON{nullptr}); + continue; + } + + if (token->type != TokenType::BlockMappingValue) { + const auto colon_column{key_token.column + + static_cast(key.size())}; + throw YAMLParseError{key_token.line, colon_column, + "Expected ':' after mapping key"}; + } + } + + token = this->next_token(); + if (!token.has_value()) { + throw YAMLParseError{this->lexer_->line(), this->lexer_->column(), + "Expected value after ':'"}; + } + + if (token->type == TokenType::FlowEntry || + token->type == TokenType::MappingEnd) { + if (token->type == TokenType::FlowEntry && token->compact_separator) { + found_compact_separator = true; + } + result.assign(key, JSON{nullptr}); + } else { + auto value{this->parse_value(token.value(), + JSON::ParseContext::Property, 0, key, + key_token.line, key_token.column)}; + 
result.assign(key, std::move(value)); + } + + if (token->type != TokenType::FlowEntry && + token->type != TokenType::MappingEnd) { + token = this->next_token(); + if (token.has_value() && token->type == TokenType::FlowEntry && + token->compact_separator) { + found_compact_separator = true; + } + if (token.has_value() && token->type != TokenType::FlowEntry && + token->type != TokenType::MappingEnd) { + throw YAMLParseError{token->line, token->column, + "Missing comma between flow mapping entries"}; + } + } + } + + const auto end_line{token.has_value() ? token->line : this->lexer_->line()}; + const auto end_column{token.has_value() ? token->column + : this->lexer_->column()}; + this->invoke_callback(JSON::ParsePhase::Post, JSON::Type::Object, end_line, + end_column, JSON::ParseContext::Root, 0, + empty_property_); + + if (this->roundtrip_ && found_compact_separator) { + this->roundtrip_->styles[this->pointer_stack_].compact_flow = true; + } + + return result; + } + + auto parse_flow_sequence(const Token &start_token, + const JSON::ParseContext context, + const std::size_t index, const std::string &property, + const std::uint64_t key_line = 0, + const std::uint64_t key_column = 0) -> JSON { + this->invoke_callback( + JSON::ParsePhase::Pre, JSON::Type::Array, + this->effective_line(start_token, context, key_line), + this->effective_column(start_token, context, key_column), context, + index, property); + + JSON result{JSON::make_array()}; + const auto parent_block_indent{this->lexer_->block_indent()}; + bool found_compact_separator{false}; + + auto token{this->next_token()}; + std::size_t element_index{0}; + + while (token.has_value() && token->type != TokenType::SequenceEnd) { + if (parent_block_indent != SIZE_MAX && token->line != start_token.line) { + const auto token_indent{ + token->column > 0 ? 
static_cast(token->column - 1) + : static_cast(0)}; + if (token_indent <= parent_block_indent) { + throw YAMLParseError{ + token->line, token->column, + "Flow content indented less than or equal to parent block level"}; + } + } + if (token->type == TokenType::FlowEntry) { + if (element_index == 0) { + throw YAMLParseError{token->line, token->column, + "Leading comma in flow sequence"}; + } + if (token->compact_separator) { + found_compact_separator = true; + } + token = this->next_token(); + if (token.has_value() && token->type == TokenType::FlowEntry) { + throw YAMLParseError{token->line, token->column, + "Empty entry in flow sequence"}; + } + continue; + } + + if (token->type == TokenType::BlockMappingKey) { + auto mapping{JSON::make_object()}; + token = this->next_token(); + if (!token.has_value()) { + throw YAMLParseError{this->lexer_->line(), this->lexer_->column(), + "Unexpected end after explicit key in flow"}; + } + + std::string key_string; + if (token->type == TokenType::Scalar) { + key_string = std::string{token->value}; + token = this->next_token(); + } else { + // For non-scalar keys, parse the value and stringify + auto key_value{this->parse_value(token.value(), + JSON::ParseContext::Index, + element_index, empty_property_)}; + key_string = this->json_to_key_string(key_value); + token = this->next_token(); + } + + if (token.has_value() && token->type == TokenType::BlockMappingValue) { + token = this->next_token(); + if (token.has_value() && token->type != TokenType::SequenceEnd && + token->type != TokenType::FlowEntry) { + auto value{this->parse_value( + token.value(), JSON::ParseContext::Property, 0, key_string)}; + mapping.assign(key_string, std::move(value)); + token = this->next_token(); + } else { + mapping.assign(key_string, JSON{nullptr}); + } + } else { + mapping.assign(key_string, JSON{nullptr}); + } + result.push_back(std::move(mapping)); + element_index++; + continue; + } + + auto value{this->parse_value(token.value(), 
JSON::ParseContext::Index, + element_index, empty_property_)}; + result.push_back(std::move(value)); + element_index++; + + token = this->next_token(); + if (token.has_value() && token->type == TokenType::FlowEntry && + token->compact_separator) { + found_compact_separator = true; + } + if (token.has_value() && token->type != TokenType::FlowEntry && + token->type != TokenType::SequenceEnd) { + throw YAMLParseError{token->line, token->column, + "Missing comma in flow sequence"}; + } + } + + const auto end_line{token.has_value() ? token->line : this->lexer_->line()}; + const auto end_column{token.has_value() ? token->column + : this->lexer_->column()}; + this->invoke_callback(JSON::ParsePhase::Post, JSON::Type::Array, end_line, + end_column, JSON::ParseContext::Root, 0, + empty_property_); + + if (this->roundtrip_ && found_compact_separator) { + this->roundtrip_->styles[this->pointer_stack_].compact_flow = true; + } + + return result; + } + + auto parse_block_sequence(const Token &start_token, + const JSON::ParseContext context, + const std::size_t index, + const std::string &property, + const std::uint64_t key_line = 0, + const std::uint64_t key_column = 0) -> JSON { + this->invoke_callback( + JSON::ParsePhase::Pre, JSON::Type::Array, + this->effective_line(start_token, context, key_line), + this->effective_column(start_token, context, key_column), context, + index, property); + + JSON result{JSON::make_array()}; + std::size_t element_index{0}; + const auto base_column{start_token.column}; + const auto sequence_indent{base_column > 0 + ? 
static_cast(base_column - 1) + : static_cast(0)}; + this->detect_indent_width(key_column, base_column); + this->lexer_->set_block_indent(sequence_indent); + this->record_preceding_comments_for_index(0); + + auto token{this->next_token()}; + if (token.has_value() && token->line != start_token.line) { + this->record_indicator_comment_for_index(element_index); + } + + if (token.has_value() && token->type != TokenType::BlockSequenceEntry && + token->type != TokenType::StreamEnd && + token->type != TokenType::DocumentEnd && + token->type != TokenType::DocumentStart) { + auto value{this->parse_value(token.value(), JSON::ParseContext::Index, + element_index, empty_property_)}; + result.push_back(std::move(value)); + element_index++; + token = this->next_token(); + } else if (token.has_value() && + token->type == TokenType::BlockSequenceEntry && + token->column == base_column) { + result.push_back(JSON{nullptr}); + element_index++; + } + + while (token.has_value() && token->type == TokenType::BlockSequenceEntry && + token->column >= base_column) { + if (element_index > 0) { + this->record_inline_comment_for_index(element_index - 1); + } + this->record_preceding_comments_for_index(element_index); + this->lexer_->set_block_indent(sequence_indent); + + if (token->column > base_column) { + if (token->column < base_column + 2) { + throw YAMLParseError{token->line, token->column, + "Wrong indentation for sequence entry"}; + } + auto value{this->parse_value(token.value(), JSON::ParseContext::Index, + element_index, empty_property_)}; + result.push_back(std::move(value)); + element_index++; + token = this->next_token(); + continue; + } + + const auto dash_line{token->line}; + token = this->next_token(); + if (token.has_value() && token->line != dash_line) { + this->record_indicator_comment_for_index(element_index); + } + + if (!token.has_value() || + (token->type == TokenType::BlockSequenceEntry && + token->column == base_column) || + token->type == TokenType::StreamEnd || + 
token->type == TokenType::DocumentEnd) { + result.push_back(JSON{nullptr}); + } else { + auto value{this->parse_value(token.value(), JSON::ParseContext::Index, + element_index, empty_property_)}; + result.push_back(std::move(value)); + token = this->next_token(); + } + + element_index++; + } + + if (element_index > 0) { + this->record_inline_comment_for_index(element_index - 1); + } + + std::uint64_t end_line{this->lexer_->line()}; + std::uint64_t end_column{this->lexer_->column()}; + + if (token.has_value()) { + this->pending_tokens_.push_back(token.value()); + end_line = token->line; + end_column = 0; + } + + this->invoke_callback(JSON::ParsePhase::Post, JSON::Type::Array, end_line, + end_column, JSON::ParseContext::Root, 0, + empty_property_); + + return result; + } + + auto parse_block_mapping(const Token &start_token, + const JSON::ParseContext context, + const std::size_t index, const std::string &property, + const std::uint64_t key_line = 0, + const std::uint64_t key_column = 0) -> JSON { + this->invoke_callback( + JSON::ParsePhase::Pre, JSON::Type::Object, + this->effective_line(start_token, context, key_line), + this->effective_column(start_token, context, key_column), context, + index, property); + + JSON result{JSON::make_object()}; + std::unordered_set seen_keys; + + auto token{start_token}; + const auto mapping_indent{ + start_token.column > 0 + ? 
static_cast(start_token.column - 1) + : static_cast(0)}; + + while (true) { + this->lexer_->set_block_indent(mapping_indent); + + if (token.type == TokenType::BlockMappingKey) { + auto next{this->next_token()}; + assert(next.has_value()); + token = next.value(); + } + + while (token.type == TokenType::Tag || token.type == TokenType::Anchor) { + auto next{this->next_token()}; + assert(next.has_value()); + token = next.value(); + } + + if (token.type != TokenType::Scalar && + token.type != TokenType::BlockMappingValue) { + if (token.type == TokenType::DocumentEnd || + token.type == TokenType::DocumentStart) { + this->pending_tokens_.push_back(token); + } + break; + } + + std::string key; + std::uint64_t current_key_line{0}; + std::uint64_t current_key_column{0}; + + if (token.type == TokenType::Scalar) { + key = token.value; + current_key_line = token.line; + current_key_column = token.column; + + if (seen_keys.contains(key)) { + throw YAMLDuplicateKeyError{key, token.line, token.column}; + } + seen_keys.insert(key); + + auto next{this->next_token()}; + if (!next.has_value() || next->type != TokenType::BlockMappingValue) { + result.assign(std::string{key}, JSON{nullptr}); + if (!next.has_value()) { + break; + } + token = next.value(); + continue; + } + token = next.value(); + } + + if (token.type == TokenType::BlockMappingValue) { + auto next{this->next_token()}; + + if (!next.has_value() || next->type == TokenType::StreamEnd || + next->type == TokenType::DocumentEnd || + next->type == TokenType::DocumentStart) { + result.assign(std::string{key}, JSON{nullptr}); + if (!next.has_value()) { + break; + } + token = next.value(); + continue; + } + + if (next->type == TokenType::BlockMappingValue || + next->type == TokenType::BlockMappingKey) { + if (key.empty() && next->type == TokenType::BlockMappingKey) { + token = next.value(); + continue; + } + result.assign(std::string{key}, JSON{nullptr}); + token = next.value(); + continue; + } + + if (key.empty() && next->type == 
TokenType::Scalar) { + key = next->value; + if (seen_keys.contains(key)) { + throw YAMLDuplicateKeyError{key, next->line, next->column}; + } + seen_keys.insert(key); + result.assign(std::string{key}, JSON{nullptr}); + auto next_after_key{this->next_token()}; + assert(next_after_key.has_value()); + token = next_after_key.value(); + continue; + } + + auto value{this->parse_value(next.value(), JSON::ParseContext::Property, + 0, key, current_key_line, + current_key_column)}; + result.assign(std::string{key}, std::move(value)); + + auto after{this->next_token()}; + if (!after.has_value()) { + break; + } + token = after.value(); + } + } + + this->invoke_callback(JSON::ParsePhase::Post, JSON::Type::Object, + this->lexer_->line(), this->lexer_->column(), + JSON::ParseContext::Root, 0, empty_property_); + + return result; + } + + auto resolve_alias(const Token &token, const JSON::ParseContext context, + const std::size_t index, const std::string &property, + const std::uint64_t key_line = 0, + const std::uint64_t key_column = 0) -> JSON { + const std::string anchor_name{token.value}; + const auto iterator{this->anchors_.find(anchor_name)}; + + if (iterator == this->anchors_.end()) { + throw YAMLUnknownAnchorError{anchor_name, token.line, token.column}; + } + + const auto &anchored{iterator->second}; + const auto alias_end_column{token.column + + static_cast(token.value.size())}; + + bool is_first_pre{true}; + bool is_last_post{false}; + std::size_t callback_index{0}; + for (const auto &record : anchored.callbacks) { + is_last_post = (callback_index == anchored.callbacks.size() - 1 && + record.phase == JSON::ParsePhase::Post); + + std::uint64_t callback_line{record.line}; + std::uint64_t callback_column{record.column}; + auto callback_context{record.context}; + auto callback_idx{record.index}; + std::string callback_property{record.property}; + + if (is_first_pre && record.phase == JSON::ParsePhase::Pre) { + if (context == JSON::ParseContext::Property && key_line > 0) { + 
callback_line = key_line; + callback_column = key_column; + } + callback_context = context; + callback_idx = index; + callback_property = property; + is_first_pre = false; + } + + if (is_last_post) { + callback_line = token.line; + callback_column = alias_end_column; + callback_context = JSON::ParseContext::Root; + callback_idx = 0; + callback_property.clear(); + } + + this->invoke_callback(record.phase, record.type, callback_line, + callback_column, callback_context, callback_idx, + callback_property); + callback_index++; + } + + return anchored.value; + } + + auto next_token() -> std::optional { + std::optional result; + if (!this->pending_tokens_.empty()) { + result = this->pending_tokens_.front(); + this->pending_tokens_.pop_front(); + if (this->pending_tokens_.empty()) { + this->pending_token_position_.reset(); + } + } else { + result = this->lexer_->next(); + } + return result; + } + + auto parse_block_mapping_from_first_key( + const Token &key_token, const JSON::ParseContext context, + const std::size_t index, const std::string &property, + const std::uint64_t parent_key_line = 0, + const std::uint64_t parent_key_column = 0, + const std::uint64_t node_start_column = 0) -> JSON { + this->invoke_callback( + JSON::ParsePhase::Pre, JSON::Type::Object, + this->effective_line(key_token, context, parent_key_line), + this->effective_column(key_token, context, parent_key_column), context, + index, property); + + JSON result{JSON::make_object()}; + std::unordered_set seen_keys; + const auto base_column{node_start_column > 0 ? 
node_start_column + : key_token.column}; + + this->detect_indent_width(parent_key_column, base_column); + + std::string key{key_token.value}; + std::uint64_t key_line{key_token.line}; + std::uint64_t key_column{key_token.column}; + const auto first_key_line{key_token.line}; + seen_keys.insert(key); + this->record_key_scalar_style(key, key_token.scalar_style, + key_token.quoted_original); + this->record_preceding_comments_for_key(key); + + this->lexer_->set_block_indent(static_cast(base_column - 1)); + auto next{this->next_token()}; + + if (!next.has_value() || next->type == TokenType::Scalar || + next->type == TokenType::StreamEnd || + next->type == TokenType::DocumentEnd) { + if (next.has_value() && next->type == TokenType::Scalar && + (next->line == key_line || next->column != base_column)) { + this->record_inline_comment_for_key(key, next->line != key_line); + auto value{this->parse_value(next.value(), JSON::ParseContext::Property, + 0, key, key_line, key_column)}; + result.assign(std::string{key}, std::move(value)); + this->record_inline_comment_for_key(key); + next = this->next_token(); + } else if (next.has_value() && next->type == TokenType::Scalar) { + this->record_inline_comment_for_key(key); + result.assign(std::string{key}, JSON{nullptr}); + } else { + this->invoke_callback(JSON::ParsePhase::Pre, JSON::Type::Null, key_line, + key_column, JSON::ParseContext::Property, 0, key); + const auto null_post_column{key_column + + static_cast(key.size())}; + this->invoke_callback(JSON::ParsePhase::Post, JSON::Type::Null, + key_line, null_post_column, + JSON::ParseContext::Root, 0, empty_property_); + result.assign(std::string{key}, JSON{nullptr}); + } + } else if (next->type == TokenType::MappingStart || + next->type == TokenType::SequenceStart || + next->type == TokenType::BlockSequenceEntry || + next->type == TokenType::Anchor || + next->type == TokenType::Tag || next->type == TokenType::Alias) { + if (next->type == TokenType::BlockSequenceEntry && + next->line 
== key_line) { + throw YAMLParseError{ + next->line, next->column, + "Block sequence entry on same line as mapping key"}; + } + this->record_inline_comment_for_key(key, next->line != key_line); + auto value{this->parse_value(next.value(), JSON::ParseContext::Property, + 0, key, key_line, key_column)}; + result.assign(std::string{key}, std::move(value)); + next = this->next_token(); + this->record_inline_comment_for_key(key); + } else { + result.assign(std::string{key}, JSON{nullptr}); + } + + while (next.has_value() && + (next->type == TokenType::Scalar || + next->type == TokenType::BlockMappingKey || + next->type == TokenType::Anchor || next->type == TokenType::Tag || + next->type == TokenType::Alias)) { + if (this->document_start_line_ > 0 && + first_key_line == this->document_start_line_ && + next->line != this->document_start_line_) { + throw YAMLParseError{next->line, next->column, + "Block mapping continuation after document " + "start line"}; + } + this->lexer_->set_block_indent(static_cast(base_column - 1)); + + if (next->type == TokenType::BlockMappingKey) { + if (next->column < base_column) { + break; + } + next = this->next_token(); + if (!next.has_value() || next->type != TokenType::Scalar) { + result.assign("", JSON{nullptr}); + next = this->next_token(); + continue; + } + + key = next->value; + key_line = next->line; + key_column = next->column; + this->record_key_scalar_style(key, next->scalar_style, + next->quoted_original); + + if (seen_keys.contains(key)) { + throw YAMLDuplicateKeyError{key, next->line, next->column}; + } + seen_keys.insert(key); + + auto colon{this->next_token()}; + if (!colon.has_value() || colon->type != TokenType::BlockMappingValue) { + result.assign(std::string{key}, JSON{nullptr}); + if (colon.has_value()) { + this->pending_tokens_.push_back(colon.value()); + } + next = this->next_token(); + continue; + } + + next = this->next_token(); + if (!next.has_value() || next->type == TokenType::StreamEnd || + next->type == 
TokenType::DocumentEnd || + next->type == TokenType::DocumentStart) { + result.assign(std::string{key}, JSON{nullptr}); + if (next.has_value()) { + this->pending_tokens_.push_back(next.value()); + } + break; + } + if (next->type == TokenType::BlockMappingValue || + next->type == TokenType::BlockMappingKey) { + result.assign(std::string{key}, JSON{nullptr}); + } else { + this->record_inline_comment_for_key(key, next->line != key_line); + this->lexer_->set_block_indent( + static_cast(base_column - 1)); + auto value{this->parse_value(next.value(), + JSON::ParseContext::Property, 0, key, + key_line, key_column)}; + result.assign(std::string{key}, std::move(value)); + next = this->next_token(); + } + continue; + } + + auto effective_column{next->column}; + + if (next->type == TokenType::Anchor) { + next = this->next_token(); + if (!next.has_value() || next->type != TokenType::Scalar) { + continue; + } + } + + if (next->type == TokenType::Tag) { + next = this->next_token(); + if (!next.has_value() || next->type != TokenType::Scalar) { + continue; + } + } + + if (next->type == TokenType::Alias) { + if (effective_column != base_column) { + break; + } + const std::string alias_name{next->value}; + const auto iterator{this->anchors_.find(alias_name)}; + if (iterator == this->anchors_.end()) { + throw YAMLUnknownAnchorError{alias_name, next->line, next->column}; + } + key = this->json_to_key_string(iterator->second.value); + key_line = next->line; + key_column = next->column; + + if (seen_keys.contains(key)) { + throw YAMLDuplicateKeyError{key, next->line, next->column}; + } + seen_keys.insert(key); + + auto colon{this->next_token()}; + if (!colon.has_value() || colon->type != TokenType::BlockMappingValue) { + result.assign(std::string{key}, JSON{nullptr}); + if (colon.has_value()) { + this->pending_tokens_.push_back(colon.value()); + } + next = this->next_token(); + continue; + } + + next = this->next_token(); + + if (!next.has_value() || next->type == TokenType::Scalar) { + 
if (next.has_value()) { + auto value{this->parse_value(next.value(), + JSON::ParseContext::Property, 0, key, + key_line, key_column)}; + result.assign(std::string{key}, std::move(value)); + next = this->next_token(); + } else { + result.assign(std::string{key}, JSON{nullptr}); + } + } else if (next->type == TokenType::StreamEnd || + next->type == TokenType::DocumentEnd || + next->type == TokenType::DocumentStart) { + result.assign(std::string{key}, JSON{nullptr}); + break; + } else { + auto value{this->parse_value(next.value(), + JSON::ParseContext::Property, 0, key, + key_line, key_column)}; + result.assign(std::string{key}, std::move(value)); + next = this->next_token(); + } + continue; + } + + if (effective_column != base_column) { + break; + } + + this->record_inline_comment_for_key(key); + key = next->value; + key_line = next->line; + key_column = next->column; + this->record_key_scalar_style(key, next->scalar_style, + next->quoted_original); + this->record_preceding_comments_for_key(key); + + if (next->multiline) { + throw YAMLParseError{next->line, next->column, + "Multi-line implicit mapping key"}; + } + + if (seen_keys.contains(key)) { + throw YAMLDuplicateKeyError{key, next->line, next->column}; + } + seen_keys.insert(key); + + auto colon{this->next_token()}; + if (!colon.has_value() || colon->type != TokenType::BlockMappingValue) { + if (colon.has_value()) { + this->pending_tokens_.push_back(colon.value()); + } + break; + } + + next = this->next_token(); + + if (!next.has_value() || next->type == TokenType::Scalar) { + if (next.has_value() && + (next->line == key_line || next->column != base_column)) { + this->record_inline_comment_for_key(key, next->line != key_line); + auto after{this->next_token()}; + if (after.has_value()) { + this->pending_tokens_.push_back(after.value()); + } + auto value{this->parse_value(next.value(), + JSON::ParseContext::Property, 0, key, + key_line, key_column)}; + result.assign(std::string{key}, std::move(value)); + next = 
this->next_token(); + } else if (next.has_value()) { + this->record_inline_comment_for_key(key); + result.assign(std::string{key}, JSON{nullptr}); + } else { + result.assign(std::string{key}, JSON{nullptr}); + } + } else if (next->type == TokenType::StreamEnd || + next->type == TokenType::DocumentEnd || + next->type == TokenType::DocumentStart) { + result.assign(std::string{key}, JSON{nullptr}); + break; + } else { + this->record_inline_comment_for_key(key, next->line != key_line); + auto value{this->parse_value(next.value(), JSON::ParseContext::Property, + 0, key, key_line, key_column)}; + result.assign(std::string{key}, std::move(value)); + next = this->next_token(); + } + } + + this->record_inline_comment_for_key(key); + + if (next.has_value() && next->type != TokenType::StreamEnd) { + this->pending_tokens_.push_back(next.value()); + if (next->type == TokenType::DocumentStart) { + this->pending_token_position_ = next->position; + } + } + + this->invoke_callback(JSON::ParsePhase::Post, JSON::Type::Object, + this->lexer_->line(), this->lexer_->column(), + JSON::ParseContext::Root, 0, empty_property_); + + return result; + } + + auto record_preceding_comments_for_key(const std::string &key) -> void { + if (!this->roundtrip_) { + return; + } + auto comments{this->lexer_->take_preceding_comments()}; + if (comments.empty()) { + return; + } + this->pointer_stack_.push_back(key); + this->roundtrip_->styles[this->pointer_stack_].comments_before = + std::move(comments); + this->pointer_stack_.pop_back(); + } + + auto record_inline_comment_for_key(const std::string &key, + const bool on_indicator = false) -> void { + if (!this->roundtrip_) { + return; + } + auto comment{this->lexer_->take_inline_comment()}; + if (!comment.has_value()) { + return; + } + this->pointer_stack_.push_back(key); + if (on_indicator) { + this->roundtrip_->styles[this->pointer_stack_].comment_on_indicator = + std::move(comment); + } else { + 
this->roundtrip_->styles[this->pointer_stack_].comment_inline = + std::move(comment); + } + this->pointer_stack_.pop_back(); + } + + auto record_preceding_comments_for_index(const std::size_t index) -> void { + if (!this->roundtrip_) { + return; + } + auto comments{this->lexer_->take_preceding_comments()}; + if (comments.empty()) { + return; + } + this->pointer_stack_.push_back(index); + this->roundtrip_->styles[this->pointer_stack_].comments_before = + std::move(comments); + this->pointer_stack_.pop_back(); + } + + auto record_inline_comment_for_index(const std::size_t index) -> void { + if (!this->roundtrip_) { + return; + } + auto comment{this->lexer_->take_inline_comment()}; + if (!comment.has_value()) { + return; + } + this->pointer_stack_.push_back(index); + this->roundtrip_->styles[this->pointer_stack_].comment_inline = + std::move(comment); + this->pointer_stack_.pop_back(); + } + + auto record_indicator_comment_for_index(const std::size_t index) -> void { + if (!this->roundtrip_) { + return; + } + this->pointer_stack_.push_back(index); + auto indicator_comment{this->lexer_->take_inline_comment()}; + this->roundtrip_->styles[this->pointer_stack_].comment_on_indicator = + indicator_comment.has_value() ? 
std::move(indicator_comment.value()) + : std::string{}; + this->pointer_stack_.pop_back(); + } + + auto + register_anchored_null(const std::string_view anchor_name, const Token &token, + const JSON::ParseContext context, + const std::size_t index, const std::string &property, + std::optional &inline_comment) -> void { + this->recording_anchor_ = true; + this->current_anchor_callbacks_.clear(); + JSON null_value{nullptr}; + this->invoke_callback(JSON::ParsePhase::Pre, JSON::Type::Null, token.line, + token.column, context, index, property); + this->invoke_callback(JSON::ParsePhase::Post, JSON::Type::Null, token.line, + token.column, JSON::ParseContext::Root, 0, + empty_property_); + this->recording_anchor_ = false; + this->anchors_.insert_or_assign( + std::string{anchor_name}, + AnchoredValue{.value = null_value, + .callbacks = std::move(this->current_anchor_callbacks_)}); + this->current_anchor_callbacks_.clear(); + if (this->roundtrip_) { + auto &style{this->roundtrip_->styles[this->pointer_stack_]}; + style.anchor = std::string{anchor_name}; + if (inline_comment.has_value()) { + style.comment_inline = std::move(inline_comment); + } + } + } + + auto record_collection_style(const YAMLRoundTrip::CollectionStyle style) + -> void { + if (!this->roundtrip_) { + return; + } + + this->roundtrip_->styles[this->pointer_stack_].collection = style; + } + + auto record_scalar_style(const Token &token) -> void { + if (!this->roundtrip_) { + return; + } + + auto &node_style{this->roundtrip_->styles[this->pointer_stack_]}; + + switch (token.scalar_style) { + case ScalarStyle::Plain: + node_style.scalar = YAMLRoundTrip::ScalarStyle::Plain; + if (token.multiline && !token.block_original.empty()) { + node_style.plain_content = std::string{token.block_original}; + } else { + node_style.plain_content = std::string{token.value}; + } + break; + case ScalarStyle::SingleQuoted: + node_style.scalar = YAMLRoundTrip::ScalarStyle::SingleQuoted; + if (!token.quoted_original.empty()) { + 
node_style.quoted_content = std::string{token.quoted_original}; + } + break; + case ScalarStyle::DoubleQuoted: + node_style.scalar = YAMLRoundTrip::ScalarStyle::DoubleQuoted; + if (!token.quoted_original.empty()) { + node_style.quoted_content = std::string{token.quoted_original}; + } + break; + case ScalarStyle::Literal: + node_style.scalar = YAMLRoundTrip::ScalarStyle::Literal; + break; + case ScalarStyle::Folded: + node_style.scalar = YAMLRoundTrip::ScalarStyle::Folded; + break; + } + + if (token.scalar_style == ScalarStyle::Literal || + token.scalar_style == ScalarStyle::Folded) { + switch (token.chomping) { + case BlockChomping::Clip: + node_style.chomping = YAMLRoundTrip::Chomping::Clip; + break; + case BlockChomping::Strip: + node_style.chomping = YAMLRoundTrip::Chomping::Strip; + break; + case BlockChomping::Keep: + node_style.chomping = YAMLRoundTrip::Chomping::Keep; + break; + } + + node_style.explicit_indent = token.explicit_indent; + node_style.indent_before_chomping = token.indent_before_chomping; + + if (!token.block_original.empty()) { + node_style.block_content = std::string{token.block_original}; + } + + auto block_comment{this->lexer_->take_block_scalar_comment()}; + if (block_comment.has_value()) { + node_style.comment_inline = std::move(block_comment); + } + } + } + + auto record_key_scalar_style(const std::string &key, const ScalarStyle style, + const std::string_view quoted_original = {}) + -> void { + if (!this->roundtrip_) { + return; + } + this->pointer_stack_.push_back(key); + switch (style) { + case ScalarStyle::Plain: + this->roundtrip_->key_styles[this->pointer_stack_] = + YAMLRoundTrip::ScalarStyle::Plain; + break; + case ScalarStyle::SingleQuoted: + this->roundtrip_->key_styles[this->pointer_stack_] = + YAMLRoundTrip::ScalarStyle::SingleQuoted; + break; + case ScalarStyle::DoubleQuoted: + this->roundtrip_->key_styles[this->pointer_stack_] = + YAMLRoundTrip::ScalarStyle::DoubleQuoted; + break; + default: + break; + } + if 
(!quoted_original.empty()) { + this->roundtrip_->key_quoted_contents[this->pointer_stack_] = + std::string{quoted_original}; + } + this->pointer_stack_.pop_back(); + } + + auto detect_indent_width(const std::uint64_t parent_column, + const std::uint64_t child_column) -> void { + if (!this->roundtrip_ || this->indent_width_detected_) { + return; + } + if (parent_column > 0 && child_column > parent_column) { + this->roundtrip_->indent_width = + static_cast(child_column - parent_column); + this->indent_width_detected_ = true; + } + } + + inline static const std::string empty_property_{}; + Lexer *lexer_; + const JSON::ParseCallback *callback_; + YAMLRoundTrip *roundtrip_{nullptr}; + Pointer pointer_stack_; + std::unordered_map anchors_; + bool recording_anchor_{false}; + bool indent_width_detected_{false}; + std::vector current_anchor_callbacks_; + std::deque pending_tokens_; + std::optional pending_token_position_; + std::unordered_map tag_directives_; + std::uint64_t document_start_line_{0}; +}; + +} // namespace sourcemeta::core::yaml + +#endif diff --git a/vendor/core/src/core/yaml/stringify.h b/vendor/core/src/core/yaml/stringify.h new file mode 100644 index 00000000..d7734f85 --- /dev/null +++ b/vendor/core/src/core/yaml/stringify.h @@ -0,0 +1,872 @@ +#ifndef SOURCEMETA_CORE_YAML_STRINGIFY_H_ +#define SOURCEMETA_CORE_YAML_STRINGIFY_H_ + +#include +#include +#include + +#include // std::array +#include // assert +#include // std::modf +#include // std::size_t +#include // std::setprecision +#include // std::noshowpoint, std::fixed +#include // std::basic_ostream +#include // std::to_string + +namespace sourcemeta::core::yaml { + +using OutputStream = std::basic_ostream; + +static constexpr std::size_t INDENT_WIDTH{2}; +static constexpr std::array HEX_DIGITS{{'0', '1', '2', '3', '4', '5', + '6', '7', '8', '9', 'a', 'b', + 'c', 'd', 'e', 'f'}}; + +inline auto write_indent(OutputStream &stream, const std::size_t indent, + const std::size_t width = INDENT_WIDTH) -> 
void { + for (std::size_t index{0}; index < indent * width; ++index) { + stream.put(' '); + } +} + +inline auto looks_like_number(const std::string &value) -> bool { + std::size_t start{0}; + if (value[0] == '-' || value[0] == '+') { + start = 1; + } + + if (start >= value.size()) { + return false; + } + + if (value.size() > start + 1 && value[start] == '0') { + const char second{value[start + 1]}; + if (second == 'x' || second == 'X' || second == 'o' || second == 'O') { + return true; + } + } + + bool has_digit{false}; + bool has_dot{false}; + bool has_exponent{false}; + + for (std::size_t index{start}; index < value.size(); ++index) { + const char character{value[index]}; + if (character >= '0' && character <= '9') { + has_digit = true; + } else if (character == '.' && !has_dot && !has_exponent) { + has_dot = true; + } else if ((character == 'e' || character == 'E') && !has_exponent && + has_digit) { + has_exponent = true; + if (index + 1 < value.size() && + (value[index + 1] == '+' || value[index + 1] == '-')) { + ++index; + } + } else { + return false; + } + } + + return has_digit; +} + +inline auto needs_quoting(const std::string &value) -> bool { + if (value.empty()) { + return true; + } + + if (value == "null" || value == "Null" || value == "NULL" || value == "~" || + value == "true" || value == "True" || value == "TRUE" || + value == "false" || value == "False" || value == "FALSE") { + return true; + } + + if (value == ".inf" || value == ".Inf" || value == ".INF" || + value == "+.inf" || value == "+.Inf" || value == "+.INF" || + value == "-.inf" || value == "-.Inf" || value == "-.INF" || + value == ".nan" || value == ".NaN" || value == ".NAN") { + return true; + } + + if (value.size() >= 3 && + ((value[0] == '-' && value[1] == '-' && value[2] == '-') || + (value[0] == '.' && value[1] == '.' 
&& value[2] == '.')) && + (value.size() == 3 || value[3] == ' ' || value[3] == '\t')) { + return true; + } + + if (looks_like_number(value)) { + return true; + } + + const char first{value[0]}; + + if (first == ',' || first == '[' || first == ']' || first == '{' || + first == '}' || first == '#' || first == '&' || first == '*' || + first == '!' || first == '|' || first == '>' || first == '\'' || + first == '"' || first == '%' || first == '@' || first == '`') { + return true; + } + + if (first == '-' || first == '?' || first == ':') { + if (value.size() == 1 || value[1] == ' ') { + return true; + } + } + + if (value.front() == ' ' || value.back() == ' ') { + return true; + } + + for (std::size_t index{0}; index < value.size(); ++index) { + const char character{value[index]}; + if (character < ' ') { + return true; + } + + if (character == ':' && + (index + 1 >= value.size() || value[index + 1] == ' ')) { + return true; + } + + if (character == ' ' && index + 1 < value.size() && + value[index + 1] == '#') { + return true; + } + } + + return false; +} + +inline auto can_single_quote(const std::string &value) -> bool { + for (const char character : value) { + if (character < ' ' && character != '\t') { + return false; + } + } + + return true; +} + +inline auto write_double_quoted(OutputStream &stream, const std::string &value) + -> void { + stream.put('"'); + for (const char character : value) { + switch (character) { + case '"': + stream.write("\\\"", 2); + break; + case '\\': + stream.write("\\\\", 2); + break; + case '\n': + stream.write("\\n", 2); + break; + case '\r': + stream.write("\\r", 2); + break; + case '\t': + stream.write("\\t", 2); + break; + case '\0': + stream.write("\\0", 2); + break; + default: + if (character >= '\x01' && character < '\x20') { + const auto byte{static_cast(character)}; + stream.write("\\x", 2); + stream.put(HEX_DIGITS[byte >> 4u]); + stream.put(HEX_DIGITS[byte & 0x0Fu]); + } else { + stream.put(character); + } + break; + } + } + 
stream.put('"'); +} + +inline auto write_single_quoted(OutputStream &stream, const std::string &value) + -> void { + stream.put('\''); + for (const char character : value) { + if (character == '\'') { + stream.write("''", 2); + } else { + stream.put(character); + } + } + stream.put('\''); +} + +inline auto write_string(OutputStream &stream, const std::string &value) + -> void { + if (needs_quoting(value)) { + write_double_quoted(stream, value); + } else { + stream.write(value.data(), static_cast(value.size())); + } +} + +inline auto write_block_scalar( + OutputStream &stream, const std::string &value, const std::size_t indent, + const YAMLRoundTrip::ScalarStyle style, + const YAMLRoundTrip::Chomping chomping, + const std::optional &header_comment = std::nullopt, + const std::size_t indent_width = INDENT_WIDTH, + const std::size_t explicit_indent = 0, + const bool indent_before_chomping = false) -> void { + stream.put(style == YAMLRoundTrip::ScalarStyle::Literal ? '|' : '>'); + if (indent_before_chomping && explicit_indent > 0) { + stream.put(static_cast('0' + explicit_indent)); + } + if (chomping == YAMLRoundTrip::Chomping::Strip) { + stream.put('-'); + } else if (chomping == YAMLRoundTrip::Chomping::Keep) { + stream.put('+'); + } + if (!indent_before_chomping && explicit_indent > 0) { + stream.put(static_cast('0' + explicit_indent)); + } + if (header_comment.has_value()) { + stream.put(' '); + const auto &comment{header_comment.value()}; + stream.write(comment.data(), static_cast(comment.size())); + } + stream.put('\n'); + + std::size_t position{0}; + while (position < value.size()) { + auto line_end{value.find('\n', position)}; + if (line_end == std::string::npos) { + write_indent(stream, indent, indent_width); + stream.write(value.data() + position, + static_cast(value.size() - position)); + stream.put('\n'); + break; + } + + if (line_end > position) { + write_indent(stream, indent, indent_width); + } + stream.write(value.data() + position, + 
static_cast(line_end - position)); + stream.put('\n'); + position = line_end + 1; + } +} + +inline auto write_string_with_style(OutputStream &stream, + const std::string &value, + const YAMLRoundTrip *roundtrip, + const Pointer &pointer) -> void { + if (roundtrip) { + const auto match{roundtrip->styles.find(pointer)}; + if (match != roundtrip->styles.end() && match->second.scalar.has_value()) { + if (match->second.quoted_content.has_value()) { + const auto &raw{match->second.quoted_content.value()}; + const auto quote_char{match->second.scalar.value() == + YAMLRoundTrip::ScalarStyle::SingleQuoted + ? '\'' + : '"'}; + stream.put(quote_char); + stream.write(raw.data(), static_cast(raw.size())); + stream.put(quote_char); + return; + } + switch (match->second.scalar.value()) { + case YAMLRoundTrip::ScalarStyle::SingleQuoted: + if (can_single_quote(value)) { + write_single_quoted(stream, value); + return; + } + break; + case YAMLRoundTrip::ScalarStyle::DoubleQuoted: + write_double_quoted(stream, value); + return; + default: + break; + } + } + } + + write_string(stream, value); +} + +inline auto write_key_string(OutputStream &stream, const std::string &key, + const YAMLRoundTrip *roundtrip, + const Pointer &pointer) -> void { + if (roundtrip) { + const auto quoted_match{roundtrip->key_quoted_contents.find(pointer)}; + if (quoted_match != roundtrip->key_quoted_contents.end()) { + const auto style_match{roundtrip->key_styles.find(pointer)}; + const auto quote_char{style_match != roundtrip->key_styles.end() && + style_match->second == + YAMLRoundTrip::ScalarStyle::SingleQuoted + ? 
'\'' + : '"'}; + stream.put(quote_char); + const auto &raw{quoted_match->second}; + stream.write(raw.data(), static_cast(raw.size())); + stream.put(quote_char); + return; + } + const auto match{roundtrip->key_styles.find(pointer)}; + if (match != roundtrip->key_styles.end()) { + switch (match->second) { + case YAMLRoundTrip::ScalarStyle::Plain: + stream.write(key.data(), static_cast(key.size())); + return; + case YAMLRoundTrip::ScalarStyle::SingleQuoted: + if (can_single_quote(key)) { + write_single_quoted(stream, key); + return; + } + break; + case YAMLRoundTrip::ScalarStyle::DoubleQuoted: + write_double_quoted(stream, key); + return; + default: + break; + } + } + } + write_string(stream, key); +} + +// Forward declarations for recursive flow collection writing +inline auto write_flow_mapping(OutputStream &stream, const JSON &value, + const YAMLRoundTrip *roundtrip, Pointer &pointer) + -> void; +inline auto write_flow_sequence(OutputStream &stream, const JSON &value, + const YAMLRoundTrip *roundtrip, + Pointer &pointer) -> void; + +inline auto write_inline_value(OutputStream &stream, const JSON &value, + const YAMLRoundTrip *roundtrip, Pointer &pointer) + -> void { + if (roundtrip) { + const auto alias_match{roundtrip->aliases.find(pointer)}; + if (alias_match != roundtrip->aliases.end()) { + stream.put('*'); + const auto &name{alias_match->second}; + stream.write(name.data(), static_cast(name.size())); + return; + } + + const auto style_match{roundtrip->styles.find(pointer)}; + if (style_match != roundtrip->styles.end() && + style_match->second.scalar.has_value() && + style_match->second.scalar.value() == + YAMLRoundTrip::ScalarStyle::Plain && + style_match->second.plain_content.has_value()) { + const auto &content{style_match->second.plain_content.value()}; + stream.write(content.data(), + static_cast(content.size())); + return; + } + } + switch (value.type()) { + case JSON::Type::Null: + stream.write("null", 4); + break; + case JSON::Type::Boolean: + if 
(value.to_boolean()) { + stream.write("true", 4); + } else { + stream.write("false", 5); + } + break; + case JSON::Type::Integer: { + const auto string{std::to_string(value.to_integer())}; + stream.write(string.c_str(), static_cast(string.size())); + } break; + case JSON::Type::Real: { + const auto real{value.to_real()}; + if (real == 0.0) { + stream.write("0.0", 3); + } else { + const auto flags{stream.flags()}; + const auto precision{stream.precision()}; + double integer_part; + if (std::modf(real, &integer_part) == 0.0) { + stream << std::fixed << std::setprecision(1) << real; + } else { + stream << std::noshowpoint << real; + } + stream.flags(flags); + stream.precision(precision); + } + } break; + case JSON::Type::Decimal: + stream << value.to_decimal().to_scientific_string(); + break; + case JSON::Type::String: + write_string_with_style(stream, value.to_string(), roundtrip, pointer); + break; + case JSON::Type::Object: + if (value.empty()) { + stream.write("{}", 2); + } else { + write_flow_mapping(stream, value, roundtrip, pointer); + } + break; + case JSON::Type::Array: + if (value.empty()) { + stream.write("[]", 2); + } else { + write_flow_sequence(stream, value, roundtrip, pointer); + } + break; + } +} + +inline auto is_implicit_null(const JSON &value, const YAMLRoundTrip *roundtrip, + const Pointer &pointer) -> bool { + if (!roundtrip || !value.is_null()) { + return false; + } + if (roundtrip->aliases.contains(pointer)) { + return false; + } + const auto match{roundtrip->styles.find(pointer)}; + if (match == roundtrip->styles.end()) { + return true; + } + return !match->second.scalar.has_value(); +} + +inline auto write_flow_anchor(OutputStream &stream, + const YAMLRoundTrip *roundtrip, + const Pointer &pointer) -> void { + if (!roundtrip) { + return; + } + const auto match{roundtrip->styles.find(pointer)}; + if (match != roundtrip->styles.end() && match->second.anchor.has_value()) { + stream.put('&'); + const auto 
&anchor_name{match->second.anchor.value()}; + stream.write(anchor_name.data(), + static_cast(anchor_name.size())); + stream.put(' '); + } +} + +inline auto write_flow_mapping(OutputStream &stream, const JSON &value, + const YAMLRoundTrip *roundtrip, Pointer &pointer) + -> void { + bool compact{false}; + if (roundtrip) { + const auto match{roundtrip->styles.find(pointer)}; + if (match != roundtrip->styles.end()) { + compact = match->second.compact_flow; + } + } + stream.put('{'); + bool first{true}; + for (const auto &entry : value.as_object()) { + if (!first) { + if (compact) { + stream.put(','); + } else { + stream.write(", ", 2); + } + } + first = false; + pointer.push_back(entry.first); + write_key_string(stream, entry.first, roundtrip, pointer); + stream.write(": ", 2); + if (!is_implicit_null(entry.second, roundtrip, pointer)) { + write_flow_anchor(stream, roundtrip, pointer); + write_inline_value(stream, entry.second, roundtrip, pointer); + } + pointer.pop_back(); + } + stream.put('}'); +} + +inline auto write_flow_sequence(OutputStream &stream, const JSON &value, + const YAMLRoundTrip *roundtrip, + Pointer &pointer) -> void { + bool compact{false}; + if (roundtrip) { + const auto match{roundtrip->styles.find(pointer)}; + if (match != roundtrip->styles.end()) { + compact = match->second.compact_flow; + } + } + stream.put('['); + bool first{true}; + std::size_t item_index{0}; + for (const auto &item : value.as_array()) { + if (!first) { + if (compact) { + stream.put(','); + } else { + stream.write(", ", 2); + } + } + first = false; + pointer.push_back(item_index); + write_flow_anchor(stream, roundtrip, pointer); + write_inline_value(stream, item, roundtrip, pointer); + pointer.pop_back(); + item_index++; + } + stream.put(']'); +} + +inline auto write_block_mapping(OutputStream &stream, const JSON &value, + std::size_t indent, bool skip_first_indent, + const YAMLRoundTrip *roundtrip, + Pointer &pointer) -> void; +inline auto write_block_sequence(OutputStream 
&stream, const JSON &value,
+                                std::size_t indent, bool skip_first_indent,
+                                const YAMLRoundTrip *roundtrip,
+                                Pointer &pointer) -> void;
+
+// Writes a single space followed by the inline comment stored in `style`.
+// Does nothing when `style` is null or carries no inline comment. No newline
+// is written here; every caller in this section emits it itself.
+// NOTE(review): the `static_cast(...)` calls in this copy of the patch have no
+// target type; the template argument looks lost in extraction - confirm
+// against the upstream file before applying.
+inline auto emit_inline_comment(OutputStream &stream,
+                                const YAMLRoundTrip::NodeStyle *style) -> void {
+  if (style && style->comment_inline.has_value()) {
+    stream.put(' ');
+    const auto &comment{style->comment_inline.value()};
+    stream.write(comment.data(), static_cast(comment.size()));
+  }
+}
+
+// Writes `value` to `stream`, consulting `roundtrip` (when non-null) for the
+// style, anchor and alias recorded under `pointer`.
+//
+// - If `pointer` is registered as an alias, only `*name`, the inline comment
+//   and a newline are written, and the function returns.
+// - If the recorded style has an anchor, `&name` is written before the value.
+// - Non-empty objects and arrays are written through the flow writers when
+//   the recorded collection style is Flow, and through the block writers
+//   otherwise.
+// - Strings whose recorded scalar style is Literal or Folded are handed to
+//   write_block_scalar.
+// - Everything else, including empty objects and arrays, goes through
+//   write_inline_value followed by the inline comment and a newline.
+//
+// `indent` is forwarded unchanged to the block writers and to
+// write_block_scalar. `skip_first_indent` is forwarded to the block writers
+// unless an anchor was written, in which case `false` is passed instead.
+inline auto write_node(OutputStream &stream, const JSON &value,
+                       const std::size_t indent, const bool skip_first_indent,
+                       const YAMLRoundTrip *roundtrip, Pointer &pointer)
+    -> void {
+  // `node_style` is only ever set inside the `roundtrip` check below, so a
+  // non-null `node_style` implies a non-null `roundtrip`
+  const YAMLRoundTrip::NodeStyle *node_style{nullptr};
+  if (roundtrip) {
+    const auto style_match{roundtrip->styles.find(pointer)};
+    if (style_match != roundtrip->styles.end()) {
+      node_style = &style_match->second;
+    }
+    // An alias replaces the whole node: the value itself is never written
+    const auto alias_match{roundtrip->aliases.find(pointer)};
+    if (alias_match != roundtrip->aliases.end()) {
+      stream.put('*');
+      const auto &name{alias_match->second};
+      stream.write(name.data(), static_cast(name.size()));
+      emit_inline_comment(stream, node_style);
+      stream.put('\n');
+      return;
+    }
+  }
+
+  // The anchor is written without a trailing separator; each branch below
+  // decides whether a space or a newline follows it
+  bool has_anchor{false};
+  if (node_style && node_style->anchor.has_value()) {
+    stream.put('&');
+    const auto &name{node_style->anchor.value()};
+    stream.write(name.data(), static_cast(name.size()));
+    has_anchor = true;
+  }
+
+  const bool flow{node_style && node_style->collection.has_value() &&
+                  node_style->collection.value() ==
+                      YAMLRoundTrip::CollectionStyle::Flow};
+
+  if (value.is_object() && !value.empty()) {
+    if (flow) {
+      if (has_anchor) {
+        stream.put(' ');
+      }
+      write_flow_mapping(stream, value, roundtrip, pointer);
+      emit_inline_comment(stream, node_style);
+      stream.put('\n');
+    } else {
+      // With an anchor, the current line is ended here, so the block mapping
+      // is asked to indent its first entry as well
+      if (has_anchor) {
+        emit_inline_comment(stream, node_style);
+        stream.put('\n');
+      }
+      write_block_mapping(stream, value, indent,
+                          has_anchor ? false : skip_first_indent, roundtrip,
+                          pointer);
+    }
+  } else if (value.is_array() && !value.empty()) {
+    if (flow) {
+      if (has_anchor) {
+        stream.put(' ');
+      }
+      write_flow_sequence(stream, value, roundtrip, pointer);
+      emit_inline_comment(stream, node_style);
+      stream.put('\n');
+    } else {
+      // Same anchor handling as the block mapping case above
+      if (has_anchor) {
+        emit_inline_comment(stream, node_style);
+        stream.put('\n');
+      }
+      write_block_sequence(stream, value, indent,
+                           has_anchor ? false : skip_first_indent, roundtrip,
+                           pointer);
+    }
+  } else if (node_style && value.is_string() &&
+             node_style->scalar.has_value() &&
+             (node_style->scalar.value() ==
+                  YAMLRoundTrip::ScalarStyle::Literal ||
+              node_style->scalar.value() ==
+                  YAMLRoundTrip::ScalarStyle::Folded)) {
+    if (has_anchor) {
+      stream.put(' ');
+    }
+    // Chomping falls back to Clip when none was recorded
+    const auto chomping{
+        node_style->chomping.value_or(YAMLRoundTrip::Chomping::Clip)};
+    // The recorded block content is used when present; otherwise the string
+    // value itself is used
+    const auto &content{node_style->block_content.has_value()
+                            ? node_style->block_content.value()
+                            : value.to_string()};
+    // The inline comment is passed to write_block_scalar rather than emitted
+    // here, and no newline is written by this branch
+    write_block_scalar(stream, content, indent, node_style->scalar.value(),
+                       chomping, node_style->comment_inline,
+                       roundtrip->indent_width, node_style->explicit_indent,
+                       node_style->indent_before_chomping);
+  } else {
+    if (has_anchor) {
+      stream.put(' ');
+    }
+    write_inline_value(stream, value, roundtrip, pointer);
+    emit_inline_comment(stream, node_style);
+    stream.put('\n');
+  }
+}
+
+// Writes a non-empty object as a sequence of `key:` entries, one per member.
+//
+// Each member's key is pushed onto `pointer` while that member is written and
+// popped afterwards, so `pointer` is unchanged once the function returns.
+// When `skip_first_indent` is true, the first entry gets neither its
+// `comments_before` lines nor its leading indentation. The indent width comes
+// from `roundtrip->indent_width` when `roundtrip` is non-null, and from
+// INDENT_WIDTH otherwise. Member values are written through write_node at
+// `indent + 1`.
+inline auto write_block_mapping(OutputStream &stream, const JSON &value,
+                                const std::size_t indent,
+                                const bool skip_first_indent,
+                                const YAMLRoundTrip *roundtrip,
+                                Pointer &pointer) -> void {
+  assert(value.is_object() && !value.empty());
+  const auto width{roundtrip ? roundtrip->indent_width : INDENT_WIDTH};
+  bool first{true};
+  for (const auto &entry : value.as_object()) {
+    pointer.push_back(entry.first);
+
+    const YAMLRoundTrip::NodeStyle *entry_style{nullptr};
+    bool entry_is_alias{false};
+    if (roundtrip) {
+      entry_is_alias = roundtrip->aliases.contains(pointer);
+      const auto style_match{roundtrip->styles.find(pointer)};
+      if (style_match != roundtrip->styles.end()) {
+        entry_style = &style_match->second;
+      }
+    }
+
+    if (!first || !skip_first_indent) {
+      if (entry_style && !entry_style->comments_before.empty()) {
+        for (const auto &comment : entry_style->comments_before) {
+          // An empty entry produces a bare newline with no indentation
+          if (comment.empty()) {
+            stream.put('\n');
+          } else {
+            write_indent(stream, indent, width);
+            stream.write(comment.data(),
+                         static_cast(comment.size()));
+            stream.put('\n');
+          }
+        }
+      }
+      write_indent(stream, indent, width);
+    }
+    first = false;
+
+    write_key_string(stream, entry.first, roundtrip, pointer);
+    stream.put(':');
+
+    // Only in round-trip mode: a null that is not an alias and has no
+    // recorded scalar style is written as nothing after the colon
+    const bool implicit_null{
+        roundtrip && entry.second.is_null() && !entry_is_alias &&
+        (!entry_style || !entry_style->scalar.has_value())};
+    if (implicit_null) {
+      if (entry_style && entry_style->anchor.has_value()) {
+        stream.put(' ');
+        stream.put('&');
+        const auto &name{entry_style->anchor.value()};
+        stream.write(name.data(), static_cast(name.size()));
+      }
+      emit_inline_comment(stream, entry_style);
+      stream.put('\n');
+    } else {
+      // A comment recorded on the `:` indicator ends the key line; the value
+      // then starts on the next line, already indented at `indent + 1`
+      bool has_indicator_comment{false};
+      if (entry_style && entry_style->comment_on_indicator.has_value()) {
+        has_indicator_comment = true;
+        stream.put(' ');
+        const auto &comment{entry_style->comment_on_indicator.value()};
+        stream.write(comment.data(),
+                     static_cast(comment.size()));
+        stream.put('\n');
+        write_indent(stream, indent + 1, width);
+      }
+      if (!has_indicator_comment) {
+        const bool has_prefix{entry_is_alias ||
+                              (entry_style && entry_style->anchor.has_value())};
+        const bool entry_flow{entry_style &&
+                              entry_style->collection.has_value() &&
+                              entry_style->collection.value() ==
+                                  YAMLRoundTrip::CollectionStyle::Flow};
+        // A non-empty block collection without alias or anchor starts on
+        // its own line; anything else follows the colon after one space
+        const bool nested{
+            (entry.second.is_object() || entry.second.is_array()) &&
+            !entry.second.empty() && !entry_flow && !has_prefix};
+        if (nested) {
+          emit_inline_comment(stream, entry_style);
+          stream.put('\n');
+        } else {
+          stream.put(' ');
+        }
+      }
+      // The ternary is equivalent to passing `has_indicator_comment`: the
+      // first indent is skipped only when it was already written above
+      write_node(stream, entry.second, indent + 1,
+                 has_indicator_comment ? true : false, roundtrip, pointer);
+    }
+
+    pointer.pop_back();
+  }
+}
+
+// Writes a non-empty array as a sequence of `-` items, one per element.
+//
+// Each element's index is pushed onto `pointer` while that element is written
+// and popped afterwards, so `pointer` is unchanged once the function returns.
+// When `skip_first_indent` is true, the first item gets neither its
+// `comments_before` lines nor its leading indentation. The indent width comes
+// from `roundtrip->indent_width` when `roundtrip` is non-null, and from
+// INDENT_WIDTH otherwise. Items are written through write_node at
+// `indent + 1`, always with its `skip_first_indent` argument set to true.
+inline auto write_block_sequence(OutputStream &stream, const JSON &value,
+                                 const std::size_t indent,
+                                 const bool skip_first_indent,
+                                 const YAMLRoundTrip *roundtrip,
+                                 Pointer &pointer) -> void {
+  assert(value.is_array() && !value.empty());
+  const auto width{roundtrip ? roundtrip->indent_width : INDENT_WIDTH};
+  bool first{true};
+  std::size_t item_index{0};
+  for (const auto &item : value.as_array()) {
+    pointer.push_back(item_index);
+
+    // Single lookup for alias and style per item
+    const YAMLRoundTrip::NodeStyle *item_style{nullptr};
+    bool item_is_alias{false};
+    if (roundtrip) {
+      item_is_alias = roundtrip->aliases.contains(pointer);
+      const auto style_match{roundtrip->styles.find(pointer)};
+      if (style_match != roundtrip->styles.end()) {
+        item_style = &style_match->second;
+      }
+    }
+
+    if (!first || !skip_first_indent) {
+      if (item_style && !item_style->comments_before.empty()) {
+        for (const auto &comment : item_style->comments_before) {
+          // An empty entry produces a bare newline with no indentation
+          if (comment.empty()) {
+            stream.put('\n');
+          } else {
+            write_indent(stream, indent, width);
+            stream.write(comment.data(),
+                         static_cast(comment.size()));
+            stream.put('\n');
+          }
+        }
+      }
+      write_indent(stream, indent, width);
+    }
+    first = false;
+
+    // Only in round-trip mode: a null that is not an alias and has no
+    // recorded scalar style is written as a bare `-`
+    const bool implicit_null{roundtrip && item.is_null() && !item_is_alias &&
+                             (!item_style || !item_style->scalar.has_value())};
+    if (implicit_null) {
+      stream.put('-');
+      if (item_style) {
+        if (item_style->anchor.has_value()) {
+          stream.put(' ');
+          stream.put('&');
+          const auto &name{item_style->anchor.value()};
+          stream.write(name.data(), static_cast(name.size()));
+        }
+        // Here the indicator comment stays on the `-` line, and only when
+        // it is non-empty
+        if (item_style->comment_on_indicator.has_value() &&
+            !item_style->comment_on_indicator.value().empty()) {
+          stream.put(' ');
+          const auto &comment{item_style->comment_on_indicator.value()};
+          stream.write(comment.data(),
+                       static_cast(comment.size()));
+        }
+      }
+      emit_inline_comment(stream, item_style);
+      stream.put('\n');
+    } else {
+      // A recorded indicator comment (even an empty one) ends the `-` line;
+      // the item then starts on the next line, indented at `indent + 1`
+      bool has_indicator{false};
+      if (item_style && item_style->comment_on_indicator.has_value()) {
+        has_indicator = true;
+        const auto &comment{item_style->comment_on_indicator.value()};
+        if (comment.empty()) {
+          stream.put('-');
+        } else {
+          stream.write("- ", 2);
+          stream.write(comment.data(),
+                       static_cast(comment.size()));
+        }
+        stream.put('\n');
+        write_indent(stream, indent + 1, width);
+      }
+      if (!has_indicator) {
+        stream.write("- ", 2);
+      }
+      write_node(stream, item, indent + 1, true, roundtrip, pointer);
+    }
+
+    pointer.pop_back();
+    item_index++;
+  }
+}
+
+template