diff --git a/ext/json/parser/extconf.rb b/ext/json/parser/extconf.rb index de5d5758b46c42..dc1c8952c6e6cd 100644 --- a/ext/json/parser/extconf.rb +++ b/ext/json/parser/extconf.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require 'mkmf' +$defs << "-DJSON_DEBUG" if ENV["JSON_DEBUG"] have_func("rb_enc_interned_str", "ruby/encoding.h") # RUBY_VERSION >= 3.0 have_func("rb_hash_new_capa", "ruby.h") # RUBY_VERSION >= 3.2 have_func("rb_hash_bulk_insert", "ruby.h") # Missing on TruffleRuby diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 555652f42582b1..8206716d705cb8 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -612,12 +612,14 @@ json_eat_whitespace(JSON_ParserState *state) while (rest(state) > 8) { uint64_t chunk; memcpy(&chunk, state->cursor, sizeof(uint64_t)); - size_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT; + if (chunk == 0x2020202020202020) { + state->cursor += 8; + continue; + } + uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT; state->cursor += consecutive_spaces; - if (consecutive_spaces != 8) { - break; - } + break; } #endif break; @@ -1101,13 +1103,21 @@ static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulat continue; } - if ((match & 0xFFFFFFFF) == 0x33333333) { // 4 consecutive digits + uint32_t consecutive_digits = trailing_zeros64(match ^ 0x3333333333333333) / CHAR_BIT; + + if (consecutive_digits >= 4) { *accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes); state->cursor += 4; - break; + consecutive_digits -= 4; + } + + while (consecutive_digits) { + *accumulator = *accumulator * 10 + (*state->cursor - '0'); + consecutive_digits--; + state->cursor++; } - break; + return (int)(state->cursor - start); } #endif diff --git a/ext/json/simd/simd.h b/ext/json/simd/simd.h index 2aa6c3d046a764..0abe4fad658813 100644 --- a/ext/json/simd/simd.h +++ b/ext/json/simd/simd.h @@ -1,3 +1,7 
@@ +#ifdef JSON_DEBUG +#include <assert.h> +#endif + typedef enum { SIMD_NONE, SIMD_NEON, @@ -18,6 +22,10 @@ typedef enum { static inline uint32_t trailing_zeros64(uint64_t input) { +#ifdef JSON_DEBUG + assert(input > 0); // __builtin_ctz(0) is undefined behavior +#endif + #if HAVE_BUILTIN_CTZLL return __builtin_ctzll(input); #else @@ -33,6 +41,10 @@ static inline uint32_t trailing_zeros64(uint64_t input) static inline int trailing_zeros(int input) { +#ifdef JSON_DEBUG + assert(input > 0); // __builtin_ctz(0) is undefined behavior +#endif + #if HAVE_BUILTIN_CTZLL return __builtin_ctz(input); #else diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index 30188c4ebdea32..bab16a6fc21dbc 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -804,6 +804,10 @@ def test_parse_leading_slash end end + def test_parse_whitespace_after_newline + assert_equal [], JSON.parse("[\n#{' ' * (8 + 8 + 4 + 3)}]") + end + private def assert_equal_float(expected, actual, delta = 1e-2) diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index adbfe328cccfbf..99ab85ddbe9faa 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -3807,6 +3807,36 @@ class MyError < StandardError; end end end + def test_encode_fallback_not_string_memory_leak + { + "hash" => <<~RUBY, + fallback = Hash.new { Object.new } + RUBY + "proc" => <<~RUBY, + fallback = proc { Object.new } + RUBY + "method" => <<~RUBY, + def my_method(_str) = Object.new + fallback = method(:my_method) + RUBY + "aref" => <<~RUBY, + fallback = Object.new + def fallback.[](_str) = Object.new + RUBY + }.each do |type, code| + assert_no_memory_leak([], '', <<~RUBY, "fallback type is #{type}", rss: true) + class MyError < StandardError; end + + #{code} + + 100_000.times do |i| + "\\ufffd".encode(Encoding::US_ASCII, fallback:) + rescue TypeError + end + RUBY + end + end + private def assert_bytesplice_result(expected, s, *args) diff --git a/transcode.c b/transcode.c index 
20b92b66f7ae21..86e828c47911a3 100644 --- a/transcode.c +++ b/transcode.c @@ -2360,7 +2360,13 @@ transcode_loop_fallback_try(VALUE a) { struct transcode_loop_fallback_args *args = (struct transcode_loop_fallback_args *)a; - return args->fallback_func(args->fallback, args->rep); + VALUE ret = args->fallback_func(args->fallback, args->rep); + + if (!UNDEF_P(ret) && !NIL_P(ret)) { + StringValue(ret); + } + + return ret; } static void @@ -2428,7 +2434,6 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos, } if (!UNDEF_P(rep) && !NIL_P(rep)) { - StringValue(rep); ret = rb_econv_insert_output(ec, (const unsigned char *)RSTRING_PTR(rep), RSTRING_LEN(rep), rb_enc_name(rb_enc_get(rep))); if ((int)ret == -1) {