From a6914c089dfbc50fa5f13e80eb8802d68c525b00 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Tue, 29 Jul 2025 19:35:25 +0900 Subject: [PATCH 1/7] Fix the current parser detection Since `RUBY_DESCRIPTION` contains the branch name, `/prism/i` can match unexpectedly. Extract the feature lists between revision and platform infos. --- .github/workflows/parse_y.yml | 4 +++- tool/lib/envutil.rb | 7 ++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/parse_y.yml b/.github/workflows/parse_y.yml index 9345a1f7a465fc..c77b4516d1e737 100644 --- a/.github/workflows/parse_y.yml +++ b/.github/workflows/parse_y.yml @@ -78,7 +78,9 @@ jobs: - run: make - - run: make TESTRUN_SCRIPT='-e "exit !RUBY_DESCRIPTION.include?(%[+PRISM])"' run + - run: make TESTRUN_SCRIPT='-renvutil -v -e "exit EnvUtil.current_parser == %[parse.y]"' run + env: + RUNOPT0: -I$(tooldir)/lib - name: make ${{ matrix.test_task }} run: make -s ${{ matrix.test_task }} RUN_OPTS="$RUN_OPTS" SPECOPTS="$SPECOPTS" diff --git a/tool/lib/envutil.rb b/tool/lib/envutil.rb index d02329d4f13053..fe166d85145ea7 100644 --- a/tool/lib/envutil.rb +++ b/tool/lib/envutil.rb @@ -226,7 +226,6 @@ def invoke_ruby(args, stdin_data = "", capture_stdout = false, capture_stderr = args = [args] if args.kind_of?(String) # use the same parser as current ruby if args.none? { |arg| arg.start_with?("--parser=") } - current_parser = RUBY_DESCRIPTION =~ /prism/i ? "prism" : "parse.y" args = ["--parser=#{current_parser}"] + args end pid = spawn(child_env, *precommand, rubybin, *args, opt) @@ -276,6 +275,12 @@ def invoke_ruby(args, stdin_data = "", capture_stdout = false, capture_stderr = end module_function :invoke_ruby + def current_parser + features = RUBY_DESCRIPTION[%r{\)\K [-+*/%._0-9a-zA-Z ]*(?=\[[-+*/%._0-9a-zA-Z]+\]\z)}] + features&.split&.include?("+PRISM") ? "prism" : "parse.y" + end + module_function :current_parser + def verbose_warning class << (stderr = "".dup) alias write concat From e0818ac659ee91f30531c96ece3923abb0de1e73 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Tue, 29 Jul 2025 20:22:50 +0900 Subject: [PATCH 2/7] Fix stripping features from the description --- spec/ruby/command_line/dash_v_spec.rb | 2 +- spec/ruby/command_line/rubyopt_spec.rb | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/ruby/command_line/dash_v_spec.rb b/spec/ruby/command_line/dash_v_spec.rb index d30efa37d68c23..b13350404c659c 100644 --- a/spec/ruby/command_line/dash_v_spec.rb +++ b/spec/ruby/command_line/dash_v_spec.rb @@ -6,7 +6,7 @@ describe "when used alone" do it "prints version and ends" do - ruby_exe(nil, args: '-v').sub("+PRISM ", "").should include(RUBY_DESCRIPTION.sub("+PRISM ", "")) + ruby_exe(nil, args: '-v').gsub("+PRISM ", "").should include(RUBY_DESCRIPTION.gsub("+PRISM ", "")) end unless (defined?(RubyVM::YJIT) && RubyVM::YJIT.enabled?) || (defined?(RubyVM::RJIT) && RubyVM::RJIT.enabled?) || (ENV['RUBY_GC_LIBRARY'] && ENV['RUBY_GC_LIBRARY'].length > 0) || diff --git a/spec/ruby/command_line/rubyopt_spec.rb b/spec/ruby/command_line/rubyopt_spec.rb index e940f912af8f7b..e1163ffcfcc9d7 100644 --- a/spec/ruby/command_line/rubyopt_spec.rb +++ b/spec/ruby/command_line/rubyopt_spec.rb @@ -25,12 +25,12 @@ guard -> { RbConfig::CONFIG["CROSS_COMPILING"] != "yes" } do it "prints the version number for '-v'" do ENV["RUBYOPT"] = '-v' - ruby_exe("").sub("+PRISM ", "").sub(/\+GC(\[\w+\]\s|\s)?/, "")[/\A.*/].should == RUBY_DESCRIPTION.sub("+PRISM ", "").sub(/\+GC(\[\w+\]\s|\s)?/, "") + ruby_exe("")[/\A.*/].gsub(/\s\+(PRISM|GC(\[\w+\])?)(?=\s)/, "").should == RUBY_DESCRIPTION.gsub(/\s\+(PRISM|GC(\[\w+\])?)(?=\s)/, "") end it "ignores whitespace around the option" do ENV["RUBYOPT"] = ' -v ' - ruby_exe("").sub("+PRISM ", "").sub(/\+GC(\[\w+\]\s|\s)?/, "")[/\A.*/].should == RUBY_DESCRIPTION.sub("+PRISM ", "").sub(/\+GC(\[\w+\]\s|\s)?/, "") + ruby_exe("")[/\A.*/].gsub(/\s\+(PRISM|GC(\[\w+\])?)(?=\s)/, "").should == RUBY_DESCRIPTION.gsub(/\s\+(PRISM|GC(\[\w+\])?)(?=\s)/, "") end end From 46d106f7ab56e32e9ac6309978a75a4fa281ca4d Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Tue, 29 Jul 2025 10:53:52 -0400 Subject: [PATCH 3/7] Fix indentation in switch in rb_gc_impl_mark_maybe [ci skip] --- gc/default/default.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index de3bee15223b19..47cfe3fb3baff3 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -4454,10 +4454,10 @@ rb_gc_impl_mark_maybe(void *objspace_ptr, VALUE obj) asan_unpoisoning_object(obj) { /* Garbage can live on the stack, so do not mark or pin */ switch (BUILTIN_TYPE(obj)) { - case T_ZOMBIE: - case T_NONE: + case T_ZOMBIE: + case T_NONE: break; - default: + default: gc_mark_and_pin(objspace, obj); break; } From a66e4f21542aae99bde7c50e7af4fb0283a81070 Mon Sep 17 00:00:00 2001 From: tomoya ishida Date: Wed, 30 Jul 2025 01:34:13 +0900 Subject: [PATCH 4/7] Improve performance of bignum[beg, len] (#14007) Implement rb_big_aref2. Taking a small slice from large bignum was slow in rb_int_aref2. --- bignum.c | 67 ++++++++++++++++++++++++++++++++++++++++ internal/bignum.h | 1 + internal/numeric.h | 1 + numeric.c | 37 +++++++++++----------- test/ruby/test_bignum.rb | 43 ++++++++++++++++++++++++++ 5 files changed, 131 insertions(+), 18 deletions(-) diff --git a/bignum.c b/bignum.c index ac86951372f4d4..2e135caf20fb09 100644 --- a/bignum.c +++ b/bignum.c @@ -6757,6 +6757,73 @@ rb_big_aref(VALUE x, VALUE y) return (xds[s1] & bit) ? INT2FIX(1) : INT2FIX(0); } +VALUE +rb_big_aref2(VALUE x, VALUE beg, VALUE len) +{ + BDIGIT *xds, *vds; + VALUE v; + size_t copy_begin, xn, shift; + ssize_t begin, length, end; + bool negative_add_one; + + beg = rb_to_int(beg); + len = rb_to_int(len); + length = NUM2SSIZET(len); + begin = NUM2SSIZET(beg); + end = NUM2SSIZET(rb_int_plus(beg, len)); + shift = begin < 0 ? -begin : 0; + xn = BIGNUM_LEN(x); + xds = BDIGITS(x); + + if (length < 0) return rb_big_rshift(x, beg); + if (length == 0 || end <= 0) return INT2FIX(0); + if (begin < 0) begin = 0; + + if ((size_t)(end - 1) / BITSPERDIG >= xn) { + /* end > xn * BITSPERDIG */ + end = xn * BITSPERDIG; + } + + if ((size_t)begin / BITSPERDIG < xn) { + /* begin < xn * BITSPERDIG */ + size_t shift_bits, copy_end; + copy_begin = begin / BITSPERDIG; + shift_bits = begin % BITSPERDIG; + copy_end = (end - 1) / BITSPERDIG + 1; + v = bignew(copy_end - copy_begin, 1); + vds = BDIGITS(v); + MEMCPY(vds, xds + copy_begin, BDIGIT, copy_end - copy_begin); + negative_add_one = (vds[0] & ((1 << shift_bits) - 1)) == 0; + v = bignorm(v); + if (shift_bits) v = rb_int_rshift(v, SIZET2NUM(shift_bits)); + } + else { + /* Out of range */ + v = INT2FIX(0); + negative_add_one = false; + copy_begin = begin = end = 0; + } + + if (BIGNUM_NEGATIVE_P(x)) { + size_t mask_size = length - shift; + VALUE mask = rb_int_minus(rb_int_lshift(INT2FIX(1), SIZET2NUM(mask_size)), INT2FIX(1)); + v = rb_int_xor(v, mask); + for (size_t i = 0; negative_add_one && i < copy_begin; i++) { + if (xds[i]) negative_add_one = false; + } + if (negative_add_one) v = rb_int_plus(v, INT2FIX(1)); + v = rb_int_and(v, mask); + } + else { + size_t mask_size = (size_t)end - begin; + VALUE mask = rb_int_minus(rb_int_lshift(INT2FIX(1), SIZET2NUM(mask_size)), INT2FIX(1)); + v = rb_int_and(v, mask); + } + RB_GC_GUARD(x); + if (shift) v = rb_int_lshift(v, SSIZET2NUM(shift)); + return v; +} + VALUE rb_big_hash(VALUE x) { diff --git a/internal/bignum.h b/internal/bignum.h index db8d3aee83852e..0ba21a492334f1 100644 --- a/internal/bignum.h +++ b/internal/bignum.h @@ -121,6 +121,7 @@ VALUE rb_integer_float_eq(VALUE x, VALUE y); VALUE rb_str_convert_to_inum(VALUE str, int base, int badcheck, int raise_exception); VALUE rb_big_comp(VALUE x); VALUE rb_big_aref(VALUE x, VALUE y); +VALUE rb_big_aref2(VALUE num, VALUE beg, VALUE len); VALUE rb_big_abs(VALUE x); VALUE rb_big_size_m(VALUE big); VALUE rb_big_bit_length(VALUE big); diff --git a/internal/numeric.h b/internal/numeric.h index 6406cfc2fa7480..58f42f41ac4bea 100644 --- a/internal/numeric.h +++ b/internal/numeric.h @@ -85,6 +85,7 @@ VALUE rb_int_cmp(VALUE x, VALUE y); VALUE rb_int_equal(VALUE x, VALUE y); VALUE rb_int_divmod(VALUE x, VALUE y); VALUE rb_int_and(VALUE x, VALUE y); +VALUE rb_int_xor(VALUE x, VALUE y); VALUE rb_int_lshift(VALUE x, VALUE y); VALUE rb_int_rshift(VALUE x, VALUE y); VALUE rb_int_div(VALUE x, VALUE y); diff --git a/numeric.c b/numeric.c index d3affed8049b34..89cff8a730fc9c 100644 --- a/numeric.c +++ b/numeric.c @@ -5115,8 +5115,8 @@ fix_xor(VALUE x, VALUE y) * */ -static VALUE -int_xor(VALUE x, VALUE y) +VALUE +rb_int_xor(VALUE x, VALUE y) { if (FIXNUM_P(x)) { return fix_xor(x, y); @@ -5288,10 +5288,23 @@ generate_mask(VALUE len) return rb_int_minus(rb_int_lshift(INT2FIX(1), len), INT2FIX(1)); } +static VALUE +int_aref2(VALUE num, VALUE beg, VALUE len) +{ + if (RB_TYPE_P(num, T_BIGNUM)) { + return rb_big_aref2(num, beg, len); + } + else { + num = rb_int_rshift(num, beg); + VALUE mask = generate_mask(len); + return rb_int_and(num, mask); + } +} + static VALUE int_aref1(VALUE num, VALUE arg) { - VALUE orig_num = num, beg, end; + VALUE beg, end; int excl; if (rb_range_values(arg, &beg, &end, &excl)) { @@ -5311,22 +5324,19 @@ int_aref1(VALUE num, VALUE arg) return INT2FIX(0); } } - num = rb_int_rshift(num, beg); int cmp = compare_indexes(beg, end); if (!NIL_P(end) && cmp < 0) { VALUE len = rb_int_minus(end, beg); if (!excl) len = rb_int_plus(len, INT2FIX(1)); - VALUE mask = generate_mask(len); - num = rb_int_and(num, mask); + return int_aref2(num, beg, len); } else if (cmp == 0) { if (excl) return INT2FIX(0); - num = orig_num; arg = beg; goto one_bit; } - return num; + return rb_int_rshift(num, beg); } one_bit: @@ -5339,15 +5349,6 @@ int_aref1(VALUE num, VALUE arg) return Qnil; } -static VALUE -int_aref2(VALUE num, VALUE beg, VALUE len) -{ - num = rb_int_rshift(num, beg); - VALUE mask = generate_mask(len); - num = rb_int_and(num, mask); - return num; -} - /* * call-seq: * self[offset] -> 0 or 1 @@ -6366,7 +6367,7 @@ Init_Numeric(void) rb_define_method(rb_cInteger, "&", rb_int_and, 1); rb_define_method(rb_cInteger, "|", int_or, 1); - rb_define_method(rb_cInteger, "^", int_xor, 1); + rb_define_method(rb_cInteger, "^", rb_int_xor, 1); rb_define_method(rb_cInteger, "[]", int_aref, -1); rb_define_method(rb_cInteger, "<<", rb_int_lshift, 1); diff --git a/test/ruby/test_bignum.rb b/test/ruby/test_bignum.rb index beef33e2a60ff4..dd6f4baa4c292b 100644 --- a/test/ruby/test_bignum.rb +++ b/test/ruby/test_bignum.rb @@ -605,6 +605,49 @@ def test_aref assert_equal(1, (-2**(BIGNUM_MIN_BITS*4))[BIGNUM_MIN_BITS*4]) end + def test_aref2 + x = (0x123456789abcdef << (BIGNUM_MIN_BITS + 32)) | 0x12345678 + assert_equal(x, x[0, x.bit_length]) + assert_equal(x >> 10, x[10, x.bit_length]) + assert_equal(0x45678, x[0, 20]) + assert_equal(0x6780, x[-4, 16]) + assert_equal(0x123456, x[x.bit_length - 21, 40]) + assert_equal(0x6789ab, x[x.bit_length - 41, 24]) + assert_equal(0, x[-20, 10]) + assert_equal(0, x[x.bit_length + 10, 10]) + + assert_equal(0, x[5, 0]) + assert_equal(0, (-x)[5, 0]) + + assert_equal(x >> 5, x[5, -1]) + assert_equal(x << 5, x[-5, -1]) + assert_equal((-x) >> 5, (-x)[5, -1]) + assert_equal((-x) << 5, (-x)[-5, -1]) + + assert_equal(x << 5, x[-5, FIXNUM_MAX]) + assert_equal(x >> 5, x[5, FIXNUM_MAX]) + assert_equal(0, x[FIXNUM_MIN, 100]) + assert_equal(0, (-x)[FIXNUM_MIN, 100]) + + y = (x << 160) | 0x1234_0000_0000_0000_1234_0000_0000_0000 + assert_equal(0xffffedcc00, (-y)[40, 40]) + assert_equal(0xfffffffedc, (-y)[52, 40]) + assert_equal(0xffffedcbff, (-y)[104, 40]) + assert_equal(0xfffff6e5d4, (-y)[y.bit_length - 20, 40]) + assert_equal(0, (-y)[-20, 10]) + assert_equal(0xfff, (-y)[y.bit_length + 10, 12]) + + z = (1 << (BIGNUM_MIN_BITS * 2)) - 1 + assert_equal(0x400, (-z)[-10, 20]) + assert_equal(1, (-z)[0, 20]) + assert_equal(0, (-z)[10, 20]) + assert_equal(1, (-z)[0, z.bit_length]) + assert_equal(0, (-z)[z.bit_length - 10, 10]) + assert_equal(0x400, (-z)[z.bit_length - 10, 11]) + assert_equal(0xfff, (-z)[z.bit_length, 12]) + assert_equal(0xfff00, (-z)[z.bit_length - 8, 20]) + end + def test_hash assert_nothing_raised { T31P.hash } end From b22eb0e468a88d2da77cecf2355623fe7eff1ba6 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Tue, 29 Jul 2025 10:00:15 -0700 Subject: [PATCH 5/7] ZJIT: Add --zjit-stats (#14034) --- configure.ac | 9 ++++-- internal/vm.h | 2 +- vm.c | 4 +-- vm_exec.c | 4 +-- vm_insnhelper.h | 2 +- yjit.c | 2 +- yjit.h | 5 ++- zjit.c | 10 +++--- zjit.h | 5 +++ zjit.rb | 57 +++++++++++++++++++++++++++++++++- zjit/bindgen/src/main.rs | 3 ++ zjit/src/codegen.rs | 12 +++++++ zjit/src/cruby_bindings.inc.rs | 1 + zjit/src/hir.rs | 21 ++++++++++--- zjit/src/options.rs | 28 +++++++++++++++++ zjit/src/state.rs | 15 ++++++++- zjit/src/stats.rs | 53 +++++++++++++++++++++++++++++++ 17 files changed, 210 insertions(+), 23 deletions(-) diff --git a/configure.ac b/configure.ac index 6c455b3d67f309..c8018cdabb490a 100644 --- a/configure.ac +++ b/configure.ac @@ -3976,7 +3976,7 @@ AS_CASE(["${YJIT_SUPPORT}"], ZJIT_LIBS= AS_CASE(["${ZJIT_SUPPORT}"], -[yes|dev|dev_nodebug], [ +[yes|dev|dev_nodebug|stats], [ AS_IF([test x"$RUSTC" = "xno"], AC_MSG_ERROR([rustc is required. Installation instructions available at https://www.rust-lang.org/tools/install]) ) @@ -3987,11 +3987,16 @@ AS_CASE(["${ZJIT_SUPPORT}"], [dev], [ rb_cargo_features="$rb_cargo_features,disasm" JIT_CARGO_SUPPORT=dev - AC_DEFINE(RUBY_DEBUG, 1) + AC_DEFINE(RUBY_DEBUG, 1) ], [dev_nodebug], [ rb_cargo_features="$rb_cargo_features,disasm" JIT_CARGO_SUPPORT=dev_nodebug + AC_DEFINE(ZJIT_STATS, 1) + ], + [stats], [ + JIT_CARGO_SUPPORT=stats + AC_DEFINE(ZJIT_STATS, 1) ]) ZJIT_LIBS="target/release/libzjit.a" diff --git a/internal/vm.h b/internal/vm.h index 3ee958a020f474..3a99011c44b2d8 100644 --- a/internal/vm.h +++ b/internal/vm.h @@ -79,7 +79,7 @@ VALUE rb_block_call2(VALUE obj, ID mid, int argc, const VALUE *argv, rb_block_ca struct vm_ifunc *rb_current_ifunc(void); VALUE rb_gccct_clear_table(VALUE); -#if USE_YJIT +#if USE_YJIT || USE_ZJIT /* vm_exec.c */ extern uint64_t rb_vm_insns_count; #endif diff --git a/vm.c b/vm.c index bfc9ff733c5cdb..da5a51d25b73ba 100644 --- a/vm.c +++ b/vm.c @@ -35,6 +35,8 @@ #include "iseq.h" #include "symbol.h" // This includes a macro for a more performant rb_id2sym. #include "yjit.h" +#include "insns.inc" +#include "zjit.h" #include "ruby/st.h" #include "ruby/vm.h" #include "vm_core.h" @@ -45,8 +47,6 @@ #include "ractor_core.h" #include "vm_sync.h" #include "shape.h" -#include "insns.inc" -#include "zjit.h" #include "builtin.h" diff --git a/vm_exec.c b/vm_exec.c index 947d4dc42169a9..3d7c241a321bfa 100644 --- a/vm_exec.c +++ b/vm_exec.c @@ -11,8 +11,8 @@ #include -#if USE_YJIT -// The number of instructions executed on vm_exec_core. --yjit-stats uses this. +#if USE_YJIT || USE_ZJIT +// The number of instructions executed on vm_exec_core. --yjit-stats and --zjit-stats use this. uint64_t rb_vm_insns_count = 0; #endif diff --git a/vm_insnhelper.h b/vm_insnhelper.h index ba957406978e11..24bfbb8210b7d0 100644 --- a/vm_insnhelper.h +++ b/vm_insnhelper.h @@ -16,7 +16,7 @@ RUBY_EXTERN rb_serial_t ruby_vm_constant_cache_invalidations; RUBY_EXTERN rb_serial_t ruby_vm_constant_cache_misses; RUBY_EXTERN rb_serial_t ruby_vm_global_cvar_state; -#if USE_YJIT && YJIT_STATS // We want vm_insns_count only on stats builds. +#if YJIT_STATS || ZJIT_STATS // We want vm_insns_count only on stats builds. // Increment vm_insns_count for --yjit-stats. We increment this even when // --yjit or --yjit-stats is not used because branching to skip it is slower. // We also don't use ATOMIC_INC for performance, allowing inaccuracy on Ractors. diff --git a/yjit.c b/yjit.c index b3364ff6065515..46f89e2020c3f1 100644 --- a/yjit.c +++ b/yjit.c @@ -23,13 +23,13 @@ #include "insns_info.inc" #include "vm_sync.h" #include "yjit.h" +#include "zjit.h" #include "vm_insnhelper.h" #include "probes.h" #include "probes_helper.h" #include "iseq.h" #include "ruby/debug.h" #include "internal/cont.h" -#include "zjit.h" // For mmapp(), sysconf() #ifndef _WIN32 diff --git a/yjit.h b/yjit.h index cb96ee78382c63..645f0af49d8fd3 100644 --- a/yjit.h +++ b/yjit.h @@ -9,10 +9,9 @@ #include "vm_core.h" #include "method.h" -// YJIT_STATS controls whether to support runtime counters in generated code -// and in the interpreter. +// YJIT_STATS controls whether to support runtime counters in the interpreter #ifndef YJIT_STATS -# define YJIT_STATS RUBY_DEBUG +# define YJIT_STATS (USE_YJIT && RUBY_DEBUG) #endif #if USE_YJIT diff --git a/zjit.c b/zjit.c index 61c17d32c30751..abe74225404c98 100644 --- a/zjit.c +++ b/zjit.c @@ -15,6 +15,7 @@ #include "builtin.h" #include "insns.inc" #include "insns_info.inc" +#include "zjit.h" #include "vm_sync.h" #include "vm_insnhelper.h" #include "probes.h" @@ -22,7 +23,6 @@ #include "iseq.h" #include "ruby/debug.h" #include "internal/cont.h" -#include "zjit.h" // For mmapp(), sysconf() #ifndef _WIN32 @@ -331,9 +331,6 @@ rb_iseq_set_zjit_payload(const rb_iseq_t *iseq, void *payload) iseq->body->zjit_payload = payload; } -// Primitives used by zjit.rb -VALUE rb_zjit_assert_compiles(rb_execution_context_t *ec, VALUE self); - void rb_zjit_print_exception(void) { @@ -349,5 +346,10 @@ rb_zjit_shape_obj_too_complex_p(VALUE obj) return rb_shape_obj_too_complex_p(obj); } +// Primitives used by zjit.rb. Don't put other functions below, which wouldn't use them. +VALUE rb_zjit_assert_compiles(rb_execution_context_t *ec, VALUE self); +VALUE rb_zjit_stats(rb_execution_context_t *ec, VALUE self); +VALUE rb_zjit_stats_enabled_p(rb_execution_context_t *ec, VALUE self); + // Preprocessed zjit.rb generated during build #include "zjit.rbinc" diff --git a/zjit.h b/zjit.h index e354d4b122d0cf..5ce2826d067a2b 100644 --- a/zjit.h +++ b/zjit.h @@ -4,6 +4,11 @@ // This file contains definitions ZJIT exposes to the CRuby codebase // +// ZJIT_STATS controls whether to support runtime counters in the interpreter +#ifndef ZJIT_STATS +# define ZJIT_STATS (USE_ZJIT && RUBY_DEBUG) +#endif + #if USE_ZJIT extern bool rb_zjit_enabled_p; extern uint64_t rb_zjit_call_threshold; diff --git a/zjit.rb b/zjit.rb index fd58c1c94a097a..2bc779ef2895ec 100644 --- a/zjit.rb +++ b/zjit.rb @@ -1,6 +1,61 @@ +# frozen_string_literal: true + +# This module allows for introspection of \ZJIT, CRuby's just-in-time compiler. +# Everything in the module is highly implementation specific and the API might +# be less stable compared to the standard library. +# +# This module may not exist if \ZJIT does not support the particular platform +# for which CRuby is built. module RubyVM::ZJIT + # Avoid calling a Ruby method here to avoid interfering with compilation tests + if Primitive.rb_zjit_stats_enabled_p + at_exit { print_stats } + end +end + +class << RubyVM::ZJIT + # Return ZJIT statistics as a Hash + def stats + stats = Primitive.rb_zjit_stats + + if stats.key?(:vm_insns_count) && stats.key?(:zjit_insns_count) + stats[:total_insns_count] = stats[:vm_insns_count] + stats[:zjit_insns_count] + stats[:ratio_in_zjit] = 100.0 * stats[:zjit_insns_count] / stats[:total_insns_count] + end + + stats + end + + # Get the summary of ZJIT statistics as a String + def stats_string + buf = +'' + stats = self.stats + + [ + :total_insns_count, + :vm_insns_count, + :zjit_insns_count, + :ratio_in_zjit, + ].each do |key| + value = stats[key] + if key == :ratio_in_zjit + value = '%0.1f%%' % value + end + buf << "#{'%-18s' % "#{key}:"} #{value}\n" + end + buf + end + # Assert that any future ZJIT compilation will return a function pointer - def self.assert_compiles + def assert_compiles # :nodoc: Primitive.rb_zjit_assert_compiles end + + # :stopdoc: + private + + # Print ZJIT stats + def print_stats + $stderr.write stats_string + end end diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs index cb15c3657e37e1..f67d8e91d31a62 100644 --- a/zjit/bindgen/src/main.rs +++ b/zjit/bindgen/src/main.rs @@ -392,6 +392,9 @@ fn main() { .allowlist_function("rb_ivar_set") .allowlist_function("rb_mod_name") + // From internal/vm.h + .allowlist_var("rb_vm_insns_count") + // From include/ruby/internal/intern/vm.h .allowlist_function("rb_get_alloc_func") diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index de73586249f624..13671fb04c3c3c 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -7,6 +7,7 @@ use crate::backend::current::{Reg, ALLOC_REGS}; use crate::invariants::{track_bop_assumption, track_cme_assumption, track_stable_constant_names_assumption}; use crate::gc::{get_or_create_iseq_payload, append_gc_offsets}; use crate::state::ZJITState; +use crate::stats::{counter_ptr, Counter}; use crate::{asm::CodeBlock, cruby::*, options::debug, virtualmem::CodePtr}; use crate::backend::lir::{self, asm_comment, asm_ccall, Assembler, Opnd, SideExitContext, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_STACK_PTR, NATIVE_BASE_PTR, SP}; use crate::hir::{iseq_to_hir, Block, BlockId, BranchEdge, CallInfo, Invariant, RangeType, SideExitReason, SideExitReason::*, SpecialObjectType, SELF_PARAM_IDX}; @@ -354,6 +355,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::PutSpecialObject { value_type } => gen_putspecialobject(asm, *value_type), Insn::AnyToString { val, str, state } => gen_anytostring(asm, opnd!(val), opnd!(str), &function.frame_state(*state))?, Insn::Defined { op_type, obj, pushval, v } => gen_defined(jit, asm, *op_type, *obj, *pushval, opnd!(v))?, + &Insn::IncrCounter(counter) => return Some(gen_incr_counter(asm, counter)), _ => { debug!("ZJIT: gen_function: unexpected insn {insn}"); return None; @@ -1072,6 +1074,16 @@ fn gen_guard_bit_equals(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, Some(val) } +/// Generate code that increments a counter in ZJIT stats +fn gen_incr_counter(asm: &mut Assembler, counter: Counter) -> () { + let ptr = counter_ptr(counter); + let ptr_reg = asm.load(Opnd::const_ptr(ptr as *const u8)); + let counter_opnd = Opnd::mem(64, ptr_reg, 0); + + // Increment and store the updated value + asm.incr_counter(counter_opnd, Opnd::UImm(1)); +} + /// Save the incremented PC on the CFP. /// This is necessary when callees can raise or allocate. fn gen_save_pc(asm: &mut Assembler, state: &FrameState) { diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index 268bae3dc4860b..10f12798f66dad 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -832,6 +832,7 @@ unsafe extern "C" { elts: *const VALUE, ) -> VALUE; pub fn rb_vm_top_self() -> VALUE; + pub static mut rb_vm_insns_count: u64; pub fn rb_method_entry_at(obj: VALUE, id: ID) -> *const rb_method_entry_t; pub fn rb_callable_method_entry(klass: VALUE, id: ID) -> *const rb_callable_method_entry_t; pub fn rb_callable_method_entry_or_negative( diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 77eac854f8b9eb..90dce2e4e45fee 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -4,7 +4,7 @@ #![allow(non_upper_case_globals)] use crate::{ - cast::IntoUsize, cruby::*, options::{get_option, DumpHIR}, gc::{get_or_create_iseq_payload, IseqPayload}, state::ZJITState + cast::IntoUsize, cruby::*, gc::{get_or_create_iseq_payload, IseqPayload}, options::{get_option, DumpHIR}, state::ZJITState, stats::Counter }; use std::{ cell::RefCell, collections::{HashMap, HashSet, VecDeque}, ffi::{c_int, c_void, CStr}, fmt::Display, mem::{align_of, size_of}, ptr, slice::Iter @@ -566,6 +566,9 @@ pub enum Insn { /// Side-exit into the interpreter. SideExit { state: InsnId, reason: SideExitReason }, + + /// Increment a counter in ZJIT stats + IncrCounter(Counter), } impl Insn { @@ -576,7 +579,7 @@ impl Insn { | Insn::IfTrue { .. } | Insn::IfFalse { .. } | Insn::Return { .. } | Insn::PatchPoint { .. } | Insn::SetIvar { .. } | Insn::ArrayExtend { .. } | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetGlobal { .. } - | Insn::SetLocal { .. } | Insn::Throw { .. } => false, + | Insn::SetLocal { .. } | Insn::Throw { .. } | Insn::IncrCounter(_) => false, _ => true, } } @@ -786,6 +789,7 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { } write!(f, "Throw {state_string}, {val}") } + Insn::IncrCounter(counter) => write!(f, "IncrCounter {counter:?}"), insn => { write!(f, "{insn:?}") } } } @@ -1096,7 +1100,8 @@ impl Function { | PutSpecialObject {..} | GetGlobal {..} | GetLocal {..} - | SideExit {..}) => result.clone(), + | SideExit {..} + | IncrCounter(_)) => result.clone(), Snapshot { state: FrameState { iseq, insn_idx, pc, stack, locals } } => Snapshot { state: FrameState { @@ -1217,7 +1222,7 @@ impl Function { Insn::SetGlobal { .. } | Insn::ArraySet { .. } | Insn::Jump(_) | Insn::IfTrue { .. } | Insn::IfFalse { .. } | Insn::Return { .. } | Insn::Throw { .. } | Insn::PatchPoint { .. } | Insn::SetIvar { .. } | Insn::ArrayExtend { .. } - | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetLocal { .. } => + | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetLocal { .. } | Insn::IncrCounter(_) => panic!("Cannot infer type of instruction with no output: {}", self.insns[insn.0]), Insn::Const { val: Const::Value(val) } => Type::from_value(*val), Insn::Const { val: Const::CBool(val) } => Type::from_cbool(*val), @@ -1835,7 +1840,8 @@ impl Function { &Insn::Const { .. } | &Insn::Param { .. } | &Insn::GetLocal { .. } - | &Insn::PutSpecialObject { .. } => + | &Insn::PutSpecialObject { .. } + | &Insn::IncrCounter(_) => {} &Insn::PatchPoint { state, .. } | &Insn::GetConstantPath { ic: _, state } => { @@ -2622,6 +2628,11 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { let exit_state = state.clone(); profiles.profile_stack(&exit_state); + // Increment zjit_insns_count for each YARV instruction if --zjit-stats is enabled. + if get_option!(stats) { + fun.push_insn(block, Insn::IncrCounter(Counter::zjit_insns_count)); + } + // try_into() call below is unfortunate. Maybe pick i32 instead of usize for opcodes. let opcode: u32 = unsafe { rb_iseq_opcode_at_pc(iseq, pc) } .try_into() diff --git a/zjit/src/options.rs b/zjit/src/options.rs index e97c5ff99fd3be..fc8ee23e263798 100644 --- a/zjit/src/options.rs +++ b/zjit/src/options.rs @@ -1,5 +1,6 @@ use std::{ffi::{CStr, CString}, ptr::null}; use std::os::raw::{c_char, c_int, c_uint}; +use crate::cruby::*; /// Number of calls to start profiling YARV instructions. /// They are profiled `rb_zjit_call_threshold - rb_zjit_profile_threshold` times, @@ -14,11 +15,18 @@ pub static mut rb_zjit_profile_threshold: u64 = 1; #[allow(non_upper_case_globals)] pub static mut rb_zjit_call_threshold: u64 = 2; +/// True if --zjit-stats is enabled. +#[allow(non_upper_case_globals)] +static mut zjit_stats_enabled_p: bool = false; + #[derive(Clone, Copy, Debug)] pub struct Options { /// Number of times YARV instructions should be profiled. pub num_profiles: u8, + /// Enable YJIT statsitics + pub stats: bool, + /// Enable debug logging pub debug: bool, @@ -42,6 +50,7 @@ pub struct Options { pub fn init_options() -> Options { Options { num_profiles: 1, + stats: false, debug: false, dump_hir_init: None, dump_hir_opt: None, @@ -56,6 +65,8 @@ pub fn init_options() -> Options { pub const ZJIT_OPTIONS: &'static [(&str, &str)] = &[ ("--zjit-call-threshold=num", "Number of calls to trigger JIT (default: 2)."), ("--zjit-num-profiles=num", "Number of profiled calls before JIT (default: 1, max: 255)."), + ("--zjit-stats", "Enable collecting ZJIT statistics."), + ("--zjit-perf", "Dump ISEQ symbols into /tmp/perf-{}.map for Linux perf."), ]; #[derive(Clone, Copy, Debug)] @@ -132,6 +143,11 @@ fn parse_option(options: &mut Options, str_ptr: *const std::os::raw::c_char) -> Err(_) => return None, }, + ("stats", "") => { + unsafe { zjit_stats_enabled_p = true; } + options.stats = true; + } + ("debug", "") => options.debug = true, // --zjit-dump-hir dumps the actual input to the codegen, which is currently the same as --zjit-dump-hir-opt. @@ -193,3 +209,15 @@ macro_rules! debug { }; } pub(crate) use debug; + +/// Return Qtrue if --zjit-stats has been enabled +#[unsafe(no_mangle)] +pub extern "C" fn rb_zjit_stats_enabled_p(_ec: EcPtr, _self: VALUE) -> VALUE { + // ZJITState is not initialized yet when loading builtins, so this relies + // on a separate global variable. + if unsafe { zjit_stats_enabled_p } { + Qtrue + } else { + Qfalse + } +} diff --git a/zjit/src/state.rs b/zjit/src/state.rs index 001b550747e904..1878658d8fbbe6 100644 --- a/zjit/src/state.rs +++ b/zjit/src/state.rs @@ -1,8 +1,9 @@ -use crate::cruby::{self, rb_bug_panic_hook, EcPtr, Qnil, VALUE}; +use crate::cruby::{self, rb_bug_panic_hook, rb_vm_insns_count, EcPtr, Qnil, VALUE}; use crate::cruby_methods; use crate::invariants::Invariants; use crate::options::Options; use crate::asm::CodeBlock; +use crate::stats::Counters; #[allow(non_upper_case_globals)] #[unsafe(no_mangle)] @@ -21,6 +22,9 @@ pub struct ZJITState { /// ZJIT command-line options options: Options, + /// ZJIT statistics + counters: Counters, + /// Assumptions that require invalidation invariants: Invariants, @@ -80,6 +84,7 @@ impl ZJITState { let zjit_state = ZJITState { code_block: cb, options, + counters: Counters::default(), invariants: Invariants::default(), assert_compiles: false, method_annotations: cruby_methods::init(), @@ -126,6 +131,11 @@ impl ZJITState { let instance = ZJITState::get_instance(); instance.assert_compiles = true; } + + /// Get a mutable reference to counters for ZJIT stats + pub fn get_counters() -> &'static mut Counters { + &mut ZJITState::get_instance().counters + } } /// Initialize ZJIT, given options allocated by rb_zjit_init_options() @@ -142,6 +152,9 @@ pub extern "C" fn rb_zjit_init(options: *const u8) { rb_bug_panic_hook(); + // Discard the instruction count for boot which we never compile + unsafe { rb_vm_insns_count = 0; } + // ZJIT enabled and initialized successfully assert!(unsafe{ !rb_zjit_enabled_p }); unsafe { rb_zjit_enabled_p = true; } diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs index 06913522c4d45b..4fbad5d24701df 100644 --- a/zjit/src/stats.rs +++ b/zjit/src/stats.rs @@ -4,6 +4,59 @@ // // Comptime vs Runtime stats? +use crate::{cruby::*, options::get_option, state::{zjit_enabled_p, ZJITState}}; + +macro_rules! make_counters { + ($($counter_name:ident,)+) => { + /// Struct containing the counter values + #[derive(Default, Debug)] + pub struct Counters { $(pub $counter_name: u64),+ } + + /// Enum to represent a counter + #[allow(non_camel_case_types)] + #[derive(Clone, Copy, PartialEq, Eq, Debug)] + pub enum Counter { $($counter_name),+ } + + /// Map a counter to a pointer + pub fn counter_ptr(counter: Counter) -> *mut u64 { + let counters = $crate::state::ZJITState::get_counters(); + match counter { + $( Counter::$counter_name => std::ptr::addr_of_mut!(counters.$counter_name) ),+ + } + } + } +} + +// Declare all the counters we track +make_counters! { + // The number of times YARV instructions are executed on JIT code + zjit_insns_count, +} + pub fn zjit_alloc_size() -> usize { 0 // TODO: report the actual memory usage } + +/// Return a Hash object that contains ZJIT statistics +#[unsafe(no_mangle)] +pub extern "C" fn rb_zjit_stats(_ec: EcPtr, _self: VALUE) -> VALUE { + if !zjit_enabled_p() { + return Qnil; + } + + fn set_stat(hash: VALUE, key: &str, value: u64) { + unsafe { rb_hash_aset(hash, rust_str_to_sym(key), VALUE::fixnum_from_usize(value as usize)); } + } + + let hash = unsafe { rb_hash_new() }; + // TODO: Set counters that are always available here + + // Set counters that are enabled when --zjit-stats is enabled + if get_option!(stats) { + let counters = ZJITState::get_counters(); + set_stat(hash, "zjit_insns_count", counters.zjit_insns_count); + set_stat(hash, "vm_insns_count", unsafe { rb_vm_insns_count }); + } + + hash +} From 026079925c2ce7ff660d5e1ba8e2e0d7b0cc6b02 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Tue, 22 Jul 2025 13:16:40 +0200 Subject: [PATCH 6/7] [ruby/prism] Do not use `0` to indicate the latest ruby version to parse This makes it hard to do version checks against this value. The current version checks work because there are so few possible values at the moment. As an example, PR 3337 introduces new syntax for ruby 3.5 and uses `PM_OPTIONS_VERSION_LATEST` as its version guard. Because what is considered the latest changes every year, it must later be changed to `parser->version == parser->version == PM_OPTIONS_VERSION_CRUBY_3_5 || parser->version == PM_OPTIONS_VERSION_LATEST`, with one extra version each year. With this change, the PR can instead write `parser->version >= PM_OPTIONS_VERSION_CRUBY_3_5` which is self-explanatory and works for future versions. https://github.com/ruby/prism/commit/8318a113ca --- lib/prism/ffi.rb | 4 ++-- prism/options.c | 4 ++-- prism/options.h | 12 +++++++++--- prism/prism.c | 26 ++++++++++++++++---------- 4 files changed, 29 insertions(+), 17 deletions(-) diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index 5a4ba09a4fd9a0..1e1bf8b1c877d4 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -422,13 +422,13 @@ def dump_options_command_line(options) def dump_options_version(version) case version when nil, "latest" - 0 + 0 # Handled in pm_parser_init when /\A3\.3(\.\d+)?\z/ 1 when /\A3\.4(\.\d+)?\z/ 2 when /\A3\.5(\.\d+)?\z/ - 0 + 3 else raise ArgumentError, "invalid version: #{version}" end diff --git a/prism/options.c b/prism/options.c index a457178ce85757..1b5c022cf54f9f 100644 --- a/prism/options.c +++ b/prism/options.c @@ -89,7 +89,7 @@ pm_options_version_set(pm_options_t *options, const char *version, size_t length } if (strncmp(version, "3.5", 3) == 0) { - options->version = PM_OPTIONS_VERSION_LATEST; + options->version = PM_OPTIONS_VERSION_CRUBY_3_5; return true; } @@ -108,7 +108,7 @@ pm_options_version_set(pm_options_t *options, const char *version, size_t length } if (strncmp(version, "3.5.", 4) == 0 && is_number(version + 4, length - 4)) { - options->version = PM_OPTIONS_VERSION_LATEST; + options->version = PM_OPTIONS_VERSION_CRUBY_3_5; return true; } } diff --git a/prism/options.h b/prism/options.h index 2f64701b0c20ea..092fda4f07878a 100644 --- a/prism/options.h +++ b/prism/options.h @@ -82,14 +82,20 @@ typedef void (*pm_options_shebang_callback_t)(struct pm_options *options, const * parse in the same way as a specific version of CRuby would have. */ typedef enum { - /** The current version of prism. */ - PM_OPTIONS_VERSION_LATEST = 0, + /** If an explicit version is not provided, the current version of prism will be used. */ + PM_OPTIONS_VERSION_UNSET = 0, /** The vendored version of prism in CRuby 3.3.x. */ PM_OPTIONS_VERSION_CRUBY_3_3 = 1, /** The vendored version of prism in CRuby 3.4.x. */ - PM_OPTIONS_VERSION_CRUBY_3_4 = 2 + PM_OPTIONS_VERSION_CRUBY_3_4 = 2, + + /** The vendored version of prism in CRuby 3.5.x. */ + PM_OPTIONS_VERSION_CRUBY_3_5 = 3, + + /** The current version of prism. */ + PM_OPTIONS_VERSION_LATEST = PM_OPTIONS_VERSION_CRUBY_3_5 } pm_options_version_t; /** diff --git a/prism/prism.c b/prism/prism.c index a40e0ebeb0c821..5728c2584bfe61 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -1409,7 +1409,7 @@ pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) { static inline void pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) { if (pm_conditional_predicate_warn_write_literal_p(node)) { - pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL); + pm_parser_warn_node(parser, node, parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL); } } @@ -2976,7 +2976,7 @@ pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const */ static void pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) { - if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) { + if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) { if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) { pm_node_t *node; PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) { @@ -9113,7 +9113,7 @@ lex_global_variable(pm_parser_t *parser) { } while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0); // $0 isn't allowed to be followed by anything. - pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL; + pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL; PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id); } @@ -9150,7 +9150,7 @@ lex_global_variable(pm_parser_t *parser) { } else { // If we get here, then we have a $ followed by something that // isn't recognized as a global variable. - pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL; + pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL; const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end); PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start); } @@ -10177,7 +10177,7 @@ lex_at_variable(pm_parser_t *parser) { } } else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) { pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE; - if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) { + if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) { diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3; } @@ -14667,7 +14667,7 @@ parse_parameters( parser_lex(parser); pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name); - uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0; + uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0; if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true); pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1)); @@ -14683,7 +14683,7 @@ parse_parameters( // If the value of the parameter increased the number of // reads of that parameter, then we need to warn that we // have a circular definition. - if ((parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) { + if ((parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) { PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR); } @@ -14768,13 +14768,13 @@ parse_parameters( if (token_begins_expression_p(parser->current.type)) { pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local); - uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0; + uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0; if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true); pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1)); if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser); - if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) { + if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) { PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR); } @@ -16478,7 +16478,7 @@ parse_variable(pm_parser_t *parser) { pm_node_list_append(¤t_scope->implicit_parameters, node); return node; - } else if ((parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) && pm_token_is_it(parser->previous.start, parser->previous.end)) { + } else if ((parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) && pm_token_is_it(parser->previous.start, parser->previous.end)) { pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous); pm_node_list_append(¤t_scope->implicit_parameters, node); @@ -22641,6 +22641,12 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm } } + // Now that we have established the user-provided options, check if + // a version was given and parse as the latest version otherwise. + if (parser->version == PM_OPTIONS_VERSION_UNSET) { + parser->version = PM_OPTIONS_VERSION_LATEST; + } + pm_accepts_block_stack_push(parser, true); // Skip past the UTF-8 BOM if it exists. From 2eab962c538424863e34dc20fa13e25b058dc6b5 Mon Sep 17 00:00:00 2001 From: Stan Lo Date: Thu, 17 Jul 2025 12:29:40 +0100 Subject: [PATCH 7/7] [ruby/prism] Allow command calls in endless method bodies within assignments Previously, endless method definitions in assignment contexts like `x = def f = p 1` would fail to parse because command calls (method calls without parentheses) were only accepted when the surrounding binding power was less than `PM_BINDING_POWER_COMPOSITION`. This fix specifically checks for assignment context and allows command calls in those cases while maintaining the existing behavior for other contexts. This ensures that: - `x = def f = p 1` parses correctly (previously failed) - `private def f = puts "Hello"` still produces the expected error https://github.com/ruby/prism/commit/722af59ba3 --- prism/prism.c | 10 +++++++++- test/prism/errors/private_endless_method.txt | 3 +++ test/prism/fixtures/endless_methods.txt | 2 ++ 3 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 test/prism/errors/private_endless_method.txt diff --git a/prism/prism.c b/prism/prism.c index 5728c2584bfe61..ec8f84fb6bc8da 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -19524,7 +19524,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_do_loop_stack_push(parser, false); statements = (pm_node_t *) pm_statements_node_create(parser); - pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, binding_power < PM_BINDING_POWER_COMPOSITION, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1)); + // In endless method bodies, we need to handle command calls carefully. + // We want to allow command calls in assignment context but maintain + // the same binding power to avoid changing how operators are parsed. + // Note that we're intentionally NOT allowing code like `private def foo = puts "Hello"` + // because the original parser, parse.y, can't handle it and we want to maintain the same behavior + bool allow_command_call = (binding_power == PM_BINDING_POWER_ASSIGNMENT) || + (binding_power < PM_BINDING_POWER_COMPOSITION); + + pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_command_call, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1)); if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) { context_push(parser, PM_CONTEXT_RESCUE_MODIFIER); diff --git a/test/prism/errors/private_endless_method.txt b/test/prism/errors/private_endless_method.txt new file mode 100644 index 00000000000000..8aae5e0cd39035 --- /dev/null +++ b/test/prism/errors/private_endless_method.txt @@ -0,0 +1,3 @@ +private def foo = puts "Hello" + ^ unexpected string literal, expecting end-of-input + diff --git a/test/prism/fixtures/endless_methods.txt b/test/prism/fixtures/endless_methods.txt index 8c2f2a30cc58c0..7eb3bf431897b1 100644 --- a/test/prism/fixtures/endless_methods.txt +++ b/test/prism/fixtures/endless_methods.txt @@ -3,3 +3,5 @@ def foo = 1 def bar = A "" def method = 1 + 2 + 3 + +x = def f = p 1