From 6f1bb06cdc579a3f33afadb18cfad96a4f9ac303 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 18 Aug 2025 11:09:09 -0400 Subject: [PATCH 1/8] Change TestHash#test_inspect to use EnvUtil.with_default_external --- test/ruby/test_hash.rb | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/test/ruby/test_hash.rb b/test/ruby/test_hash.rb index dbf041a7321252..576a5f60649666 100644 --- a/test/ruby/test_hash.rb +++ b/test/ruby/test_hash.rb @@ -880,21 +880,20 @@ def test_inspect assert_equal(quote1, eval(quote1).inspect) assert_equal(quote2, eval(quote2).inspect) assert_equal(quote3, eval(quote3).inspect) - begin - verbose_bak, $VERBOSE = $VERBOSE, nil - enc = Encoding.default_external - Encoding.default_external = Encoding::ASCII + + EnvUtil.with_default_external(Encoding::ASCII) do utf8_ascii_hash = '{"\\u3042": 1}' assert_equal(eval(utf8_ascii_hash).inspect, utf8_ascii_hash) - Encoding.default_external = Encoding::UTF_8 + end + + EnvUtil.with_default_external(Encoding::UTF_8) do utf8_hash = "{\u3042: 1}" assert_equal(eval(utf8_hash).inspect, utf8_hash) - Encoding.default_external = Encoding::Windows_31J + end + + EnvUtil.with_default_external(Encoding::Windows_31J) do sjis_hash = "{\x87]: 1}".force_encoding('sjis') assert_equal(eval(sjis_hash).inspect, sjis_hash) - ensure - Encoding.default_external = enc - $VERBOSE = verbose_bak end end From 87409853631a752170b9b27278d7cb812cd183af Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 18 Aug 2025 11:15:08 -0400 Subject: [PATCH 2/8] Change TestM17N#test_string_inspect_encoding to use EnvUtil.with_default_external --- test/ruby/test_m17n.rb | 56 ++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index b0e2e9f849eeeb..2db70080d13b83 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -186,33 +186,35 @@ def test_string_inspect_invalid end def test_string_inspect_encoding - EnvUtil.suppress_warning do - begin - orig_int = Encoding.default_internal - orig_ext = Encoding.default_external - Encoding.default_internal = nil - [Encoding::UTF_8, Encoding::EUC_JP, Encoding::Windows_31J, Encoding::GB18030]. - each do |e| - Encoding.default_external = e - str = "\x81\x30\x81\x30".force_encoding('GB18030') - assert_equal(Encoding::GB18030 == e ? %{"#{str}"} : '"\x{81308130}"', str.inspect) - str = e("\xa1\x8f\xa1\xa1") - expected = "\"\\xA1\x8F\xA1\xA1\"".force_encoding("EUC-JP") - assert_equal(Encoding::EUC_JP == e ? expected : "\"\\xA1\\x{8FA1A1}\"", str.inspect) - str = s("\x81@") - assert_equal(Encoding::Windows_31J == e ? %{"#{str}"} : '"\x{8140}"', str.inspect) - str = "\u3042\u{10FFFD}" - assert_equal(Encoding::UTF_8 == e ? %{"#{str}"} : '"\u3042\u{10FFFD}"', str.inspect) - end - Encoding.default_external = Encoding::UTF_8 - [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE, - Encoding::UTF8_SOFTBANK].each do |e| - str = "abc".encode(e) - assert_equal('"abc"', str.inspect) - end - ensure - Encoding.default_internal = orig_int - Encoding.default_external = orig_ext + [ + Encoding::UTF_8, + Encoding::EUC_JP, + Encoding::Windows_31J, + Encoding::GB18030, + ].each do |e| + EnvUtil.with_default_external(e) do + str = "\x81\x30\x81\x30".force_encoding('GB18030') + assert_equal(Encoding::GB18030 == e ? %{"#{str}"} : '"\x{81308130}"', str.inspect) + str = e("\xa1\x8f\xa1\xa1") + expected = "\"\\xA1\x8F\xA1\xA1\"".force_encoding("EUC-JP") + assert_equal(Encoding::EUC_JP == e ? expected : "\"\\xA1\\x{8FA1A1}\"", str.inspect) + str = s("\x81@") + assert_equal(Encoding::Windows_31J == e ? %{"#{str}"} : '"\x{8140}"', str.inspect) + str = "\u3042\u{10FFFD}" + assert_equal(Encoding::UTF_8 == e ? %{"#{str}"} : '"\u3042\u{10FFFD}"', str.inspect) + end + end + + EnvUtil.with_default_external(Encoding::UTF_8) do + [ + Encoding::UTF_16BE, + Encoding::UTF_16LE, + Encoding::UTF_32BE, + Encoding::UTF_32LE, + Encoding::UTF8_SOFTBANK + ].each do |e| + str = "abc".encode(e) + assert_equal('"abc"', str.inspect) end end end From f3d95fa03348ef622396d12212b96872b693c265 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 18 Aug 2025 11:16:13 -0400 Subject: [PATCH 3/8] Change TestM17N#test_object_utf16_32_inspect to use EnvUtil.with_default_external --- test/ruby/test_m17n.rb | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index 2db70080d13b83..259d179fb85342 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -248,20 +248,11 @@ def test_utf_without_bom_valid end def test_object_utf16_32_inspect - EnvUtil.suppress_warning do - begin - orig_int = Encoding.default_internal - orig_ext = Encoding.default_external - Encoding.default_internal = nil - Encoding.default_external = Encoding::UTF_8 - o = Object.new - [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE].each do |e| - o.instance_eval "undef inspect;def inspect;'abc'.encode('#{e}');end" - assert_equal '[abc]', [o].inspect - end - ensure - Encoding.default_internal = orig_int - Encoding.default_external = orig_ext + EnvUtil.with_default_external(Encoding::UTF_8) do + o = Object.new + [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE].each do |e| + o.instance_eval "undef inspect;def inspect;'abc'.encode('#{e}');end" + assert_equal '[abc]', [o].inspect end end end From 89321c63262cd75a8191b67157b2f1718c722866 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 18 Aug 2025 11:21:11 -0400 Subject: [PATCH 4/8] Change TestM17N#test_object_inspect_external to use EnvUtil.with_default_external --- test/ruby/test_m17n.rb | 43 ++++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index 259d179fb85342..9f7a3c7f4b7ec4 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -258,40 +258,33 @@ def test_object_utf16_32_inspect end def test_object_inspect_external - orig_v, $VERBOSE = $VERBOSE, false - orig_int, Encoding.default_internal = Encoding.default_internal, nil - orig_ext = Encoding.default_external - omit "https://bugs.ruby-lang.org/issues/18338" o = Object.new - Encoding.default_external = Encoding::UTF_16BE - def o.inspect - "abc" - end - assert_nothing_raised(Encoding::CompatibilityError) { [o].inspect } + EnvUtil.with_default_external(Encoding::UTF_16BE) do + def o.inspect + "abc" + end + assert_nothing_raised(Encoding::CompatibilityError) { [o].inspect } - def o.inspect - "abc".encode(Encoding.default_external) + def o.inspect + "abc".encode(Encoding.default_external) + end + assert_equal '[abc]', [o].inspect end - assert_equal '[abc]', [o].inspect - - Encoding.default_external = Encoding::US_ASCII - def o.inspect - "\u3042" - end - assert_equal '[\u3042]', [o].inspect + EnvUtil.with_default_external(Encoding::US_ASCII) do + def o.inspect + "\u3042" + end + assert_equal '[\u3042]', [o].inspect - def o.inspect - "\x82\xa0".force_encoding(Encoding::Windows_31J) + def o.inspect + "\x82\xa0".force_encoding(Encoding::Windows_31J) + end + assert_equal '[\x{82A0}]', [o].inspect end - assert_equal '[\x{82A0}]', [o].inspect - ensure - Encoding.default_internal = orig_int - Encoding.default_external = orig_ext - $VERBOSE = orig_v end def test_str_dump From 6281806fc6dee87ec39a545ce0a157d740446674 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 18 Aug 2025 11:22:32 -0400 Subject: [PATCH 5/8] Change TestString#test_ascii_incomat_inspect to use EnvUtil.with_default_external --- test/ruby/test_string.rb | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index c7e4b0c1ec7fee..1e0f31ba7c540a 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -3251,18 +3251,12 @@ def test_ascii_incomat_inspect assert_equal('"\\u3042\\u3044\\u3046"', S("\u3042\u3044\u3046".encode(e)).inspect) assert_equal('"ab\\"c"', S("ab\"c".encode(e)).inspect, bug4081) end - begin - verbose, $VERBOSE = $VERBOSE, nil - ext = Encoding.default_external - Encoding.default_external = "us-ascii" - $VERBOSE = verbose + + EnvUtil.with_default_external(Encoding::US_ASCII) do i = S("abc\"\\".force_encoding("utf-8")).inspect - ensure - $VERBOSE = nil - Encoding.default_external = ext - $VERBOSE = verbose + + assert_equal('"abc\\"\\\\"', i, bug4081) end - assert_equal('"abc\\"\\\\"', i, bug4081) end def test_dummy_inspect From fc5ee247d5307a292cd2b083ce82fc24005bb385 Mon Sep 17 00:00:00 2001 From: Daniel Colson Date: Tue, 19 Aug 2025 10:02:13 -0400 Subject: [PATCH 6/8] ZJIT: Compile toregexp (#14200) `toregexp` is fairly similar to `concatstrings`, so this commit extracts a helper for pushing and popping operands on the native stack. There's probably opportunity to move some of this into lir (e.g. Alan suggested a push_many that could use STP on ARM to push 2 at a time), but I might save that for another day. --- internal/re.h | 5 ++ re.c | 5 -- test/ruby/test_zjit.rb | 8 +++ zjit/bindgen/src/main.rs | 7 +++ zjit/src/codegen.rs | 41 ++++++++++++---- zjit/src/cruby_bindings.inc.rs | 5 ++ zjit/src/hir.rs | 89 ++++++++++++++++++++++++++++++++++ 7 files changed, 146 insertions(+), 14 deletions(-) diff --git a/internal/re.h b/internal/re.h index 2788f8b42a75b3..593e5c464fdfb3 100644 --- a/internal/re.h +++ b/internal/re.h @@ -25,4 +25,9 @@ int rb_match_count(VALUE match); VALUE rb_reg_new_ary(VALUE ary, int options); VALUE rb_reg_last_defined(VALUE match); +#define ARG_REG_OPTION_MASK \ + (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND) +#define ARG_ENCODING_FIXED 16 +#define ARG_ENCODING_NONE 32 + #endif /* INTERNAL_RE_H */ diff --git a/re.c b/re.c index 9348622eea2936..13d7f0ef9e5fc7 100644 --- a/re.c +++ b/re.c @@ -290,11 +290,6 @@ rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc) #define KCODE_FIXED FL_USER4 -#define ARG_REG_OPTION_MASK \ - (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND) -#define ARG_ENCODING_FIXED 16 -#define ARG_ENCODING_NONE 32 - static int char_to_option(int c) { diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index 96ac99b6db6195..e18333a58f07f7 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -1847,6 +1847,14 @@ def test = "#{}" }, insns: [:concatstrings] end + def test_regexp_interpolation + assert_compiles '/123/', %q{ + def test = /#{1}#{2}#{3}/ + + test + }, insns: [:toregexp] + end + private # Assert that every method call in `test_script` can be compiled by ZJIT diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs index 77299c26574675..59b7f9737ea233 100644 --- a/zjit/bindgen/src/main.rs +++ b/zjit/bindgen/src/main.rs @@ -259,6 +259,13 @@ fn main() { // From internal/re.h .allowlist_function("rb_reg_new_ary") + .allowlist_var("ARG_ENCODING_FIXED") + .allowlist_var("ARG_ENCODING_NONE") + + // From include/ruby/onigmo.h + .allowlist_var("ONIG_OPTION_IGNORECASE") + .allowlist_var("ONIG_OPTION_EXTEND") + .allowlist_var("ONIG_OPTION_MULTILINE") // `ruby_value_type` is a C enum and this stops it from // prefixing all the members with the name of the type diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 5780a2635778f9..37f10c92eb089d 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -346,6 +346,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::StringConcat { strings, .. } if strings.is_empty() => return None, Insn::StringConcat { strings, state } => gen_string_concat(jit, asm, opnds!(strings), &function.frame_state(*state)), Insn::StringIntern { val, state } => gen_intern(asm, opnd!(val), &function.frame_state(*state)), + Insn::ToRegexp { opt, values, state } => gen_toregexp(jit, asm, *opt, opnds!(values), &function.frame_state(*state)), Insn::Param { idx } => unreachable!("block.insns should not have Insn::Param({idx})"), Insn::Snapshot { .. } => return Some(()), // we don't need to do anything for this instruction at the moment Insn::Jump(branch) => no_output!(gen_jump(jit, asm, branch)), @@ -1595,36 +1596,58 @@ pub fn gen_exit_trampoline(cb: &mut CodeBlock) -> Option { }) } -fn gen_string_concat(jit: &mut JITState, asm: &mut Assembler, strings: Vec, state: &FrameState) -> Opnd { - gen_prepare_non_leaf_call(jit, asm, state); +fn gen_push_opnds(jit: &mut JITState, asm: &mut Assembler, opnds: &[Opnd]) -> lir::Opnd { + let n = opnds.len(); // Calculate the compile-time NATIVE_STACK_PTR offset from NATIVE_BASE_PTR // At this point, frame_setup(&[], jit.c_stack_slots) has been called, // which allocated aligned_stack_bytes(jit.c_stack_slots) on the stack let frame_size = aligned_stack_bytes(jit.c_stack_slots); - let n = strings.len(); let allocation_size = aligned_stack_bytes(n); - asm_comment!(asm, "allocate {} bytes on C stack for {} strings", allocation_size, n); + asm_comment!(asm, "allocate {} bytes on C stack for {} values", allocation_size, n); asm.sub_into(NATIVE_STACK_PTR, allocation_size.into()); // Calculate the total offset from NATIVE_BASE_PTR to our buffer let total_offset_from_base = (frame_size + allocation_size) as i32; - for (idx, &string_opnd) in strings.iter().enumerate() { + for (idx, &opnd) in opnds.iter().enumerate() { let slot_offset = -total_offset_from_base + (idx as i32 * SIZEOF_VALUE_I32); asm.mov( Opnd::mem(VALUE_BITS, NATIVE_BASE_PTR, slot_offset), - string_opnd + opnd ); } - let first_string_ptr = asm.lea(Opnd::mem(64, NATIVE_BASE_PTR, -total_offset_from_base)); - - let result = asm_ccall!(asm, rb_str_concat_literals, n.into(), first_string_ptr); + asm.lea(Opnd::mem(64, NATIVE_BASE_PTR, -total_offset_from_base)) +} +fn gen_pop_opnds(asm: &mut Assembler, opnds: &[Opnd]) { asm_comment!(asm, "restore C stack pointer"); + let allocation_size = aligned_stack_bytes(opnds.len()); asm.add_into(NATIVE_STACK_PTR, allocation_size.into()); +} + +fn gen_toregexp(jit: &mut JITState, asm: &mut Assembler, opt: usize, values: Vec, state: &FrameState) -> Opnd { + gen_prepare_non_leaf_call(jit, asm, state); + + let first_opnd_ptr = gen_push_opnds(jit, asm, &values); + + let tmp_ary = asm_ccall!(asm, rb_ary_tmp_new_from_values, Opnd::Imm(0), values.len().into(), first_opnd_ptr); + let result = asm_ccall!(asm, rb_reg_new_ary, tmp_ary, opt.into()); + asm_ccall!(asm, rb_ary_clear, tmp_ary); + + gen_pop_opnds(asm, &values); + + result +} + +fn gen_string_concat(jit: &mut JITState, asm: &mut Assembler, strings: Vec, state: &FrameState) -> Opnd { + gen_prepare_non_leaf_call(jit, asm, state); + + let first_string_ptr = gen_push_opnds(jit, asm, &strings); + let result = asm_ccall!(asm, rb_str_concat_literals, strings.len().into(), first_string_ptr); + gen_pop_opnds(asm, &strings); result } diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index 5c939fabe7f0d6..524b06b58047f5 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -30,6 +30,11 @@ impl ::std::fmt::Debug for __IncompleteArrayField { fmt.write_str("__IncompleteArrayField") } } +pub const ONIG_OPTION_IGNORECASE: u32 = 1; +pub const ONIG_OPTION_EXTEND: u32 = 2; +pub const ONIG_OPTION_MULTILINE: u32 = 4; +pub const ARG_ENCODING_FIXED: u32 = 16; +pub const ARG_ENCODING_NONE: u32 = 32; pub const INTEGER_REDEFINED_OP_FLAG: u32 = 1; pub const FLOAT_REDEFINED_OP_FLAG: u32 = 2; pub const STRING_REDEFINED_OP_FLAG: u32 = 4; diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index afe358ec1d6a86..7c7e09663b07ad 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -473,6 +473,9 @@ pub enum Insn { StringIntern { val: InsnId, state: InsnId }, StringConcat { strings: Vec, state: InsnId }, + /// Combine count stack values into a regexp + ToRegexp { opt: usize, values: Vec, state: InsnId }, + /// Put special object (VMCORE, CBASE, etc.) based on value_type PutSpecialObject { value_type: SpecialObjectType }, @@ -668,6 +671,14 @@ pub struct InsnPrinter<'a> { ptr_map: &'a PtrPrintMap, } +static REGEXP_FLAGS: &[(u32, &str)] = &[ + (ONIG_OPTION_MULTILINE, "MULTILINE"), + (ONIG_OPTION_IGNORECASE, "IGNORECASE"), + (ONIG_OPTION_EXTEND, "EXTENDED"), + (ARG_ENCODING_FIXED, "FIXEDENCODING"), + (ARG_ENCODING_NONE, "NOENCODING"), +]; + impl<'a> std::fmt::Display for InsnPrinter<'a> { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match &self.inner { @@ -716,6 +727,28 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Ok(()) } + Insn::ToRegexp { values, opt, .. } => { + write!(f, "ToRegexp")?; + let mut prefix = " "; + for value in values { + write!(f, "{prefix}{value}")?; + prefix = ", "; + } + + let opt = *opt as u32; + if opt != 0 { + write!(f, ", ")?; + let mut sep = ""; + for (flag, name) in REGEXP_FLAGS { + if opt & flag != 0 { + write!(f, "{sep}{name}")?; + sep = "|"; + } + } + } + + Ok(()) + } Insn::Test { val } => { write!(f, "Test {val}") } Insn::IsNil { val } => { write!(f, "IsNil {val}") } Insn::Jump(target) => { write!(f, "Jump {target}") } @@ -1179,6 +1212,7 @@ impl Function { &StringCopy { val, chilled, state } => StringCopy { val: find!(val), chilled, state }, &StringIntern { val, state } => StringIntern { val: find!(val), state: find!(state) }, &StringConcat { ref strings, state } => StringConcat { strings: find_vec!(strings), state: find!(state) }, + &ToRegexp { opt, ref values, state } => ToRegexp { opt, values: find_vec!(values), state }, &Test { val } => Test { val: find!(val) }, &IsNil { val } => IsNil { val: find!(val) }, &Jump(ref target) => Jump(find_branch_edge!(target)), @@ -1305,6 +1339,7 @@ impl Function { Insn::StringCopy { .. } => types::StringExact, Insn::StringIntern { .. } => types::Symbol, Insn::StringConcat { .. } => types::StringExact, + Insn::ToRegexp { .. } => types::RegexpExact, Insn::NewArray { .. } => types::ArrayExact, Insn::ArrayDup { .. } => types::ArrayExact, Insn::NewHash { .. } => types::HashExact, @@ -1939,6 +1974,10 @@ impl Function { worklist.extend(strings); worklist.push_back(state); } + &Insn::ToRegexp { ref values, state, .. } => { + worklist.extend(values); + worklist.push_back(state); + } | &Insn::Return { val } | &Insn::Throw { val, .. } | &Insn::Test { val } @@ -2863,6 +2902,15 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { let insn_id = fun.push_insn(block, Insn::StringConcat { strings, state: exit_id }); state.stack_push(insn_id); } + YARVINSN_toregexp => { + // First arg contains the options (multiline, extended, ignorecase) used to create the regexp + let opt = get_arg(pc, 0).as_usize(); + let count = get_arg(pc, 1).as_usize(); + let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); + let values = state.stack_pop_n(count)?; + let insn_id = fun.push_insn(block, Insn::ToRegexp { opt, values, state: exit_id }); + state.stack_push(insn_id); + } YARVINSN_newarray => { let count = get_arg(pc, 0).as_usize(); let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); @@ -5330,6 +5378,47 @@ mod tests { "#]]); } + #[test] + fn test_toregexp() { + eval(r##" + def test = /#{1}#{2}#{3}/ + "##); + assert_method_hir_with_opcode("test", YARVINSN_toregexp, expect![[r#" + fn test@:2: + bb0(v0:BasicObject): + v2:Fixnum[1] = Const Value(1) + v4:BasicObject = ObjToString v2 + v6:String = AnyToString v2, str: v4 + v7:Fixnum[2] = Const Value(2) + v9:BasicObject = ObjToString v7 + v11:String = AnyToString v7, str: v9 + v12:Fixnum[3] = Const Value(3) + v14:BasicObject = ObjToString v12 + v16:String = AnyToString v12, str: v14 + v18:RegexpExact = ToRegexp v6, v11, v16 + Return v18 + "#]]); + } + + #[test] + fn test_toregexp_with_options() { + eval(r##" + def test = /#{1}#{2}/mixn + "##); + assert_method_hir_with_opcode("test", YARVINSN_toregexp, expect![[r#" + fn test@:2: + bb0(v0:BasicObject): + v2:Fixnum[1] = Const Value(1) + v4:BasicObject = ObjToString v2 + v6:String = AnyToString v2, str: v4 + v7:Fixnum[2] = Const Value(2) + v9:BasicObject = ObjToString v7 + v11:String = AnyToString v7, str: v9 + v13:RegexpExact = ToRegexp v6, v11, MULTILINE|IGNORECASE|EXTENDED|NOENCODING + Return v13 + "#]]); + } + #[test] fn throw() { eval(" From 6fe4ed507fc517a22d18f1ea77a4b3a3071310e4 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Tue, 19 Aug 2025 07:26:19 -0700 Subject: [PATCH 7/8] ZJIT: Remove more Option from codegen (#14265) --- zjit/src/codegen.rs | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 37f10c92eb089d..5fb83b5f48a5fa 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -357,7 +357,10 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::SendWithoutBlockDirect { cd, state, self_val, args, .. } if args.len() + 1 > C_ARG_OPNDS.len() => // +1 for self gen_send_without_block(jit, asm, *cd, &function.frame_state(*state), opnd!(self_val), opnds!(args)), Insn::SendWithoutBlockDirect { cme, iseq, self_val, args, state, .. } => gen_send_without_block_direct(cb, jit, asm, *cme, *iseq, opnd!(self_val), opnds!(args), &function.frame_state(*state)), - Insn::InvokeBuiltin { bf, args, state, .. } => gen_invokebuiltin(jit, asm, &function.frame_state(*state), bf, opnds!(args))?, + // Ensure we have enough room fit ec, self, and arguments + // TODO remove this check when we have stack args (we can use Time.new to test it) + Insn::InvokeBuiltin { bf, .. } if bf.argc + 2 > (C_ARG_OPNDS.len() as i32) => return None, + Insn::InvokeBuiltin { bf, args, state, .. } => gen_invokebuiltin(jit, asm, &function.frame_state(*state), bf, opnds!(args)), Insn::Return { val } => no_output!(gen_return(asm, opnd!(val))), Insn::FixnumAdd { left, right, state } => gen_fixnum_add(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state)), Insn::FixnumSub { left, right, state } => gen_fixnum_sub(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state)), @@ -372,7 +375,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::FixnumOr { left, right } => gen_fixnum_or(asm, opnd!(left), opnd!(right)), Insn::IsNil { val } => gen_isnil(asm, opnd!(val)), Insn::Test { val } => gen_test(asm, opnd!(val)), - Insn::GuardType { val, guard_type, state } => gen_guard_type(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state))?, + Insn::GuardType { val, guard_type, state } => gen_guard_type(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state)), Insn::GuardBitEquals { val, expected, state } => gen_guard_bit_equals(jit, asm, opnd!(val), *expected, &function.frame_state(*state)), Insn::PatchPoint { invariant, state } => no_output!(gen_patch_point(jit, asm, invariant, &function.frame_state(*state))), Insn::CCall { cfun, args, name: _, return_type: _, elidable: _ } => gen_ccall(asm, *cfun, opnds!(args)), @@ -543,22 +546,18 @@ fn gen_get_constant_path(jit: &JITState, asm: &mut Assembler, ic: *const iseq_in asm_ccall!(asm, rb_vm_opt_getconstant_path, EC, CFP, Opnd::const_ptr(ic)) } -fn gen_invokebuiltin(jit: &JITState, asm: &mut Assembler, state: &FrameState, bf: &rb_builtin_function, args: Vec) -> Option { - // Ensure we have enough room fit ec, self, and arguments - // TODO remove this check when we have stack args (we can use Time.new to test it) - if bf.argc + 2 > (C_ARG_OPNDS.len() as i32) { - return None; - } - +fn gen_invokebuiltin(jit: &JITState, asm: &mut Assembler, state: &FrameState, bf: &rb_builtin_function, args: Vec) -> lir::Opnd { + assert!(bf.argc + 2 <= C_ARG_OPNDS.len() as i32, + "gen_invokebuiltin should not be called for builtin function {} with too many arguments: {}", + unsafe { std::ffi::CStr::from_ptr(bf.name).to_str().unwrap() }, + bf.argc); // Anything can happen inside builtin functions gen_prepare_non_leaf_call(jit, asm, state); let mut cargs = vec![EC]; cargs.extend(args); - let val = asm.ccall(bf.func_ptr as *const u8, cargs); - - Some(val) + asm.ccall(bf.func_ptr as *const u8, cargs) } /// Record a patch point that should be invalidated on a given invariant @@ -1173,7 +1172,7 @@ fn gen_test(asm: &mut Assembler, val: lir::Opnd) -> lir::Opnd { } /// Compile a type check with a side exit -fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard_type: Type, state: &FrameState) -> Option { +fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard_type: Type, state: &FrameState) -> lir::Opnd { if guard_type.is_subtype(types::Fixnum) { asm.test(val, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); asm.jz(side_exit(jit, state, GuardType(guard_type))); @@ -1186,7 +1185,7 @@ fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard // Static symbols have (val & 0xff) == RUBY_SYMBOL_FLAG // Use 8-bit comparison like YJIT does debug_assert!(val.try_num_bits(8).is_some(), "GuardType should not be used for a known constant, but val was: {val:?}"); - asm.cmp(val.try_num_bits(8)?, Opnd::UImm(RUBY_SYMBOL_FLAG as u64)); + asm.cmp(val.with_num_bits(8), Opnd::UImm(RUBY_SYMBOL_FLAG as u64)); asm.jne(side_exit(jit, state, GuardType(guard_type))); } else if guard_type.is_subtype(types::NilClass) { asm.cmp(val, Qnil.into()); @@ -1227,7 +1226,7 @@ fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard } else { unimplemented!("unsupported type: {guard_type}"); } - Some(val) + val } /// Compile an identity check with a side exit From 3ff1ca07bab3603ad2c0744983d5d7b8b9ac3a44 Mon Sep 17 00:00:00 2001 From: John Hawthorn Date: Tue, 12 Aug 2025 15:22:23 -0700 Subject: [PATCH 8/8] [ruby/openssl] Add missing write barriers in X509 Both the X509 store and X509 store context were missing write barriers. To the callback object being stored in the ex data. These values were also being stored as an IV, however in Ruby HEAD we're now storing the IVs for T_DATA (generic IVs) on a separate object. So we need an additional write barrier. I believe this was always necessary, because we could have done incremental marking ahead of compaction, and without the write barrier the mark function could have been run before @verify_callback was assigned. This was detected by wbcheck https://github.com/ruby/openssl/commit/1fda3a99ef --- ext/openssl/ossl_x509store.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ext/openssl/ossl_x509store.c b/ext/openssl/ossl_x509store.c index 8291578f274b6d..c18596cbf5be73 100644 --- a/ext/openssl/ossl_x509store.c +++ b/ext/openssl/ossl_x509store.c @@ -191,8 +191,8 @@ ossl_x509store_set_vfy_cb(VALUE self, VALUE cb) GetX509Store(self, store); rb_iv_set(self, "@verify_callback", cb); - // We don't need to trigger a write barrier because `rb_iv_set` did it. X509_STORE_set_ex_data(store, store_ex_verify_cb_idx, (void *)cb); + RB_OBJ_WRITTEN(self, Qundef, cb); return cb; } @@ -611,6 +611,7 @@ ossl_x509stctx_verify(VALUE self) GetX509StCtx(self, ctx); VALUE cb = rb_iv_get(self, "@verify_callback"); X509_STORE_CTX_set_ex_data(ctx, stctx_ex_verify_cb_idx, (void *)cb); + RB_OBJ_WRITTEN(self, Qundef, cb); switch (X509_verify_cert(ctx)) { case 1: