diff --git a/ext/openssl/ossl_x509store.c b/ext/openssl/ossl_x509store.c index 8291578f274b6d..c18596cbf5be73 100644 --- a/ext/openssl/ossl_x509store.c +++ b/ext/openssl/ossl_x509store.c @@ -191,8 +191,8 @@ ossl_x509store_set_vfy_cb(VALUE self, VALUE cb) GetX509Store(self, store); rb_iv_set(self, "@verify_callback", cb); - // We don't need to trigger a write barrier because `rb_iv_set` did it. X509_STORE_set_ex_data(store, store_ex_verify_cb_idx, (void *)cb); + RB_OBJ_WRITTEN(self, Qundef, cb); return cb; } @@ -611,6 +611,7 @@ ossl_x509stctx_verify(VALUE self) GetX509StCtx(self, ctx); VALUE cb = rb_iv_get(self, "@verify_callback"); X509_STORE_CTX_set_ex_data(ctx, stctx_ex_verify_cb_idx, (void *)cb); + RB_OBJ_WRITTEN(self, Qundef, cb); switch (X509_verify_cert(ctx)) { case 1: diff --git a/internal/re.h b/internal/re.h index 2788f8b42a75b3..593e5c464fdfb3 100644 --- a/internal/re.h +++ b/internal/re.h @@ -25,4 +25,9 @@ int rb_match_count(VALUE match); VALUE rb_reg_new_ary(VALUE ary, int options); VALUE rb_reg_last_defined(VALUE match); +#define ARG_REG_OPTION_MASK \ + (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND) +#define ARG_ENCODING_FIXED 16 +#define ARG_ENCODING_NONE 32 + #endif /* INTERNAL_RE_H */ diff --git a/re.c b/re.c index 9348622eea2936..13d7f0ef9e5fc7 100644 --- a/re.c +++ b/re.c @@ -290,11 +290,6 @@ rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc) #define KCODE_FIXED FL_USER4 -#define ARG_REG_OPTION_MASK \ - (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND) -#define ARG_ENCODING_FIXED 16 -#define ARG_ENCODING_NONE 32 - static int char_to_option(int c) { diff --git a/test/ruby/test_hash.rb b/test/ruby/test_hash.rb index dbf041a7321252..576a5f60649666 100644 --- a/test/ruby/test_hash.rb +++ b/test/ruby/test_hash.rb @@ -880,21 +880,20 @@ def test_inspect assert_equal(quote1, eval(quote1).inspect) assert_equal(quote2, eval(quote2).inspect) assert_equal(quote3, eval(quote3).inspect) - begin - 
verbose_bak, $VERBOSE = $VERBOSE, nil - enc = Encoding.default_external - Encoding.default_external = Encoding::ASCII + + EnvUtil.with_default_external(Encoding::ASCII) do utf8_ascii_hash = '{"\\u3042": 1}' assert_equal(eval(utf8_ascii_hash).inspect, utf8_ascii_hash) - Encoding.default_external = Encoding::UTF_8 + end + + EnvUtil.with_default_external(Encoding::UTF_8) do utf8_hash = "{\u3042: 1}" assert_equal(eval(utf8_hash).inspect, utf8_hash) - Encoding.default_external = Encoding::Windows_31J + end + + EnvUtil.with_default_external(Encoding::Windows_31J) do sjis_hash = "{\x87]: 1}".force_encoding('sjis') assert_equal(eval(sjis_hash).inspect, sjis_hash) - ensure - Encoding.default_external = enc - $VERBOSE = verbose_bak end end diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index b0e2e9f849eeeb..9f7a3c7f4b7ec4 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -186,33 +186,35 @@ def test_string_inspect_invalid end def test_string_inspect_encoding - EnvUtil.suppress_warning do - begin - orig_int = Encoding.default_internal - orig_ext = Encoding.default_external - Encoding.default_internal = nil - [Encoding::UTF_8, Encoding::EUC_JP, Encoding::Windows_31J, Encoding::GB18030]. - each do |e| - Encoding.default_external = e - str = "\x81\x30\x81\x30".force_encoding('GB18030') - assert_equal(Encoding::GB18030 == e ? %{"#{str}"} : '"\x{81308130}"', str.inspect) - str = e("\xa1\x8f\xa1\xa1") - expected = "\"\\xA1\x8F\xA1\xA1\"".force_encoding("EUC-JP") - assert_equal(Encoding::EUC_JP == e ? expected : "\"\\xA1\\x{8FA1A1}\"", str.inspect) - str = s("\x81@") - assert_equal(Encoding::Windows_31J == e ? %{"#{str}"} : '"\x{8140}"', str.inspect) - str = "\u3042\u{10FFFD}" - assert_equal(Encoding::UTF_8 == e ? 
%{"#{str}"} : '"\u3042\u{10FFFD}"', str.inspect) - end - Encoding.default_external = Encoding::UTF_8 - [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE, - Encoding::UTF8_SOFTBANK].each do |e| - str = "abc".encode(e) - assert_equal('"abc"', str.inspect) - end - ensure - Encoding.default_internal = orig_int - Encoding.default_external = orig_ext + [ + Encoding::UTF_8, + Encoding::EUC_JP, + Encoding::Windows_31J, + Encoding::GB18030, + ].each do |e| + EnvUtil.with_default_external(e) do + str = "\x81\x30\x81\x30".force_encoding('GB18030') + assert_equal(Encoding::GB18030 == e ? %{"#{str}"} : '"\x{81308130}"', str.inspect) + str = e("\xa1\x8f\xa1\xa1") + expected = "\"\\xA1\x8F\xA1\xA1\"".force_encoding("EUC-JP") + assert_equal(Encoding::EUC_JP == e ? expected : "\"\\xA1\\x{8FA1A1}\"", str.inspect) + str = s("\x81@") + assert_equal(Encoding::Windows_31J == e ? %{"#{str}"} : '"\x{8140}"', str.inspect) + str = "\u3042\u{10FFFD}" + assert_equal(Encoding::UTF_8 == e ? 
%{"#{str}"} : '"\u3042\u{10FFFD}"', str.inspect) + end + end + + EnvUtil.with_default_external(Encoding::UTF_8) do + [ + Encoding::UTF_16BE, + Encoding::UTF_16LE, + Encoding::UTF_32BE, + Encoding::UTF_32LE, + Encoding::UTF8_SOFTBANK + ].each do |e| + str = "abc".encode(e) + assert_equal('"abc"', str.inspect) end end end @@ -246,59 +248,43 @@ def test_utf_without_bom_valid end def test_object_utf16_32_inspect - EnvUtil.suppress_warning do - begin - orig_int = Encoding.default_internal - orig_ext = Encoding.default_external - Encoding.default_internal = nil - Encoding.default_external = Encoding::UTF_8 - o = Object.new - [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE].each do |e| - o.instance_eval "undef inspect;def inspect;'abc'.encode('#{e}');end" - assert_equal '[abc]', [o].inspect - end - ensure - Encoding.default_internal = orig_int - Encoding.default_external = orig_ext + EnvUtil.with_default_external(Encoding::UTF_8) do + o = Object.new + [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE].each do |e| + o.instance_eval "undef inspect;def inspect;'abc'.encode('#{e}');end" + assert_equal '[abc]', [o].inspect end end end def test_object_inspect_external - orig_v, $VERBOSE = $VERBOSE, false - orig_int, Encoding.default_internal = Encoding.default_internal, nil - orig_ext = Encoding.default_external - omit "https://bugs.ruby-lang.org/issues/18338" o = Object.new - Encoding.default_external = Encoding::UTF_16BE - def o.inspect - "abc" - end - assert_nothing_raised(Encoding::CompatibilityError) { [o].inspect } + EnvUtil.with_default_external(Encoding::UTF_16BE) do + def o.inspect + "abc" + end + assert_nothing_raised(Encoding::CompatibilityError) { [o].inspect } - def o.inspect - "abc".encode(Encoding.default_external) + def o.inspect + "abc".encode(Encoding.default_external) + end + assert_equal '[abc]', [o].inspect end - assert_equal '[abc]', [o].inspect - - Encoding.default_external = Encoding::US_ASCII 
- def o.inspect - "\u3042" - end - assert_equal '[\u3042]', [o].inspect + EnvUtil.with_default_external(Encoding::US_ASCII) do + def o.inspect + "\u3042" + end + assert_equal '[\u3042]', [o].inspect - def o.inspect - "\x82\xa0".force_encoding(Encoding::Windows_31J) + def o.inspect + "\x82\xa0".force_encoding(Encoding::Windows_31J) + end + assert_equal '[\x{82A0}]', [o].inspect end - assert_equal '[\x{82A0}]', [o].inspect - ensure - Encoding.default_internal = orig_int - Encoding.default_external = orig_ext - $VERBOSE = orig_v end def test_str_dump diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index c7e4b0c1ec7fee..1e0f31ba7c540a 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -3251,18 +3251,12 @@ def test_ascii_incomat_inspect assert_equal('"\\u3042\\u3044\\u3046"', S("\u3042\u3044\u3046".encode(e)).inspect) assert_equal('"ab\\"c"', S("ab\"c".encode(e)).inspect, bug4081) end - begin - verbose, $VERBOSE = $VERBOSE, nil - ext = Encoding.default_external - Encoding.default_external = "us-ascii" - $VERBOSE = verbose + + EnvUtil.with_default_external(Encoding::US_ASCII) do i = S("abc\"\\".force_encoding("utf-8")).inspect - ensure - $VERBOSE = nil - Encoding.default_external = ext - $VERBOSE = verbose + + assert_equal('"abc\\"\\\\"', i, bug4081) end - assert_equal('"abc\\"\\\\"', i, bug4081) end def test_dummy_inspect diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index 96ac99b6db6195..e18333a58f07f7 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -1847,6 +1847,14 @@ def test = "#{}" }, insns: [:concatstrings] end + def test_regexp_interpolation + assert_compiles '/123/', %q{ + def test = /#{1}#{2}#{3}/ + + test + }, insns: [:toregexp] + end + private # Assert that every method call in `test_script` can be compiled by ZJIT diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs index 77299c26574675..59b7f9737ea233 100644 --- a/zjit/bindgen/src/main.rs +++ b/zjit/bindgen/src/main.rs @@ 
-259,6 +259,13 @@ fn main() { // From internal/re.h .allowlist_function("rb_reg_new_ary") + .allowlist_var("ARG_ENCODING_FIXED") + .allowlist_var("ARG_ENCODING_NONE") + + // From include/ruby/onigmo.h + .allowlist_var("ONIG_OPTION_IGNORECASE") + .allowlist_var("ONIG_OPTION_EXTEND") + .allowlist_var("ONIG_OPTION_MULTILINE") // `ruby_value_type` is a C enum and this stops it from // prefixing all the members with the name of the type diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 5780a2635778f9..5fb83b5f48a5fa 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -346,6 +346,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::StringConcat { strings, .. } if strings.is_empty() => return None, Insn::StringConcat { strings, state } => gen_string_concat(jit, asm, opnds!(strings), &function.frame_state(*state)), Insn::StringIntern { val, state } => gen_intern(asm, opnd!(val), &function.frame_state(*state)), + Insn::ToRegexp { opt, values, state } => gen_toregexp(jit, asm, *opt, opnds!(values), &function.frame_state(*state)), Insn::Param { idx } => unreachable!("block.insns should not have Insn::Param({idx})"), Insn::Snapshot { .. } => return Some(()), // we don't need to do anything for this instruction at the moment Insn::Jump(branch) => no_output!(gen_jump(jit, asm, branch)), @@ -356,7 +357,10 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::SendWithoutBlockDirect { cd, state, self_val, args, .. } if args.len() + 1 > C_ARG_OPNDS.len() => // +1 for self gen_send_without_block(jit, asm, *cd, &function.frame_state(*state), opnd!(self_val), opnds!(args)), Insn::SendWithoutBlockDirect { cme, iseq, self_val, args, state, .. } => gen_send_without_block_direct(cb, jit, asm, *cme, *iseq, opnd!(self_val), opnds!(args), &function.frame_state(*state)), - Insn::InvokeBuiltin { bf, args, state, .. 
} => gen_invokebuiltin(jit, asm, &function.frame_state(*state), bf, opnds!(args))?, + // Ensure we have enough room to fit ec, self, and arguments + // TODO remove this check when we have stack args (we can use Time.new to test it) + Insn::InvokeBuiltin { bf, .. } if bf.argc + 2 > (C_ARG_OPNDS.len() as i32) => return None, + Insn::InvokeBuiltin { bf, args, state, .. } => gen_invokebuiltin(jit, asm, &function.frame_state(*state), bf, opnds!(args)), Insn::Return { val } => no_output!(gen_return(asm, opnd!(val))), Insn::FixnumAdd { left, right, state } => gen_fixnum_add(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state)), Insn::FixnumSub { left, right, state } => gen_fixnum_sub(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state)), @@ -371,7 +375,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::FixnumOr { left, right } => gen_fixnum_or(asm, opnd!(left), opnd!(right)), Insn::IsNil { val } => gen_isnil(asm, opnd!(val)), Insn::Test { val } => gen_test(asm, opnd!(val)), - Insn::GuardType { val, guard_type, state } => gen_guard_type(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state))?, + Insn::GuardType { val, guard_type, state } => gen_guard_type(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state)), Insn::GuardBitEquals { val, expected, state } => gen_guard_bit_equals(jit, asm, opnd!(val), *expected, &function.frame_state(*state)), Insn::PatchPoint { invariant, state } => no_output!(gen_patch_point(jit, asm, invariant, &function.frame_state(*state))), Insn::CCall { cfun, args, name: _, return_type: _, elidable: _ } => gen_ccall(asm, *cfun, opnds!(args)), @@ -542,22 +546,18 @@ fn gen_get_constant_path(jit: &JITState, asm: &mut Assembler, ic: *const iseq_in asm_ccall!(asm, rb_vm_opt_getconstant_path, EC, CFP, Opnd::const_ptr(ic)) } -fn gen_invokebuiltin(jit: &JITState, asm: &mut Assembler, state: &FrameState, bf: &rb_builtin_function, args: Vec) -> Option { - // Ensure we have
enough room fit ec, self, and arguments - // TODO remove this check when we have stack args (we can use Time.new to test it) - if bf.argc + 2 > (C_ARG_OPNDS.len() as i32) { - return None; - } - +fn gen_invokebuiltin(jit: &JITState, asm: &mut Assembler, state: &FrameState, bf: &rb_builtin_function, args: Vec) -> lir::Opnd { + assert!(bf.argc + 2 <= C_ARG_OPNDS.len() as i32, + "gen_invokebuiltin should not be called for builtin function {} with too many arguments: {}", + unsafe { std::ffi::CStr::from_ptr(bf.name).to_str().unwrap() }, + bf.argc); // Anything can happen inside builtin functions gen_prepare_non_leaf_call(jit, asm, state); let mut cargs = vec![EC]; cargs.extend(args); - let val = asm.ccall(bf.func_ptr as *const u8, cargs); - - Some(val) + asm.ccall(bf.func_ptr as *const u8, cargs) } /// Record a patch point that should be invalidated on a given invariant @@ -1172,7 +1172,7 @@ fn gen_test(asm: &mut Assembler, val: lir::Opnd) -> lir::Opnd { } /// Compile a type check with a side exit -fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard_type: Type, state: &FrameState) -> Option { +fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard_type: Type, state: &FrameState) -> lir::Opnd { if guard_type.is_subtype(types::Fixnum) { asm.test(val, Opnd::UImm(RUBY_FIXNUM_FLAG as u64)); asm.jz(side_exit(jit, state, GuardType(guard_type))); @@ -1185,7 +1185,7 @@ fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard // Static symbols have (val & 0xff) == RUBY_SYMBOL_FLAG // Use 8-bit comparison like YJIT does debug_assert!(val.try_num_bits(8).is_some(), "GuardType should not be used for a known constant, but val was: {val:?}"); - asm.cmp(val.try_num_bits(8)?, Opnd::UImm(RUBY_SYMBOL_FLAG as u64)); + asm.cmp(val.with_num_bits(8), Opnd::UImm(RUBY_SYMBOL_FLAG as u64)); asm.jne(side_exit(jit, state, GuardType(guard_type))); } else if guard_type.is_subtype(types::NilClass) { asm.cmp(val, 
Qnil.into()); @@ -1226,7 +1226,7 @@ fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard } else { unimplemented!("unsupported type: {guard_type}"); } - Some(val) + val } /// Compile an identity check with a side exit @@ -1595,36 +1595,58 @@ pub fn gen_exit_trampoline(cb: &mut CodeBlock) -> Option { }) } -fn gen_string_concat(jit: &mut JITState, asm: &mut Assembler, strings: Vec, state: &FrameState) -> Opnd { - gen_prepare_non_leaf_call(jit, asm, state); +fn gen_push_opnds(jit: &mut JITState, asm: &mut Assembler, opnds: &[Opnd]) -> lir::Opnd { + let n = opnds.len(); // Calculate the compile-time NATIVE_STACK_PTR offset from NATIVE_BASE_PTR // At this point, frame_setup(&[], jit.c_stack_slots) has been called, // which allocated aligned_stack_bytes(jit.c_stack_slots) on the stack let frame_size = aligned_stack_bytes(jit.c_stack_slots); - let n = strings.len(); let allocation_size = aligned_stack_bytes(n); - asm_comment!(asm, "allocate {} bytes on C stack for {} strings", allocation_size, n); + asm_comment!(asm, "allocate {} bytes on C stack for {} values", allocation_size, n); asm.sub_into(NATIVE_STACK_PTR, allocation_size.into()); // Calculate the total offset from NATIVE_BASE_PTR to our buffer let total_offset_from_base = (frame_size + allocation_size) as i32; - for (idx, &string_opnd) in strings.iter().enumerate() { + for (idx, &opnd) in opnds.iter().enumerate() { let slot_offset = -total_offset_from_base + (idx as i32 * SIZEOF_VALUE_I32); asm.mov( Opnd::mem(VALUE_BITS, NATIVE_BASE_PTR, slot_offset), - string_opnd + opnd ); } - let first_string_ptr = asm.lea(Opnd::mem(64, NATIVE_BASE_PTR, -total_offset_from_base)); - - let result = asm_ccall!(asm, rb_str_concat_literals, n.into(), first_string_ptr); + asm.lea(Opnd::mem(64, NATIVE_BASE_PTR, -total_offset_from_base)) +} +fn gen_pop_opnds(asm: &mut Assembler, opnds: &[Opnd]) { asm_comment!(asm, "restore C stack pointer"); + let allocation_size = aligned_stack_bytes(opnds.len()); 
asm.add_into(NATIVE_STACK_PTR, allocation_size.into()); +} + +fn gen_toregexp(jit: &mut JITState, asm: &mut Assembler, opt: usize, values: Vec, state: &FrameState) -> Opnd { + gen_prepare_non_leaf_call(jit, asm, state); + + let first_opnd_ptr = gen_push_opnds(jit, asm, &values); + + let tmp_ary = asm_ccall!(asm, rb_ary_tmp_new_from_values, Opnd::Imm(0), values.len().into(), first_opnd_ptr); + let result = asm_ccall!(asm, rb_reg_new_ary, tmp_ary, opt.into()); + asm_ccall!(asm, rb_ary_clear, tmp_ary); + + gen_pop_opnds(asm, &values); + + result +} + +fn gen_string_concat(jit: &mut JITState, asm: &mut Assembler, strings: Vec, state: &FrameState) -> Opnd { + gen_prepare_non_leaf_call(jit, asm, state); + + let first_string_ptr = gen_push_opnds(jit, asm, &strings); + let result = asm_ccall!(asm, rb_str_concat_literals, strings.len().into(), first_string_ptr); + gen_pop_opnds(asm, &strings); result } diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index 5c939fabe7f0d6..524b06b58047f5 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -30,6 +30,11 @@ impl ::std::fmt::Debug for __IncompleteArrayField { fmt.write_str("__IncompleteArrayField") } } +pub const ONIG_OPTION_IGNORECASE: u32 = 1; +pub const ONIG_OPTION_EXTEND: u32 = 2; +pub const ONIG_OPTION_MULTILINE: u32 = 4; +pub const ARG_ENCODING_FIXED: u32 = 16; +pub const ARG_ENCODING_NONE: u32 = 32; pub const INTEGER_REDEFINED_OP_FLAG: u32 = 1; pub const FLOAT_REDEFINED_OP_FLAG: u32 = 2; pub const STRING_REDEFINED_OP_FLAG: u32 = 4; diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index afe358ec1d6a86..7c7e09663b07ad 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -473,6 +473,9 @@ pub enum Insn { StringIntern { val: InsnId, state: InsnId }, StringConcat { strings: Vec, state: InsnId }, + /// Combine count stack values into a regexp + ToRegexp { opt: usize, values: Vec, state: InsnId }, + /// Put special object (VMCORE, CBASE, etc.) 
based on value_type PutSpecialObject { value_type: SpecialObjectType }, @@ -668,6 +671,14 @@ pub struct InsnPrinter<'a> { ptr_map: &'a PtrPrintMap, } +static REGEXP_FLAGS: &[(u32, &str)] = &[ + (ONIG_OPTION_MULTILINE, "MULTILINE"), + (ONIG_OPTION_IGNORECASE, "IGNORECASE"), + (ONIG_OPTION_EXTEND, "EXTENDED"), + (ARG_ENCODING_FIXED, "FIXEDENCODING"), + (ARG_ENCODING_NONE, "NOENCODING"), +]; + impl<'a> std::fmt::Display for InsnPrinter<'a> { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match &self.inner { @@ -716,6 +727,28 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Ok(()) } + Insn::ToRegexp { values, opt, .. } => { + write!(f, "ToRegexp")?; + let mut prefix = " "; + for value in values { + write!(f, "{prefix}{value}")?; + prefix = ", "; + } + + let opt = *opt as u32; + if opt != 0 { + write!(f, ", ")?; + let mut sep = ""; + for (flag, name) in REGEXP_FLAGS { + if opt & flag != 0 { + write!(f, "{sep}{name}")?; + sep = "|"; + } + } + } + + Ok(()) + } Insn::Test { val } => { write!(f, "Test {val}") } Insn::IsNil { val } => { write!(f, "IsNil {val}") } Insn::Jump(target) => { write!(f, "Jump {target}") } @@ -1179,6 +1212,7 @@ impl Function { &StringCopy { val, chilled, state } => StringCopy { val: find!(val), chilled, state }, &StringIntern { val, state } => StringIntern { val: find!(val), state: find!(state) }, &StringConcat { ref strings, state } => StringConcat { strings: find_vec!(strings), state: find!(state) }, + &ToRegexp { opt, ref values, state } => ToRegexp { opt, values: find_vec!(values), state }, &Test { val } => Test { val: find!(val) }, &IsNil { val } => IsNil { val: find!(val) }, &Jump(ref target) => Jump(find_branch_edge!(target)), @@ -1305,6 +1339,7 @@ impl Function { Insn::StringCopy { .. } => types::StringExact, Insn::StringIntern { .. } => types::Symbol, Insn::StringConcat { .. } => types::StringExact, + Insn::ToRegexp { .. } => types::RegexpExact, Insn::NewArray { .. } => types::ArrayExact, Insn::ArrayDup { .. 
} => types::ArrayExact, Insn::NewHash { .. } => types::HashExact, @@ -1939,6 +1974,10 @@ impl Function { worklist.extend(strings); worklist.push_back(state); } + &Insn::ToRegexp { ref values, state, .. } => { + worklist.extend(values); + worklist.push_back(state); + } | &Insn::Return { val } | &Insn::Throw { val, .. } | &Insn::Test { val } @@ -2863,6 +2902,15 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { let insn_id = fun.push_insn(block, Insn::StringConcat { strings, state: exit_id }); state.stack_push(insn_id); } + YARVINSN_toregexp => { + // First arg contains the options (multiline, extended, ignorecase) used to create the regexp + let opt = get_arg(pc, 0).as_usize(); + let count = get_arg(pc, 1).as_usize(); + let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); + let values = state.stack_pop_n(count)?; + let insn_id = fun.push_insn(block, Insn::ToRegexp { opt, values, state: exit_id }); + state.stack_push(insn_id); + } YARVINSN_newarray => { let count = get_arg(pc, 0).as_usize(); let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); @@ -5330,6 +5378,47 @@ mod tests { "#]]); } + #[test] + fn test_toregexp() { + eval(r##" + def test = /#{1}#{2}#{3}/ + "##); + assert_method_hir_with_opcode("test", YARVINSN_toregexp, expect![[r#" + fn test@:2: + bb0(v0:BasicObject): + v2:Fixnum[1] = Const Value(1) + v4:BasicObject = ObjToString v2 + v6:String = AnyToString v2, str: v4 + v7:Fixnum[2] = Const Value(2) + v9:BasicObject = ObjToString v7 + v11:String = AnyToString v7, str: v9 + v12:Fixnum[3] = Const Value(3) + v14:BasicObject = ObjToString v12 + v16:String = AnyToString v12, str: v14 + v18:RegexpExact = ToRegexp v6, v11, v16 + Return v18 + "#]]); + } + + #[test] + fn test_toregexp_with_options() { + eval(r##" + def test = /#{1}#{2}/mixn + "##); + assert_method_hir_with_opcode("test", YARVINSN_toregexp, expect![[r#" + fn test@:2: + bb0(v0:BasicObject): + v2:Fixnum[1] = Const Value(1) + v4:BasicObject = ObjToString v2 
+ v6:String = AnyToString v2, str: v4 + v7:Fixnum[2] = Const Value(2) + v9:BasicObject = ObjToString v7 + v11:String = AnyToString v7, str: v9 + v13:RegexpExact = ToRegexp v6, v11, MULTILINE|IGNORECASE|EXTENDED|NOENCODING + Return v13 + "#]]); + } + #[test] fn throw() { eval("