diff --git a/encoding.c b/encoding.c index 2416acecea8c68..bda40eb04392c7 100644 --- a/encoding.c +++ b/encoding.c @@ -357,6 +357,12 @@ enc_register_at(struct enc_table *enc_table, int index, const char *name, rb_enc struct rb_encoding_entry *ent = &enc_table->list[index]; rb_raw_encoding *encoding; + if (ent->loaded) { + RUBY_ASSERT(ent->base == base_encoding); + RUBY_ASSERT(!strcmp(name, ent->name)); + return index; + } + if (!valid_encoding_name_p(name)) return -1; if (!ent->name) { ent->name = name = strdup(name); @@ -369,14 +375,20 @@ enc_register_at(struct enc_table *enc_table, int index, const char *name, rb_enc encoding = xmalloc(sizeof(rb_encoding)); } + rb_raw_encoding tmp_encoding; if (base_encoding) { - *encoding = *base_encoding; + tmp_encoding = *base_encoding; } else { - memset(encoding, 0, sizeof(*ent->enc)); + memset(&tmp_encoding, 0, sizeof(*ent->enc)); } - encoding->name = name; - encoding->ruby_encoding_index = index; + tmp_encoding.name = name; + tmp_encoding.ruby_encoding_index = index; + + // FIXME: If encoding already existed, it may be concurrently accessed + // It's technically invalid to write to this memory as it's read, but as all + // values are set up it _probably_ works. 
+ *encoding = tmp_encoding; ent->enc = encoding; st_insert(enc_table->names, (st_data_t)name, (st_data_t)index); @@ -408,7 +420,9 @@ enc_from_index(struct enc_table *enc_table, int index) if (UNLIKELY(index < 0 || enc_table->count <= (index &= ENC_INDEX_MASK))) { return 0; } - return enc_table->list[index].enc; + rb_encoding *enc = enc_table->list[index].enc; + RUBY_ASSERT(ENC_TO_ENCINDEX(enc) == index); + return enc; } rb_encoding * @@ -827,7 +841,7 @@ enc_autoload_body(rb_encoding *enc) GLOBAL_ENC_TABLE_LOCKING(enc_table) { i = enc->ruby_encoding_index; enc_register_at(enc_table, i & ENC_INDEX_MASK, rb_enc_name(enc), base); - ((rb_raw_encoding *)enc)->ruby_encoding_index = i; + RUBY_ASSERT(((rb_raw_encoding *)enc)->ruby_encoding_index == i); } } diff --git a/lib/erb.rb b/lib/erb.rb index 9c4cac3f10c988..d88cce9f4b1722 100644 --- a/lib/erb.rb +++ b/lib/erb.rb @@ -511,24 +511,32 @@ # # ## Encodings # -# In general, an \ERB result string (or Ruby code generated by \ERB) -# has the same encoding as the string originally passed to ERB.new; -# see [Encoding][encoding]. +# An \ERB template has an [encoding][encoding], +# which is by default the encoding of the source string; +# the result string will also have that encoding. # -# You can specify the output encoding by adding a [magic comment][magic comments] +# ``` +# s = < +# EOT +# template = ERB.new(s) +# s.encoding # => # +# template.encoding # => # +# template.result.encoding # => # +# ``` +# +# You can specify a different encoding by adding a [magic comment][magic comments] # at the top of the given string: # # ``` -# s = < -# -# Some text. -# EOF -# # => "<%#-*- coding: Big5 -*-%>\n\nSome text.\n" -# s.encoding -# # => # -# ERB.new(s).result.encoding -# # => # +# <%# Comment. 
%> +# EOT +# template = ERB.new(s) +# s.encoding # => # +# template.encoding # => # +# template.result.encoding # => # # ``` # # ## Error Reporting @@ -873,7 +881,12 @@ def make_compiler(trim_mode) # The Ruby code generated by ERB attr_reader :src - # The encoding to eval + # :markup: markdown + # + # Returns the encoding of `self`; + # see [encoding][encoding]. + # + # [encoding]: https://docs.ruby-lang.org/en/master/Encoding.html attr_reader :encoding # :markup: markdown @@ -919,7 +932,13 @@ def set_eoutvar(compiler, eoutvar = '_erbout') compiler.post_cmd = [eoutvar] end - # Generate results and print them. (see ERB#result) + # :markup: markdown + # + # :call-seq: + # run(binding = new_toplevel) -> nil + # + # Like #result, but prints the result string (instead of returning it); + # returns `nil`. def run(b=new_toplevel) print self.result(b) end @@ -969,10 +988,21 @@ def result_with_hash(hash) result(b) end - ## - # Returns a new binding each time *near* TOPLEVEL_BINDING for runs that do - # not specify a binding. - + # :markup: markdown + # + # :call-seq: + # new_toplevel(symbols) -> new_binding + # + # Returns a new binding based on `TOPLEVEL_BINDING`; + # used to create a default binding for a call to #result. + # + # See [Default Binding][default binding]. + # + # Argument `symbols` is an array of symbols; + # each symbol `symbol` is used to define (unless already defined) a variable in the binding + # whose name is `symbol` and whose value is `nil`. 
+ # + # [default binding]: rdoc-ref:ERB@Default+Binding def new_toplevel(vars = nil) b = TOPLEVEL_BINDING if vars diff --git a/object.c b/object.c index 56afc7e99b8ca2..c8d59601925e59 100644 --- a/object.c +++ b/object.c @@ -138,6 +138,9 @@ rb_class_allocate_instance(VALUE klass) for (size_t i = 0; i < ROBJECT_FIELDS_CAPACITY(obj); i++) { ptr[i] = Qundef; } + if (rb_obj_class(obj) != rb_class_real(klass)) { + rb_bug("Expected rb_class_allocate_instance to set the class correctly"); + } #endif return obj; @@ -2192,6 +2195,15 @@ class_get_alloc_func(VALUE klass) return allocator; } +// Might return NULL. +rb_alloc_func_t +rb_zjit_class_get_alloc_func(VALUE klass) +{ + assert(RCLASS_INITIALIZED_P(klass)); + assert(!RCLASS_SINGLETON_P(klass)); + return rb_get_alloc_func(klass); +} + static VALUE class_call_alloc_func(rb_alloc_func_t allocator, VALUE klass) { diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index 42d10490c5cee6..a6296084ea03da 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -851,6 +851,41 @@ def test = Foo.new }, insns: [:opt_new] end + def test_opt_new_invalidate_new + assert_compiles '["Foo", "foo"]', %q{ + class Foo; end + def test = Foo.new + test; test + result = [test.class.name] + def Foo.new = "foo" + result << test + result + }, insns: [:opt_new], call_threshold: 2 + end + + def test_opt_new_with_custom_allocator + assert_compiles '"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"', %q{ + require "digest" + def test = Digest::SHA256.new.hexdigest + test; test + }, insns: [:opt_new], call_threshold: 2 + end + + def test_opt_new_with_custom_allocator_raises + assert_compiles '[42, 42]', %q{ + require "digest" + class C < Digest::Base; end + def test + begin + Digest::Base.new + rescue NotImplementedError + 42 + end + end + [test, test] + }, insns: [:opt_new], call_threshold: 2 + end + def test_new_hash_empty assert_compiles '{}', %q{ def test = {} diff --git a/vm_insnhelper.c b/vm_insnhelper.c 
index 362af31188813b..8022a29a6e77e2 100644 --- a/vm_insnhelper.c +++ b/vm_insnhelper.c @@ -2353,6 +2353,12 @@ vm_search_method(VALUE cd_owner, struct rb_call_data *cd, VALUE recv) return vm_cc_cme(cc); } +const struct rb_callable_method_entry_struct * +rb_zjit_vm_search_method(VALUE cd_owner, struct rb_call_data *cd, VALUE recv) +{ + return vm_search_method(cd_owner, cd, recv); +} + #if __has_attribute(transparent_union) typedef union { VALUE (*anyargs)(ANYARGS); @@ -2417,6 +2423,12 @@ vm_method_cfunc_is(const rb_iseq_t *iseq, CALL_DATA cd, VALUE recv, cfunc_type f return check_cfunc(cme, func); } +bool +rb_zjit_cme_is_cfunc(const rb_callable_method_entry_t *me, const cfunc_type func) +{ + return check_cfunc(me, func); +} + int rb_vm_method_cfunc_is(const rb_iseq_t *iseq, CALL_DATA cd, VALUE recv, cfunc_type func) { diff --git a/zjit.c b/zjit.c index 6bbe508f241a67..21618c39b1f08f 100644 --- a/zjit.c +++ b/zjit.c @@ -170,6 +170,30 @@ rb_zjit_local_id(const rb_iseq_t *iseq, unsigned idx) return ISEQ_BODY(iseq)->local_table[idx]; } +bool rb_zjit_cme_is_cfunc(const rb_callable_method_entry_t *me, const void *func); + +const struct rb_callable_method_entry_struct * +rb_zjit_vm_search_method(VALUE cd_owner, struct rb_call_data *cd, VALUE recv); + +bool +rb_zjit_class_initialized_p(VALUE klass) +{ + return RCLASS_INITIALIZED_P(klass); +} + +rb_alloc_func_t rb_zjit_class_get_alloc_func(VALUE klass); + +VALUE rb_class_allocate_instance(VALUE klass); + +bool +rb_zjit_class_has_default_allocator(VALUE klass) +{ + assert(RCLASS_INITIALIZED_P(klass)); + assert(!RCLASS_SINGLETON_P(klass)); + rb_alloc_func_t alloc = rb_zjit_class_get_alloc_func(klass); + return alloc == rb_class_allocate_instance; +} + // Primitives used by zjit.rb. Don't put other functions below, which wouldn't use them. 
VALUE rb_zjit_assert_compiles(rb_execution_context_t *ec, VALUE self); VALUE rb_zjit_stats(rb_execution_context_t *ec, VALUE self, VALUE target_key); diff --git a/zjit.rb b/zjit.rb index 44bce453fff827..7d2fd3a10e3e00 100644 --- a/zjit.rb +++ b/zjit.rb @@ -44,6 +44,7 @@ def stats_string print_counters_with_prefix(prefix: 'compile_error_', prompt: 'compile error reasons', buf:, stats:, limit: 20) print_counters_with_prefix(prefix: 'exit_', prompt: 'side exit reasons', buf:, stats:, limit: 20) print_counters_with_prefix(prefix: 'dynamic_send_type_', prompt: 'dynamic send types', buf:, stats:, limit: 20) + print_counters_with_prefix(prefix: 'send_fallback_', prompt: 'send fallback def_types', buf:, stats:, limit: 20) # Show the most important stats ratio_in_zjit at the end print_counters([ diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs index c6f02be415b83a..3bffdfd9ffa691 100644 --- a/zjit/bindgen/src/main.rs +++ b/zjit/bindgen/src/main.rs @@ -333,6 +333,8 @@ fn main() { .allowlist_function("rb_insn_name") .allowlist_function("rb_insn_len") .allowlist_function("rb_yarv_class_of") + .allowlist_function("rb_zjit_class_initialized_p") + .allowlist_function("rb_zjit_class_has_default_allocator") .allowlist_function("rb_get_ec_cfp") .allowlist_function("rb_get_cfp_iseq") .allowlist_function("rb_get_cfp_pc") @@ -342,6 +344,8 @@ fn main() { .allowlist_function("rb_get_cfp_ep_level") .allowlist_function("rb_get_cme_def_type") .allowlist_function("rb_zjit_constcache_shareable") + .allowlist_function("rb_zjit_vm_search_method") + .allowlist_function("rb_zjit_cme_is_cfunc") .allowlist_function("rb_get_cme_def_body_attr_id") .allowlist_function("rb_get_symbol_id") .allowlist_function("rb_get_cme_def_body_optimized_type") diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 12bd6d4aa3f104..9a4c7d6ed22b81 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -13,10 +13,10 @@ use crate::invariants::{track_bop_assumption, track_cme_assumption, 
track_no_ep_ use crate::gc::{append_gc_offsets, get_or_create_iseq_payload, get_or_create_iseq_payload_ptr, IseqPayload, IseqStatus}; use crate::state::ZJITState; use crate::stats::{exit_counter_for_compile_error, incr_counter, incr_counter_by, CompileError}; -use crate::stats::{counter_ptr, with_time_stat, Counter, Counter::{compile_time_ns, exit_compile_error}}; +use crate::stats::{counter_ptr, with_time_stat, Counter, send_fallback_counter, Counter::{compile_time_ns, exit_compile_error}}; use crate::{asm::CodeBlock, cruby::*, options::debug, virtualmem::CodePtr}; use crate::backend::lir::{self, asm_comment, asm_ccall, Assembler, Opnd, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_STACK_PTR, NATIVE_BASE_PTR, SCRATCH_OPND, SP}; -use crate::hir::{iseq_to_hir, Block, BlockId, BranchEdge, Invariant, RangeType, SideExitReason, SideExitReason::*, SpecialObjectType, SpecialBackrefSymbol, SELF_PARAM_IDX}; +use crate::hir::{iseq_to_hir, Block, BlockId, BranchEdge, Invariant, RangeType, SideExitReason, SideExitReason::*, MethodType, SpecialObjectType, SpecialBackrefSymbol, SELF_PARAM_IDX}; use crate::hir::{Const, FrameState, Function, Insn, InsnId}; use crate::hir_type::{types, Type}; use crate::options::get_option; @@ -351,6 +351,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::NewRangeFixnum { low, high, flag, state } => gen_new_range_fixnum(asm, opnd!(low), opnd!(high), *flag, &function.frame_state(*state)), Insn::ArrayDup { val, state } => gen_array_dup(asm, opnd!(val), &function.frame_state(*state)), Insn::ObjectAlloc { val, state } => gen_object_alloc(jit, asm, opnd!(val), &function.frame_state(*state)), + &Insn::ObjectAllocClass { class, state } => gen_object_alloc_class(asm, class, &function.frame_state(state)), Insn::StringCopy { val, chilled, state } => gen_string_copy(asm, opnd!(val), *chilled, &function.frame_state(*state)), // concatstrings shouldn't have 0 strings // If it happens we abort the compilation for now 
@@ -364,10 +365,10 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::IfTrue { val, target } => no_output!(gen_if_true(jit, asm, opnd!(val), target)), Insn::IfFalse { val, target } => no_output!(gen_if_false(jit, asm, opnd!(val), target)), &Insn::Send { cd, blockiseq, state, .. } => gen_send(jit, asm, cd, blockiseq, &function.frame_state(state)), - Insn::SendWithoutBlock { cd, state, .. } => gen_send_without_block(jit, asm, *cd, &function.frame_state(*state)), + Insn::SendWithoutBlock { cd, state, def_type, .. } => gen_send_without_block(jit, asm, *cd, *def_type, &function.frame_state(*state)), // Give up SendWithoutBlockDirect for 6+ args since asm.ccall() doesn't support it. Insn::SendWithoutBlockDirect { cd, state, args, .. } if args.len() + 1 > C_ARG_OPNDS.len() => // +1 for self - gen_send_without_block(jit, asm, *cd, &function.frame_state(*state)), + gen_send_without_block(jit, asm, *cd, None, &function.frame_state(*state)), Insn::SendWithoutBlockDirect { cme, iseq, recv, args, state, .. } => gen_send_without_block_direct(cb, jit, asm, *cme, *iseq, opnd!(recv), opnds!(args), &function.frame_state(*state)), &Insn::InvokeSuper { cd, blockiseq, state, .. } => gen_invokesuper(jit, asm, cd, blockiseq, &function.frame_state(state)), Insn::InvokeBlock { cd, state, .. 
} => gen_invokeblock(jit, asm, *cd, &function.frame_state(*state)), @@ -388,11 +389,12 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::FixnumAnd { left, right } => gen_fixnum_and(asm, opnd!(left), opnd!(right)), Insn::FixnumOr { left, right } => gen_fixnum_or(asm, opnd!(left), opnd!(right)), Insn::IsNil { val } => gen_isnil(asm, opnd!(val)), - &Insn::IsMethodCfunc { val, cd, cfunc } => gen_is_method_cfunc(jit, asm, opnd!(val), cd, cfunc), + &Insn::IsMethodCfunc { val, cd, cfunc, state: _ } => gen_is_method_cfunc(jit, asm, opnd!(val), cd, cfunc), Insn::Test { val } => gen_test(asm, opnd!(val)), Insn::GuardType { val, guard_type, state } => gen_guard_type(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state)), Insn::GuardTypeNot { val, guard_type, state } => gen_guard_type_not(jit, asm, opnd!(val), *guard_type, &function.frame_state(*state)), Insn::GuardBitEquals { val, expected, state } => gen_guard_bit_equals(jit, asm, opnd!(val), *expected, &function.frame_state(*state)), + &Insn::GuardBlockParamProxy { level, state } => no_output!(gen_guard_block_param_proxy(jit, asm, level, &function.frame_state(state))), Insn::PatchPoint { invariant, state } => no_output!(gen_patch_point(jit, asm, invariant, &function.frame_state(*state))), Insn::CCall { cfun, args, name: _, return_type: _, elidable: _ } => gen_ccall(asm, *cfun, opnds!(args)), Insn::GetIvar { self_val, id, state: _ } => gen_getivar(asm, opnd!(self_val), *id), @@ -400,7 +402,6 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::GetGlobal { id, state } => gen_getglobal(jit, asm, *id, &function.frame_state(*state)), &Insn::GetLocal { ep_offset, level } => gen_getlocal_with_ep(asm, ep_offset, level), &Insn::SetLocal { val, ep_offset, level } => no_output!(gen_setlocal_with_ep(asm, opnd!(val), function.type_of(val), ep_offset, level)), - &Insn::GetBlockParamProxy { level, state } => gen_get_block_param_proxy(jit, asm, level, 
&function.frame_state(state)), Insn::GetConstantPath { ic, state } => gen_get_constant_path(jit, asm, *ic, &function.frame_state(*state)), Insn::SetIvar { self_val, id, val, state: _ } => no_output!(gen_setivar(asm, opnd!(self_val), *id, opnd!(val))), Insn::SideExit { state, reason } => no_output!(gen_side_exit(jit, asm, reason, &function.frame_state(*state))), @@ -551,7 +552,7 @@ fn gen_setlocal_with_ep(asm: &mut Assembler, val: Opnd, val_type: Type, local_ep } } -fn gen_get_block_param_proxy(jit: &JITState, asm: &mut Assembler, level: u32, state: &FrameState) -> lir::Opnd { +fn gen_guard_block_param_proxy(jit: &JITState, asm: &mut Assembler, level: u32, state: &FrameState) { // Bail out if the `&block` local variable has been modified let ep = gen_get_ep(asm, level); let flags = Opnd::mem(64, ep, SIZEOF_VALUE_I32 * (VM_ENV_DATA_INDEX_FLAGS as i32)); @@ -569,9 +570,6 @@ fn gen_get_block_param_proxy(jit: &JITState, asm: &mut Assembler, level: u32, st let block_handler = asm.load(Opnd::mem(64, ep, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL)); asm.test(block_handler, 0x1.into()); asm.jz(side_exit(jit, state, SideExitReason::BlockParamProxyNotIseqOrIfunc)); - - // Return the rb_block_param_proxy instance (GC root, so put as a number to avoid unnecessary GC tracing) - unsafe { rb_block_param_proxy }.as_u64().into() } fn gen_get_constant_path(jit: &JITState, asm: &mut Assembler, ic: *const iseq_inline_constant_cache, state: &FrameState) -> Opnd { @@ -999,10 +997,16 @@ fn gen_send_without_block( jit: &mut JITState, asm: &mut Assembler, cd: *const rb_call_data, + def_type: Option<MethodType>, state: &FrameState, ) -> lir::Opnd { gen_incr_counter(asm, Counter::dynamic_send_count); gen_incr_counter(asm, Counter::dynamic_send_type_send_without_block); + + if let Some(def_type) = def_type { + gen_incr_counter(asm, send_fallback_counter(def_type)); + } + gen_prepare_non_leaf_call(jit, asm, state); asm_comment!(asm, "call #{} with dynamic dispatch", ruby_call_method_name(cd)); unsafe 
extern "C" { @@ -1231,12 +1235,20 @@ fn gen_new_range_fixnum( } fn gen_object_alloc(jit: &JITState, asm: &mut Assembler, val: lir::Opnd, state: &FrameState) -> lir::Opnd { - // TODO: this is leaf in the vast majority of cases, - // Should specialize to avoid `gen_prepare_non_leaf_call` (Shopify#747) + // Allocating an object from an unknown class is non-leaf; see doc for `ObjectAlloc`. gen_prepare_non_leaf_call(jit, asm, state); asm_ccall!(asm, rb_obj_alloc, val) } +fn gen_object_alloc_class(asm: &mut Assembler, class: VALUE, state: &FrameState) -> lir::Opnd { + // Allocating an object for a known class with default allocator is leaf; see doc for + // `ObjectAllocClass`. + gen_prepare_leaf_call_with_gc(asm, state); + assert!(unsafe { rb_zjit_class_has_default_allocator(class) }, "class must have default allocator"); + // TODO(max): inline code to allocate an instance + asm_ccall!(asm, rb_class_allocate_instance, class.into()) +} + /// Compile code that exits from JIT code with a return value fn gen_return(asm: &mut Assembler, val: lir::Opnd) { // Pop the current frame (ec->cfp++) diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index 4bb1c3dffd4b06..4b83051a67e0e2 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -937,6 +937,17 @@ unsafe extern "C" { pub fn rb_zjit_defined_ivar(obj: VALUE, id: ID, pushval: VALUE) -> VALUE; pub fn rb_zjit_insn_leaf(insn: ::std::os::raw::c_int, opes: *const VALUE) -> bool; pub fn rb_zjit_local_id(iseq: *const rb_iseq_t, idx: ::std::os::raw::c_uint) -> ID; + pub fn rb_zjit_cme_is_cfunc( + me: *const rb_callable_method_entry_t, + func: *const ::std::os::raw::c_void, + ) -> bool; + pub fn rb_zjit_vm_search_method( + cd_owner: VALUE, + cd: *mut rb_call_data, + recv: VALUE, + ) -> *const rb_callable_method_entry_struct; + pub fn rb_zjit_class_initialized_p(klass: VALUE) -> bool; + pub fn rb_zjit_class_has_default_allocator(klass: VALUE) -> bool; pub fn 
rb_iseq_encoded_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; pub fn rb_iseq_pc_at_idx(iseq: *const rb_iseq_t, insn_idx: u32) -> *mut VALUE; pub fn rb_iseq_opcode_at_pc(iseq: *const rb_iseq_t, pc: *const VALUE) -> ::std::os::raw::c_int; diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 033531fd566a7d..6bdfbb9dadde64 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -467,6 +467,42 @@ pub enum SideExitReason { StackOverflow, } +#[derive(Debug, Clone, Copy)] +pub enum MethodType { + Iseq, + Cfunc, + Attrset, + Ivar, + Bmethod, + Zsuper, + Alias, + Undefined, + NotImplemented, + Optimized, + Missing, + Refined, +} + +impl From<u32> for MethodType { + fn from(value: u32) -> Self { + match value { + VM_METHOD_TYPE_ISEQ => MethodType::Iseq, + VM_METHOD_TYPE_CFUNC => MethodType::Cfunc, + VM_METHOD_TYPE_ATTRSET => MethodType::Attrset, + VM_METHOD_TYPE_IVAR => MethodType::Ivar, + VM_METHOD_TYPE_BMETHOD => MethodType::Bmethod, + VM_METHOD_TYPE_ZSUPER => MethodType::Zsuper, + VM_METHOD_TYPE_ALIAS => MethodType::Alias, + VM_METHOD_TYPE_UNDEF => MethodType::Undefined, + VM_METHOD_TYPE_NOTIMPLEMENTED => MethodType::NotImplemented, + VM_METHOD_TYPE_OPTIMIZED => MethodType::Optimized, + VM_METHOD_TYPE_MISSING => MethodType::Missing, + VM_METHOD_TYPE_REFINED => MethodType::Refined, + _ => unreachable!("unknown send_without_block def_type: {}", value), + } + } +} + impl std::fmt::Display for SideExitReason { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { @@ -524,8 +560,15 @@ pub enum Insn { HashDup { val: InsnId, state: InsnId }, - /// Allocate an instance of the `val` class without calling `#initialize` on it. + /// Allocate an instance of the `val` object without calling `#initialize` on it. 
+ /// This can: + /// * raise an exception if `val` is not a class + /// * run arbitrary code if `val` is a class with a custom allocator ObjectAlloc { val: InsnId, state: InsnId }, + /// Allocate an instance of `class` without calling `#initialize` on it. + /// This requires that `class` has the default allocator (for example via `IsMethodCfunc`). + /// This won't raise or run arbitrary code because `class` has the default allocator. + ObjectAllocClass { class: VALUE, state: InsnId }, /// Check if the value is truthy and "return" a C boolean. In reality, we will likely fuse this /// with IfTrue/IfFalse in the backend to generate jcc. @@ -533,7 +576,7 @@ pub enum Insn { /// Return C `true` if `val` is `Qnil`, else `false`. IsNil { val: InsnId }, /// Return C `true` if `val`'s method on cd resolves to the cfunc. - IsMethodCfunc { val: InsnId, cd: *const rb_call_data, cfunc: *const u8 }, + IsMethodCfunc { val: InsnId, cd: *const rb_call_data, cfunc: *const u8, state: InsnId }, Defined { op_type: usize, obj: VALUE, pushval: VALUE, v: InsnId, state: InsnId }, GetConstantPath { ic: *const iseq_inline_constant_cache, state: InsnId }, @@ -559,9 +602,6 @@ pub enum Insn { GetLocal { level: u32, ep_offset: u32 }, /// Set a local variable in a higher scope or the heap SetLocal { level: u32, ep_offset: u32, val: InsnId }, - /// Get a special singleton instance `rb_block_param_proxy` if the block - /// handler for the EP specified by `level` is an ISEQ or an ifunc. 
- GetBlockParamProxy { level: u32, state: InsnId }, GetSpecialSymbol { symbol_type: SpecialBackrefSymbol, state: InsnId }, GetSpecialNumber { nth: u64, state: InsnId }, @@ -583,7 +623,13 @@ pub enum Insn { /// Un-optimized fallback implementation (dynamic dispatch) for send-ish instructions /// Ignoring keyword arguments etc for now - SendWithoutBlock { recv: InsnId, cd: *const rb_call_data, args: Vec<InsnId>, state: InsnId }, + SendWithoutBlock { + recv: InsnId, + cd: *const rb_call_data, + args: Vec<InsnId>, + def_type: Option<MethodType>, // Assigned in `type_specialize` if it's not optimized + state: InsnId, + }, Send { recv: InsnId, cd: *const rb_call_data, blockiseq: IseqPtr, args: Vec<InsnId>, state: InsnId }, InvokeSuper { recv: InsnId, cd: *const rb_call_data, blockiseq: IseqPtr, args: Vec<InsnId>, state: InsnId }, InvokeBlock { cd: *const rb_call_data, args: Vec<InsnId>, state: InsnId }, @@ -637,6 +683,9 @@ pub enum Insn { GuardBitEquals { val: InsnId, expected: VALUE, state: InsnId }, /// Side-exit if val doesn't have the expected shape. GuardShape { val: InsnId, shape: ShapeId, state: InsnId }, + /// Side-exit if the block param has been modified or the block handler for the frame + /// is neither ISEQ nor ifunc, which makes it incompatible with rb_block_param_proxy. + GuardBlockParamProxy { level: u32, state: InsnId }, /// Generate no code (or padding if necessary) and insert a patch point /// that can be rewritten to a side exit when the Invariant is broken. @@ -662,7 +711,7 @@ impl Insn { | Insn::PatchPoint { .. } | Insn::SetIvar { .. } | Insn::ArrayExtend { .. } | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetGlobal { .. } | Insn::SetLocal { .. } | Insn::Throw { .. } | Insn::IncrCounter(_) - | Insn::CheckInterrupts { .. } => false, + | Insn::CheckInterrupts { .. } | Insn::GuardBlockParamProxy { .. } => false, _ => true, } } @@ -713,6 +762,7 @@ impl Insn { Insn::LoadIvarEmbedded { .. } => false, Insn::LoadIvarExtended { .. } => false, Insn::CCall { elidable, .. 
} => !elidable, + Insn::ObjectAllocClass { .. } => false, // TODO: NewRange is effects free if we can prove the two ends to be Fixnum, // but we don't have type information here in `impl Insn`. See rb_range_new(). Insn::NewRange { .. } => true, @@ -777,6 +827,7 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::ArrayDup { val, .. } => { write!(f, "ArrayDup {val}") } Insn::HashDup { val, .. } => { write!(f, "HashDup {val}") } Insn::ObjectAlloc { val, .. } => { write!(f, "ObjectAlloc {val}") } + Insn::ObjectAllocClass { class, .. } => { write!(f, "ObjectAllocClass {}", class.print(self.ptr_map)) } Insn::StringCopy { val, .. } => { write!(f, "StringCopy {val}") } Insn::StringConcat { strings, .. } => { write!(f, "StringConcat")?; @@ -879,6 +930,7 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::GuardTypeNot { val, guard_type, .. } => { write!(f, "GuardTypeNot {val}, {}", guard_type.print(self.ptr_map)) }, Insn::GuardBitEquals { val, expected, .. } => { write!(f, "GuardBitEquals {val}, {}", expected.print(self.ptr_map)) }, &Insn::GuardShape { val, shape, .. } => { write!(f, "GuardShape {val}, {:p}", self.ptr_map.map_shape(shape)) }, + Insn::GuardBlockParamProxy { level, .. } => write!(f, "GuardBlockParamProxy l{level}"), Insn::PatchPoint { invariant, .. } => { write!(f, "PatchPoint {}", invariant.print(self.ptr_map)) }, Insn::GetConstantPath { ic, .. } => { write!(f, "GetConstantPath {:p}", self.ptr_map.map_ptr(ic)) }, Insn::CCall { cfun, args, name, return_type: _, elidable: _ } => { @@ -914,7 +966,6 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::SetGlobal { id, val, .. } => write!(f, "SetGlobal :{}, {val}", id.contents_lossy()), Insn::GetLocal { level, ep_offset } => write!(f, "GetLocal l{level}, EP@{ep_offset}"), Insn::SetLocal { val, level, ep_offset } => write!(f, "SetLocal l{level}, EP@{ep_offset}, {val}"), - Insn::GetBlockParamProxy { level, .. } => write!(f, "GetBlockParamProxy l{level}"), Insn::GetSpecialSymbol { symbol_type, .. 
} => write!(f, "GetSpecialSymbol {symbol_type:?}"), Insn::GetSpecialNumber { nth, .. } => write!(f, "GetSpecialNumber {nth}"), Insn::ToArray { val, .. } => write!(f, "ToArray {val}"), @@ -1299,7 +1350,7 @@ impl Function { &ToRegexp { opt, ref values, state } => ToRegexp { opt, values: find_vec!(values), state }, &Test { val } => Test { val: find!(val) }, &IsNil { val } => IsNil { val: find!(val) }, - &IsMethodCfunc { val, cd, cfunc } => IsMethodCfunc { val: find!(val), cd, cfunc }, + &IsMethodCfunc { val, cd, cfunc, state } => IsMethodCfunc { val: find!(val), cd, cfunc, state }, Jump(target) => Jump(find_branch_edge!(target)), &IfTrue { val, ref target } => IfTrue { val: find!(val), target: find_branch_edge!(target) }, &IfFalse { val, ref target } => IfFalse { val: find!(val), target: find_branch_edge!(target) }, @@ -1307,6 +1358,7 @@ impl Function { &GuardTypeNot { val, guard_type, state } => GuardTypeNot { val: find!(val), guard_type, state }, &GuardBitEquals { val, expected, state } => GuardBitEquals { val: find!(val), expected, state }, &GuardShape { val, shape, state } => GuardShape { val: find!(val), shape, state }, + &GuardBlockParamProxy { level, state } => GuardBlockParamProxy { level, state: find!(state) }, &FixnumAdd { left, right, state } => FixnumAdd { left: find!(left), right: find!(right), state }, &FixnumSub { left, right, state } => FixnumSub { left: find!(left), right: find!(right), state }, &FixnumMult { left, right, state } => FixnumMult { left: find!(left), right: find!(right), state }, @@ -1330,10 +1382,11 @@ impl Function { str: find!(str), state, }, - &SendWithoutBlock { recv, cd, ref args, state } => SendWithoutBlock { + &SendWithoutBlock { recv, cd, ref args, def_type, state } => SendWithoutBlock { recv: find!(recv), cd, args: find_vec!(args), + def_type, state, }, &SendWithoutBlockDirect { recv, cd, cme, iseq, ref args, state } => SendWithoutBlockDirect { @@ -1367,6 +1420,7 @@ impl Function { &ArrayDup { val, state } => ArrayDup { val: 
find!(val), state }, &HashDup { val, state } => HashDup { val: find!(val), state }, &ObjectAlloc { val, state } => ObjectAlloc { val: find!(val), state }, + &ObjectAllocClass { class, state } => ObjectAllocClass { class, state: find!(state) }, &CCall { cfun, ref args, name, return_type, elidable } => CCall { cfun, args: find_vec!(args), name, return_type, elidable }, &Defined { op_type, obj, pushval, v, state } => Defined { op_type, obj, pushval, v: find!(v), state: find!(state) }, &DefinedIvar { self_val, pushval, id, state } => DefinedIvar { self_val: find!(self_val), pushval, id, state }, @@ -1381,7 +1435,6 @@ impl Function { &NewRange { low, high, flag, state } => NewRange { low: find!(low), high: find!(high), flag, state: find!(state) }, &NewRangeFixnum { low, high, flag, state } => NewRangeFixnum { low: find!(low), high: find!(high), flag, state: find!(state) }, &ArrayMax { ref elements, state } => ArrayMax { elements: find_vec!(elements), state: find!(state) }, - &GetBlockParamProxy { level, state } => GetBlockParamProxy { level, state: find!(state) }, &SetGlobal { id, val, state } => SetGlobal { id, val: find!(val), state }, &GetIvar { self_val, id, state } => GetIvar { self_val: find!(self_val), id, state }, &LoadIvarEmbedded { self_val, id, index } => LoadIvarEmbedded { self_val: find!(self_val), id, index }, @@ -1422,7 +1475,7 @@ impl Function { | Insn::IfTrue { .. } | Insn::IfFalse { .. } | Insn::Return { .. } | Insn::Throw { .. } | Insn::PatchPoint { .. } | Insn::SetIvar { .. } | Insn::ArrayExtend { .. } | Insn::ArrayPush { .. } | Insn::SideExit { .. } | Insn::SetLocal { .. } | Insn::IncrCounter(_) - | Insn::CheckInterrupts { .. } => + | Insn::CheckInterrupts { .. } | Insn::GuardBlockParamProxy { .. 
} => panic!("Cannot infer type of instruction with no output: {}", self.insns[insn.0]), Insn::Const { val: Const::Value(val) } => Type::from_value(*val), Insn::Const { val: Const::CBool(val) } => Type::from_cbool(*val), @@ -1454,6 +1507,7 @@ impl Function { Insn::NewRange { .. } => types::RangeExact, Insn::NewRangeFixnum { .. } => types::RangeExact, Insn::ObjectAlloc { .. } => types::HeapObject, + Insn::ObjectAllocClass { class, .. } => Type::from_class(*class), Insn::CCall { return_type, .. } => *return_type, Insn::GuardType { val, guard_type, .. } => self.type_of(*val).intersection(*guard_type), Insn::GuardTypeNot { .. } => types::BasicObject, @@ -1494,7 +1548,6 @@ impl Function { Insn::ObjToString { .. } => types::BasicObject, Insn::AnyToString { .. } => types::String, Insn::GetLocal { .. } => types::BasicObject, - Insn::GetBlockParamProxy { .. } => types::BasicObject, // The type of Snapshot doesn't really matter; it's never materialized. It's used only // as a reference for FrameState, which we use to generate side-exit code. Insn::Snapshot { .. } => types::Any, @@ -1704,7 +1757,8 @@ impl Function { /// Rewrite SendWithoutBlock opcodes into SendWithoutBlockDirect opcodes if we know the target /// ISEQ statically. This removes run-time method lookups and opens the door for inlining. - fn optimize_direct_sends(&mut self) { + /// Also try and inline constant caches, specialize object allocations, and more. + fn type_specialize(&mut self) { for block in self.rpo() { let old_insns = std::mem::take(&mut self.blocks[block.0].insns); assert!(self.blocks[block.0].insns.is_empty()); @@ -1742,7 +1796,7 @@ impl Function { self.try_rewrite_uminus(block, insn_id, recv, state), Insn::SendWithoutBlock { recv, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(aref) && args.len() == 1 => self.try_rewrite_aref(block, insn_id, recv, args[0], state), - Insn::SendWithoutBlock { mut recv, cd, args, state } => { + Insn::SendWithoutBlock { mut recv, cd, args, state, .. 
} => { let frame_state = self.frame_state(state); let (klass, profiled_type) = if let Some(klass) = self.type_of(recv).runtime_exact_ruby_class() { // If we know the class statically, use it to fold the lookup at compile-time. @@ -1798,6 +1852,9 @@ impl Function { let getivar = self.push_insn(block, Insn::GetIvar { self_val: recv, id, state }); self.make_equal_to(insn_id, getivar); } else { + if let Insn::SendWithoutBlock { def_type: insn_def_type, .. } = &mut self.insns[insn_id.0] { + *insn_def_type = Some(MethodType::from(def_type)); + } self.push_insn_id(block, insn_id); continue; } } @@ -1839,7 +1896,7 @@ impl Function { self.make_equal_to(insn_id, guard); } else { self.push_insn(block, Insn::GuardTypeNot { val, guard_type: types::String, state}); - let send_to_s = self.push_insn(block, Insn::SendWithoutBlock { recv: val, cd, args: vec![], state}); + let send_to_s = self.push_insn(block, Insn::SendWithoutBlock { recv: val, cd, args: vec![], def_type: None, state}); self.make_equal_to(insn_id, send_to_s); } } @@ -1850,6 +1907,43 @@ impl Function { self.push_insn_id(block, insn_id); } } + Insn::IsMethodCfunc { val, cd, cfunc, state } if self.type_of(val).ruby_object_known() => { + let class = self.type_of(val).ruby_object().unwrap(); + let cme = unsafe { rb_zjit_vm_search_method(self.iseq.into(), cd as *mut rb_call_data, class) }; + let is_expected_cfunc = unsafe { rb_zjit_cme_is_cfunc(cme, cfunc as *const c_void) }; + let method = unsafe { rb_vm_ci_mid((*cd).ci) }; + self.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass: class, method, cme }, state }); + let replacement = self.push_insn(block, Insn::Const { val: Const::CBool(is_expected_cfunc) }); + self.insn_types[replacement.0] = self.infer_type(replacement); + self.make_equal_to(insn_id, replacement); + } + Insn::ObjectAlloc { val, state } => { + let val_type = self.type_of(val); + if !val_type.is_subtype(types::Class) { + self.push_insn_id(block, insn_id); continue; + } + let 
Some(class) = val_type.ruby_object() else { + self.push_insn_id(block, insn_id); continue; + }; + // See class_get_alloc_func in object.c; if the class isn't initialized, is + // a singleton class, or has a custom allocator, ObjectAlloc might raise an + // exception or run arbitrary code. + // + // We also need to check if the class is initialized or a singleton before trying to read the allocator, otherwise it might raise. + if !unsafe { rb_zjit_class_initialized_p(class) } { + self.push_insn_id(block, insn_id); continue; + } + if unsafe { rb_zjit_singleton_class_p(class) } { + self.push_insn_id(block, insn_id); continue; + } + if !unsafe { rb_zjit_class_has_default_allocator(class) } { + // Custom or NULL allocator; could run arbitrary code. + self.push_insn_id(block, insn_id); continue; + } + let replacement = self.push_insn(block, Insn::ObjectAllocClass { class, state }); + self.insn_types[replacement.0] = self.infer_type(replacement); + self.make_equal_to(insn_id, replacement); + } _ => { self.push_insn_id(block, insn_id); } } } @@ -2228,8 +2322,7 @@ impl Function { | &Insn::Return { val } | &Insn::Test { val } | &Insn::SetLocal { val, .. } - | &Insn::IsNil { val } - | &Insn::IsMethodCfunc { val, .. } => + | &Insn::IsNil { val } => worklist.push_back(val), &Insn::SetGlobal { val, state, .. } | &Insn::Defined { v: val, state, .. } @@ -2241,6 +2334,7 @@ impl Function { | &Insn::GuardBitEquals { val, state, .. } | &Insn::GuardShape { val, state, .. } | &Insn::ToArray { val, state } + | &Insn::IsMethodCfunc { val, state, .. } | &Insn::ToNewArray { val, state } => { worklist.push_back(val); worklist.push_back(state); @@ -2324,10 +2418,11 @@ impl Function { | &Insn::LoadIvarExtended { self_val, .. } => { worklist.push_back(self_val); } - &Insn::GetBlockParamProxy { state, .. } | + &Insn::GuardBlockParamProxy { state, .. } | &Insn::GetGlobal { state, .. } | &Insn::GetSpecialSymbol { state, .. } | &Insn::GetSpecialNumber { state, .. 
} | + &Insn::ObjectAllocClass { state, .. } | &Insn::SideExit { state, .. } => worklist.push_back(state), } } @@ -2466,7 +2561,7 @@ impl Function { /// Run all the optimization passes we have. pub fn optimize(&mut self) { // Function is assumed to have types inferred already - self.optimize_direct_sends(); + self.type_specialize(); #[cfg(debug_assertions)] self.assert_validates(); self.optimize_getivar(); #[cfg(debug_assertions)] self.assert_validates(); @@ -3382,7 +3477,8 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { // TODO: Guard on a profiled class and add a patch point for #new redefinition let argc = unsafe { vm_ci_argc((*cd).ci) } as usize; let val = state.stack_topn(argc)?; - let test_id = fun.push_insn(block, Insn::IsMethodCfunc { val, cd, cfunc: rb_class_new_instance_pass_kw as *const u8 }); + let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); + let test_id = fun.push_insn(block, Insn::IsMethodCfunc { val, cd, cfunc: rb_class_new_instance_pass_kw as *const u8, state: exit_id }); // Jump to the fallback block if it's not the expected function. // Skip CheckInterrupts since the #new call will do it very soon anyway. 
@@ -3395,7 +3491,6 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { queue.push_back((state.clone(), target, target_idx, local_inval)); // Move on to the fast path - let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); let insn_id = fun.push_insn(block, Insn::ObjectAlloc { val, state: exit_id }); state.stack_setn(argc, insn_id); state.stack_setn(argc + 1, insn_id); @@ -3469,7 +3564,9 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { YARVINSN_getblockparamproxy => { let level = get_arg(pc, 1).as_u32(); let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); - state.stack_push(fun.push_insn(block, Insn::GetBlockParamProxy { level, state: exit_id })); + fun.push_insn(block, Insn::GuardBlockParamProxy { level, state: exit_id }); + // TODO(Shopify/ruby#753): GC root, so we should be able to avoid unnecessary GC tracing + state.stack_push(fun.push_insn(block, Insn::Const { val: Const::Value(unsafe { rb_block_param_proxy }) })); } YARVINSN_pop => { state.stack_pop()?; } YARVINSN_dup => { state.stack_push(state.stack_top()?); } @@ -3520,7 +3617,7 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { let args = state.stack_pop_n(argc as usize)?; let recv = state.stack_pop()?; let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); - let send = fun.push_insn(block, Insn::SendWithoutBlock { recv, cd, args, state: exit_id }); + let send = fun.push_insn(block, Insn::SendWithoutBlock { recv, cd, args, def_type: None, state: exit_id }); state.stack_push(send); } YARVINSN_opt_hash_freeze | @@ -3544,7 +3641,7 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); let recv = fun.push_insn(block, Insn::Const { val: Const::Value(get_arg(pc, 0)) }); - let send = fun.push_insn(block, Insn::SendWithoutBlock { recv, cd, args, state: exit_id }); + let send = fun.push_insn(block, Insn::SendWithoutBlock { recv, cd, args, def_type: None, 
state: exit_id }); state.stack_push(send); } @@ -3601,7 +3698,7 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { let args = state.stack_pop_n(argc as usize)?; let recv = state.stack_pop()?; let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state }); - let send = fun.push_insn(block, Insn::SendWithoutBlock { recv, cd, args, state: exit_id }); + let send = fun.push_insn(block, Insn::SendWithoutBlock { recv, cd, args, def_type: None, state: exit_id }); state.stack_push(send); } YARVINSN_send => { @@ -5330,8 +5427,8 @@ mod tests { bb0(v0:BasicObject): v5:BasicObject = GetConstantPath 0x1000 v6:NilClass = Const Value(nil) - v7:CBool = IsMethodCFunc v5, :new - IfFalse v7, bb1(v0, v6, v5) + v8:CBool = IsMethodCFunc v5, :new + IfFalse v8, bb1(v0, v6, v5) v10:HeapObject = ObjectAlloc v5 v12:BasicObject = SendWithoutBlock v10, :initialize CheckInterrupts @@ -5896,19 +5993,20 @@ mod tests { v5:NilClass = Const Value(nil) v10:BasicObject = InvokeBuiltin dir_s_open, v0, v1, v2 PatchPoint NoEPEscape(open) - v16:BasicObject = GetBlockParamProxy l0 + GuardBlockParamProxy l0 + v17:BasicObject[BlockParamProxy] = Const Value(VALUE(0x1000)) CheckInterrupts - v19:CBool = Test v16 - IfFalse v19, bb1(v0, v1, v2, v3, v4, v10) + v20:CBool = Test v17 + IfFalse v20, bb1(v0, v1, v2, v3, v4, v10) PatchPoint NoEPEscape(open) - v26:BasicObject = InvokeBlock, v10 - v30:BasicObject = InvokeBuiltin dir_s_close, v0, v10 + v27:BasicObject = InvokeBlock, v10 + v31:BasicObject = InvokeBuiltin dir_s_close, v0, v10 CheckInterrupts - Return v26 - bb1(v36:BasicObject, v37:BasicObject, v38:BasicObject, v39:BasicObject, v40:BasicObject, v41:BasicObject): + Return v27 + bb1(v37:BasicObject, v38:BasicObject, v39:BasicObject, v40:BasicObject, v41:BasicObject, v42:BasicObject): PatchPoint NoEPEscape(open) CheckInterrupts - Return v41 + Return v42 "); } @@ -8021,18 +8119,12 @@ mod opt_tests { PatchPoint StableConstantNames(0x1000, C) v34:Class[VALUE(0x1008)] = Const Value(VALUE(0x1008)) 
v6:NilClass = Const Value(nil) - v7:CBool = IsMethodCFunc v34, :new - IfFalse v7, bb1(v0, v6, v34) - v10:HeapObject = ObjectAlloc v34 - v12:BasicObject = SendWithoutBlock v10, :initialize + PatchPoint MethodRedefined(C@0x1008, new@0x1010, cme:0x1018) + v37:HeapObject[class_exact:C] = ObjectAllocClass VALUE(0x1008) + v12:BasicObject = SendWithoutBlock v37, :initialize CheckInterrupts - Jump bb2(v0, v10, v12) - bb1(v16:BasicObject, v17:NilClass, v18:Class[VALUE(0x1008)]): - v21:BasicObject = SendWithoutBlock v18, :new - Jump bb2(v16, v21, v17) - bb2(v23:BasicObject, v24:BasicObject, v25:BasicObject): CheckInterrupts - Return v24 + Return v37 "); } @@ -8055,20 +8147,13 @@ mod opt_tests { v36:Class[VALUE(0x1008)] = Const Value(VALUE(0x1008)) v6:NilClass = Const Value(nil) v7:Fixnum[1] = Const Value(1) - v8:CBool = IsMethodCFunc v36, :new - IfFalse v8, bb1(v0, v6, v36, v7) - v11:HeapObject = ObjectAlloc v36 - PatchPoint MethodRedefined(C@0x1008, initialize@0x1010, cme:0x1018) - v38:HeapObject[class_exact:C] = GuardType v11, HeapObject[class_exact:C] - v39:BasicObject = SendWithoutBlockDirect v38, :initialize (0x1040), v7 - CheckInterrupts - Jump bb2(v0, v11, v39) - bb1(v17:BasicObject, v18:NilClass, v19:Class[VALUE(0x1008)], v20:Fixnum[1]): - v23:BasicObject = SendWithoutBlock v19, :new, v20 - Jump bb2(v17, v23, v18) - bb2(v25:BasicObject, v26:BasicObject, v27:BasicObject): + PatchPoint MethodRedefined(C@0x1008, new@0x1010, cme:0x1018) + v39:HeapObject[class_exact:C] = ObjectAllocClass VALUE(0x1008) + PatchPoint MethodRedefined(C@0x1008, initialize@0x1040, cme:0x1048) + v41:BasicObject = SendWithoutBlockDirect v39, :initialize (0x1070), v7 CheckInterrupts - Return v26 + CheckInterrupts + Return v39 "); } @@ -8112,11 +8197,12 @@ mod opt_tests { assert_snapshot!(hir_string("test"), @r" fn test@:2: bb0(v0:BasicObject, v1:BasicObject): - v6:BasicObject = GetBlockParamProxy l0 - v8:BasicObject = Send v0, 0x1000, :tap, v6 - v9:BasicObject = GetLocal l0, EP@3 + 
GuardBlockParamProxy l0 + v7:BasicObject[BlockParamProxy] = Const Value(VALUE(0x1000)) + v9:BasicObject = Send v0, 0x1008, :tap, v7 + v10:BasicObject = GetLocal l0, EP@3 CheckInterrupts - Return v8 + Return v9 "); } diff --git a/zjit/src/hir_type/mod.rs b/zjit/src/hir_type/mod.rs index 926d6d306fff7b..f2fb8702570cf7 100644 --- a/zjit/src/hir_type/mod.rs +++ b/zjit/src/hir_type/mod.rs @@ -1,7 +1,7 @@ //! High-level intermediate representation types. #![allow(non_upper_case_globals)] -use crate::cruby::{Qfalse, Qnil, Qtrue, VALUE, RUBY_T_ARRAY, RUBY_T_STRING, RUBY_T_HASH, RUBY_T_CLASS, RUBY_T_MODULE}; +use crate::cruby::{rb_block_param_proxy, Qfalse, Qnil, Qtrue, RUBY_T_ARRAY, RUBY_T_CLASS, RUBY_T_HASH, RUBY_T_MODULE, RUBY_T_STRING, VALUE}; use crate::cruby::{rb_cInteger, rb_cFloat, rb_cArray, rb_cHash, rb_cString, rb_cSymbol, rb_cObject, rb_cTrueClass, rb_cFalseClass, rb_cNilClass, rb_cRange, rb_cSet, rb_cRegexp, rb_cClass, rb_cModule, rb_zjit_singleton_class_p}; use crate::cruby::ClassRelationship; use crate::cruby::get_class_name; @@ -75,6 +75,7 @@ fn write_spec(f: &mut std::fmt::Formatter, printer: &TypePrinter) -> std::fmt::R match ty.spec { Specialization::Any | Specialization::Empty => { Ok(()) }, Specialization::Object(val) if val == unsafe { rb_mRubyVMFrozenCore } => write!(f, "[VMFrozenCore]"), + Specialization::Object(val) if val == unsafe { rb_block_param_proxy } => write!(f, "[BlockParamProxy]"), Specialization::Object(val) if ty.is_subtype(types::Symbol) => write!(f, "[:{}]", ruby_sym_to_rust_string(val)), Specialization::Object(val) => write!(f, "[{}]", val.print(printer.ptr_map)), // TODO(max): Ensure singleton classes never have Type specialization @@ -256,6 +257,20 @@ impl Type { } } + pub fn from_class(class: VALUE) -> Type { + if class == unsafe { rb_cArray } { types::ArrayExact } + else if class == unsafe { rb_cFalseClass } { types::FalseClass } + else if class == unsafe { rb_cHash } { types::HashExact } + else if class == unsafe { rb_cInteger } 
{ types::Integer} + else if class == unsafe { rb_cNilClass } { types::NilClass } + else if class == unsafe { rb_cString } { types::StringExact } + else if class == unsafe { rb_cTrueClass } { types::TrueClass } + else { + // TODO(max): Add more cases for inferring type bits from built-in types + Type { bits: bits::HeapObject, spec: Specialization::TypeExact(class) } + } + } + /// Private. Only for creating type globals. const fn from_bits(bits: u64) -> Type { Type { @@ -667,11 +682,28 @@ mod tests { assert_eq!(types::FalseClass.inexact_ruby_class(), None); } + #[test] + fn from_class() { + crate::cruby::with_rubyvm(|| { + assert_bit_equal(Type::from_class(unsafe { rb_cInteger }), types::Integer); + assert_bit_equal(Type::from_class(unsafe { rb_cString }), types::StringExact); + assert_bit_equal(Type::from_class(unsafe { rb_cArray }), types::ArrayExact); + assert_bit_equal(Type::from_class(unsafe { rb_cHash }), types::HashExact); + assert_bit_equal(Type::from_class(unsafe { rb_cNilClass }), types::NilClass); + assert_bit_equal(Type::from_class(unsafe { rb_cTrueClass }), types::TrueClass); + assert_bit_equal(Type::from_class(unsafe { rb_cFalseClass }), types::FalseClass); + let c_class = define_class("C", unsafe { rb_cObject }); + assert_bit_equal(Type::from_class(c_class), Type { bits: bits::HeapObject, spec: Specialization::TypeExact(c_class) }); + }); + } + #[test] fn integer_has_ruby_class() { - assert_eq!(Type::fixnum(3).inexact_ruby_class(), Some(unsafe { rb_cInteger })); - assert_eq!(types::Fixnum.inexact_ruby_class(), None); - assert_eq!(types::Integer.inexact_ruby_class(), None); + crate::cruby::with_rubyvm(|| { + assert_eq!(Type::fixnum(3).inexact_ruby_class(), Some(unsafe { rb_cInteger })); + assert_eq!(types::Fixnum.inexact_ruby_class(), None); + assert_eq!(types::Integer.inexact_ruby_class(), None); + }); } #[test] diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs index 18e12d3af556be..1daf1fda4a9171 100644 --- a/zjit/src/stats.rs +++ b/zjit/src/stats.rs 
@@ -135,6 +135,20 @@ make_counters! { dynamic_send_type_send, dynamic_send_type_invokeblock, dynamic_send_type_invokesuper, + + // Method call def_type related to fallback to dynamic dispatch + send_fallback_iseq, + send_fallback_cfunc, + send_fallback_attrset, + send_fallback_ivar, + send_fallback_bmethod, + send_fallback_zsuper, + send_fallback_alias, + send_fallback_undef, + send_fallback_not_implemented, + send_fallback_optimized, + send_fallback_missing, + send_fallback_refined, } /// Increase a counter by a specified amount @@ -221,6 +235,26 @@ pub fn exit_counter_ptr(reason: crate::hir::SideExitReason) -> *mut u64 { counter_ptr(counter) } +pub fn send_fallback_counter(def_type: crate::hir::MethodType) -> Counter { + use crate::hir::MethodType::*; + use crate::stats::Counter::*; + + match def_type { + Iseq => send_fallback_iseq, + Cfunc => send_fallback_cfunc, + Attrset => send_fallback_attrset, + Ivar => send_fallback_ivar, + Bmethod => send_fallback_bmethod, + Zsuper => send_fallback_zsuper, + Alias => send_fallback_alias, + Undefined => send_fallback_undef, + NotImplemented => send_fallback_not_implemented, + Optimized => send_fallback_optimized, + Missing => send_fallback_missing, + Refined => send_fallback_refined, + } +} + /// Primitive called in zjit.rb. Zero out all the counters. #[unsafe(no_mangle)] pub extern "C" fn rb_zjit_reset_stats_bang(_ec: EcPtr, _self: VALUE) -> VALUE {