From 8337de95bc6f89c20cf16ac98aaef8e86c9a4992 Mon Sep 17 00:00:00 2001 From: Stan Lo Date: Fri, 3 Oct 2025 23:12:19 +0100 Subject: [PATCH 1/3] ZJIT: Add HIR for calling Cfunc with frame (#14661) * ZJIT: Add HIR for CCallWithFrame * ZJIT: Update stats to count not inlined cfunc calls * ZJIT: Stops optimizing SendWithoutBlock when TracePoint is activated * ZJIT: Fallback to SendWithoutBlock when CCallWithFrame has too many args * ZJIT: Rename cfun -> cfunc --- zjit.rb | 2 +- zjit/src/codegen.rs | 52 +++++++++++-- zjit/src/hir.rs | 185 +++++++++++++++++++++++++++----------------- zjit/src/state.rs | 12 +-- zjit/src/stats.rs | 10 ++- 5 files changed, 170 insertions(+), 91 deletions(-) diff --git a/zjit.rb b/zjit.rb index 8a037e35a0a007..1dccdefca273b1 100644 --- a/zjit.rb +++ b/zjit.rb @@ -153,7 +153,7 @@ def stats_string stats = self.stats # Show counters independent from exit_* or dynamic_send_* - print_counters_with_prefix(prefix: 'not_optimized_cfuncs_', prompt: 'unoptimized sends to C functions', buf:, stats:, limit: 20) + print_counters_with_prefix(prefix: 'not_inlined_cfuncs_', prompt: 'not inlined C methods', buf:, stats:, limit: 20) # Show fallback counters, ordered by the typical amount of fallbacks for the prefix at the time print_counters_with_prefix(prefix: 'unspecialized_def_type_', prompt: 'not optimized method types', buf:, stats:, limit: 20) diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index b7c3dc3532e903..1c1bd2e07f3ebb 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -404,9 +404,12 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::GuardBitEquals { val, expected, state } => gen_guard_bit_equals(jit, asm, opnd!(val), *expected, &function.frame_state(*state)), &Insn::GuardBlockParamProxy { level, state } => no_output!(gen_guard_block_param_proxy(jit, asm, level, &function.frame_state(state))), Insn::PatchPoint { invariant, state } => no_output!(gen_patch_point(jit, asm, invariant, &function.frame_state(*state))), - Insn::CCall { cfun, args, name: _, return_type: _, elidable: _ } => gen_ccall(asm, *cfun, opnds!(args)), - Insn::CCallVariadic { cfun, recv, args, name: _, cme, state } => { - gen_ccall_variadic(jit, asm, *cfun, opnd!(recv), opnds!(args), *cme, &function.frame_state(*state)) + Insn::CCall { cfunc, args, name: _, return_type: _, elidable: _ } => gen_ccall(asm, *cfunc, opnds!(args)), + Insn::CCallWithFrame { cd, state, args, .. } if args.len() + 1 > C_ARG_OPNDS.len() => // +1 for self + gen_send_without_block(jit, asm, *cd, &function.frame_state(*state), SendFallbackReason::CCallWithFrameTooManyArgs), + Insn::CCallWithFrame { cfunc, args, cme, state, .. } => gen_ccall_with_frame(jit, asm, *cfunc, opnds!(args), *cme, &function.frame_state(*state)), + Insn::CCallVariadic { cfunc, recv, args, name: _, cme, state } => { + gen_ccall_variadic(jit, asm, *cfunc, opnd!(recv), opnds!(args), *cme, &function.frame_state(*state)) } Insn::GetIvar { self_val, id, state: _ } => gen_getivar(asm, opnd!(self_val), *id), Insn::SetGlobal { id, val, state } => no_output!(gen_setglobal(jit, asm, *id, opnd!(val), &function.frame_state(*state))), @@ -664,11 +667,46 @@ fn gen_patch_point(jit: &mut JITState, asm: &mut Assembler, invariant: &Invarian }); } +/// Generate code for a C function call that pushes a frame +fn gen_ccall_with_frame(jit: &mut JITState, asm: &mut Assembler, cfunc: *const u8, args: Vec, cme: *const rb_callable_method_entry_t, state: &FrameState) -> lir::Opnd { + gen_prepare_non_leaf_call(jit, asm, state); + + gen_push_frame(asm, args.len(), state, ControlFrame { + recv: args[0], + iseq: None, + cme, + frame_type: VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL, + }); + + asm_comment!(asm, "switch to new SP register"); + let sp_offset = (state.stack().len() - args.len() + VM_ENV_DATA_SIZE.as_usize()) * SIZEOF_VALUE; + let new_sp = asm.add(SP, sp_offset.into()); + asm.mov(SP, new_sp); + + asm_comment!(asm, "switch to new CFP"); + let new_cfp = asm.sub(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); + asm.mov(CFP, new_cfp); + asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); + + let result = asm.ccall(cfunc, args); + + asm_comment!(asm, "pop C frame"); + let new_cfp = asm.add(CFP, RUBY_SIZEOF_CONTROL_FRAME.into()); + asm.mov(CFP, new_cfp); + asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); + + asm_comment!(asm, "restore SP register for the caller"); + let new_sp = asm.sub(SP, sp_offset.into()); + asm.mov(SP, new_sp); + + result +} + /// Lowering for [`Insn::CCall`]. This is a low-level raw call that doesn't know /// anything about the callee, so handling for e.g. GC safety is dealt with elsewhere. -fn gen_ccall(asm: &mut Assembler, cfun: *const u8, args: Vec) -> lir::Opnd { +fn gen_ccall(asm: &mut Assembler, cfunc: *const u8, args: Vec) -> lir::Opnd { gen_incr_counter(asm, Counter::inline_cfunc_optimized_send_count); - asm.ccall(cfun, args) + asm.ccall(cfunc, args) } /// Generate code for a variadic C function call @@ -676,7 +714,7 @@ fn gen_ccall(asm: &mut Assembler, cfun: *const u8, args: Vec) -> lir::Opnd fn gen_ccall_variadic( jit: &mut JITState, asm: &mut Assembler, - cfun: *const u8, + cfunc: *const u8, recv: Opnd, args: Vec, cme: *const rb_callable_method_entry_t, @@ -707,7 +745,7 @@ fn gen_ccall_variadic( asm.store(Opnd::mem(64, EC, RUBY_OFFSET_EC_CFP), CFP); let argv_ptr = gen_push_opnds(jit, asm, &args); - let result = asm.ccall(cfun, vec![args.len().into(), argv_ptr, recv]); + let result = asm.ccall(cfunc, vec![args.len().into(), argv_ptr, recv]); gen_pop_opnds(asm, &args); asm_comment!(asm, "pop C frame"); diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 4c123f5621fa00..26145f46226fbb 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -535,6 +535,7 @@ pub enum SendFallbackReason { SendWithoutBlockCfuncArrayVariadic, SendWithoutBlockNotOptimizedMethodType(MethodType), SendWithoutBlockDirectTooManyArgs, + CCallWithFrameTooManyArgs, ObjToStringNotString, /// Initial fallback reason for every instruction, which should be mutated to /// a more actionable reason when an attempt to specialize the instruction fails. @@ -644,14 +645,24 @@ pub enum Insn { IfTrue { val: InsnId, target: BranchEdge }, IfFalse { val: InsnId, target: BranchEdge }, - /// Call a C function + /// Call a C function without pushing a frame /// `name` is for printing purposes only - CCall { cfun: *const u8, args: Vec, name: ID, return_type: Type, elidable: bool }, + CCall { cfunc: *const u8, args: Vec, name: ID, return_type: Type, elidable: bool }, + + /// Call a C function that pushes a frame + CCallWithFrame { + cd: *const rb_call_data, // cd for falling back to SendWithoutBlock + cfunc: *const u8, + args: Vec, + cme: *const rb_callable_method_entry_t, + name: ID, + state: InsnId + }, /// Call a variadic C function with signature: func(int argc, VALUE *argv, VALUE recv) /// This handles frame setup, argv creation, and frame teardown all in one CCallVariadic { - cfun: *const u8, + cfunc: *const u8, recv: InsnId, args: Vec, cme: *const rb_callable_method_entry_t, @@ -1020,15 +1031,22 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::GuardBlockParamProxy { level, .. } => write!(f, "GuardBlockParamProxy l{level}"), Insn::PatchPoint { invariant, .. } => { write!(f, "PatchPoint {}", invariant.print(self.ptr_map)) }, Insn::GetConstantPath { ic, .. } => { write!(f, "GetConstantPath {:p}", self.ptr_map.map_ptr(ic)) }, - Insn::CCall { cfun, args, name, return_type: _, elidable: _ } => { - write!(f, "CCall {}@{:p}", name.contents_lossy(), self.ptr_map.map_ptr(cfun))?; + Insn::CCall { cfunc, args, name, return_type: _, elidable: _ } => { + write!(f, "CCall {}@{:p}", name.contents_lossy(), self.ptr_map.map_ptr(cfunc))?; for arg in args { write!(f, ", {arg}")?; } Ok(()) }, - Insn::CCallVariadic { cfun, recv, args, name, .. } => { - write!(f, "CCallVariadic {}@{:p}, {recv}", name.contents_lossy(), self.ptr_map.map_ptr(cfun))?; + Insn::CCallWithFrame { cfunc, args, name, .. } => { + write!(f, "CallCFunc {}@{:p}", name.contents_lossy(), self.ptr_map.map_ptr(cfunc))?; + for arg in args { + write!(f, ", {arg}")?; + } + Ok(()) + }, + Insn::CCallVariadic { cfunc, recv, args, name, .. } => { + write!(f, "CCallVariadic {}@{:p}, {recv}", name.contents_lossy(), self.ptr_map.map_ptr(cfunc))?; for arg in args { write!(f, ", {arg}")?; } @@ -1545,9 +1563,10 @@ impl Function { &HashDup { val, state } => HashDup { val: find!(val), state }, &ObjectAlloc { val, state } => ObjectAlloc { val: find!(val), state }, &ObjectAllocClass { class, state } => ObjectAllocClass { class, state: find!(state) }, - &CCall { cfun, ref args, name, return_type, elidable } => CCall { cfun, args: find_vec!(args), name, return_type, elidable }, - &CCallVariadic { cfun, recv, ref args, cme, name, state } => CCallVariadic { - cfun, recv: find!(recv), args: find_vec!(args), cme, name, state + &CCall { cfunc, ref args, name, return_type, elidable } => CCall { cfunc, args: find_vec!(args), name, return_type, elidable }, + &CCallWithFrame { cd, cfunc, ref args, cme, name, state } => CCallWithFrame { cd, cfunc, args: find_vec!(args), cme, name, state: find!(state) }, + &CCallVariadic { cfunc, recv, ref args, cme, name, state } => CCallVariadic { + cfunc, recv: find!(recv), args: find_vec!(args), cme, name, state }, &Defined { op_type, obj, pushval, v, state } => Defined { op_type, obj, pushval, v: find!(v), state: find!(state) }, &DefinedIvar { self_val, pushval, id, state } => DefinedIvar { self_val: find!(self_val), pushval, id, state }, @@ -1646,6 +1665,7 @@ impl Function { Insn::NewRangeFixnum { .. } => types::RangeExact, Insn::ObjectAlloc { .. } => types::HeapObject, Insn::ObjectAllocClass { class, .. } => Type::from_class(*class), + Insn::CCallWithFrame { .. } => types::BasicObject, Insn::CCall { return_type, .. } => *return_type, Insn::CCallVariadic { .. } => types::BasicObject, Insn::GuardType { val, guard_type, .. } => self.type_of(*val).intersection(*guard_type), @@ -2242,6 +2262,10 @@ impl Function { /// Optimize SendWithoutBlock that land in a C method to a direct CCall without /// runtime lookup. fn optimize_c_calls(&mut self) { + if unsafe { rb_zjit_method_tracing_currently_enabled() } { + return; + } + fn gen_patch_points_for_optimized_ccall(fun: &mut Function, block: BlockId, recv_class: VALUE, method_id: ID, method: *const rb_callable_method_entry_struct, state: InsnId) { fun.push_insn(block, Insn::PatchPoint { invariant: Invariant::NoTracePoint, state }); fun.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass: recv_class, method: method_id, cme: method }, state }); @@ -2254,9 +2278,9 @@ impl Function { self_type: Type, send: Insn, send_insn_id: InsnId, - ) -> Result<(), Option<*const rb_callable_method_entry_struct>> { + ) -> Result<(), ()> { let Insn::SendWithoutBlock { mut recv, cd, mut args, state, .. } = send else { - return Err(None); + return Err(()); }; let call_info = unsafe { (*cd).ci }; @@ -2268,20 +2292,20 @@ impl Function { (class, None) } else { let iseq_insn_idx = fun.frame_state(state).insn_idx; - let Some(recv_type) = fun.profiled_type_of_at(recv, iseq_insn_idx) else { return Err(None) }; + let Some(recv_type) = fun.profiled_type_of_at(recv, iseq_insn_idx) else { return Err(()) }; (recv_type.class(), Some(recv_type)) }; // Do method lookup let method: *const rb_callable_method_entry_struct = unsafe { rb_callable_method_entry(recv_class, method_id) }; if method.is_null() { - return Err(None); + return Err(()); } // Filter for C methods let def_type = unsafe { get_cme_def_type(method) }; if def_type != VM_METHOD_TYPE_CFUNC { - return Err(None); + return Err(()); } // Find the `argc` (arity) of the C method, which describes the parameters it expects @@ -2293,48 +2317,53 @@ impl Function { // // Bail on argc mismatch if argc != cfunc_argc as u32 { - return Err(Some(method)); + return Err(()); } + let ci_flags = unsafe { vm_ci_flag(call_info) }; + + if ci_flags & VM_CALL_ARGS_SIMPLE == 0 { + return Err(()); + } + + gen_patch_points_for_optimized_ccall(fun, block, recv_class, method_id, method, state); + if recv_class.instance_can_have_singleton_class() { + fun.push_insn(block, Insn::PatchPoint { invariant: Invariant::NoSingletonClass { klass: recv_class }, state }); + } + if let Some(profiled_type) = profiled_type { + // Guard receiver class + recv = fun.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state }); + } + let cfunc = unsafe { get_mct_func(cfunc) }.cast(); + let mut cfunc_args = vec![recv]; + cfunc_args.append(&mut args); + // Filter for a leaf and GC free function use crate::cruby_methods::FnProperties; - let Some(FnProperties { leaf: true, no_gc: true, return_type, elidable }) = - ZJITState::get_method_annotations().get_cfunc_properties(method) - else { - fun.set_dynamic_send_reason(send_insn_id, SendWithoutBlockCfuncNotVariadic); - return Err(Some(method)); - }; - - let ci_flags = unsafe { vm_ci_flag(call_info) }; // Filter for simple call sites (i.e. no splats etc.) - if ci_flags & VM_CALL_ARGS_SIMPLE != 0 { - gen_patch_points_for_optimized_ccall(fun, block, recv_class, method_id, method, state); - - if recv_class.instance_can_have_singleton_class() { - fun.push_insn(block, Insn::PatchPoint { invariant: Invariant::NoSingletonClass { klass: recv_class }, state }); - } - if let Some(profiled_type) = profiled_type { - // Guard receiver class - recv = fun.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state }); + // Commit to the replacement. Put PatchPoint. + if let Some(FnProperties { leaf: true, no_gc: true, return_type, elidable }) = ZJITState::get_method_annotations().get_cfunc_properties(method) { + let ccall = fun.push_insn(block, Insn::CCall { cfunc, args: cfunc_args, name: method_id, return_type, elidable }); + fun.make_equal_to(send_insn_id, ccall); + } else { + if get_option!(stats) { + count_not_inlined_cfunc(fun, block, method); } - - let cfun = unsafe { get_mct_func(cfunc) }.cast(); - let mut cfunc_args = vec![recv]; - cfunc_args.append(&mut args); - let ccall = fun.push_insn(block, Insn::CCall { cfun, args: cfunc_args, name: method_id, return_type, elidable }); + let ccall = fun.push_insn(block, Insn::CCallWithFrame { cd, cfunc, args: cfunc_args, cme: method, name: method_id, state }); fun.make_equal_to(send_insn_id, ccall); - return Ok(()) } + + return Ok(()); } // Variadic method -1 => { - if unsafe { rb_zjit_method_tracing_currently_enabled() } { - return Err(None); - } // The method gets a pointer to the first argument // func(int argc, VALUE *argv, VALUE recv) let ci_flags = unsafe { vm_ci_flag(call_info) }; if ci_flags & VM_CALL_ARGS_SIMPLE != 0 { + if get_option!(stats) { + count_not_inlined_cfunc(fun, block, method); + } gen_patch_points_for_optimized_ccall(fun, block, recv_class, method_id, method, state); if recv_class.instance_can_have_singleton_class() { @@ -2345,9 +2374,9 @@ impl Function { recv = fun.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state }); } - let cfun = unsafe { get_mct_func(cfunc) }.cast(); + let cfunc = unsafe { get_mct_func(cfunc) }.cast(); let ccall = fun.push_insn(block, Insn::CCallVariadic { - cfun, + cfunc, recv, args, cme: method, @@ -2369,7 +2398,20 @@ impl Function { _ => unreachable!("unknown cfunc kind: argc={argc}") } - Err(Some(method)) + Err(()) + } + + fn count_not_inlined_cfunc(fun: &mut Function, block: BlockId, cme: *const rb_callable_method_entry_t) { + let owner = unsafe { (*cme).owner }; + let called_id = unsafe { (*cme).called_id }; + let class_name = get_class_name(owner); + let method_name = called_id.contents_lossy(); + let qualified_method_name = format!("{}#{}", class_name, method_name); + let not_inlined_cfunc_counter_pointers = ZJITState::get_not_inlined_cfunc_counter_pointers(); + let counter_ptr = not_inlined_cfunc_counter_pointers.entry(qualified_method_name.clone()).or_insert_with(|| Box::new(0)); + let counter_ptr = &mut **counter_ptr as *mut u64; + + fun.push_insn(block, Insn::IncrCounterPtr { counter_ptr }); } for block in self.rpo() { @@ -2378,23 +2420,8 @@ impl Function { for insn_id in old_insns { if let send @ Insn::SendWithoutBlock { recv, .. } = self.find(insn_id) { let recv_type = self.type_of(recv); - match reduce_to_ccall(self, block, recv_type, send, insn_id) { - Ok(()) => continue, - Err(Some(cme)) => { - if get_option!(stats) { - let owner = unsafe { (*cme).owner }; - let called_id = unsafe { (*cme).called_id }; - let class_name = get_class_name(owner); - let method_name = called_id.contents_lossy(); - let qualified_method_name = format!("{}#{}", class_name, method_name); - let unoptimized_cfunc_counter_pointers = ZJITState::get_unoptimized_cfunc_counter_pointers(); - let counter_ptr = unoptimized_cfunc_counter_pointers.entry(qualified_method_name.clone()).or_insert_with(|| Box::new(0)); - let counter_ptr = &mut **counter_ptr as *mut u64; - - self.push_insn(block, Insn::IncrCounterPtr { counter_ptr }); - } - } - _ => {} + if reduce_to_ccall(self, block, recv_type, send, insn_id).is_ok() { + continue; } } self.push_insn_id(block, insn_id); @@ -2637,7 +2664,8 @@ impl Function { worklist.extend(args); worklist.push_back(state); } - &Insn::InvokeBuiltin { ref args, state, .. } + &Insn::CCallWithFrame { ref args, state, .. } + | &Insn::InvokeBuiltin { ref args, state, .. } | &Insn::InvokeBlock { ref args, state, .. } => { worklist.extend(args); worklist.push_back(state) @@ -10864,8 +10892,10 @@ mod opt_tests { Jump bb2(v4) bb2(v6:BasicObject): v11:HashExact = NewHash - v13:BasicObject = SendWithoutBlock v11, :dup - v15:BasicObject = SendWithoutBlock v13, :freeze + PatchPoint MethodRedefined(Hash@0x1000, dup@0x1008, cme:0x1010) + PatchPoint NoSingletonClass(Hash@0x1000) + v24:BasicObject = CallCFunc dup@0x1038, v11 + v15:BasicObject = SendWithoutBlock v24, :freeze CheckInterrupts Return v15 "); @@ -10955,8 +10985,10 @@ mod opt_tests { Jump bb2(v4) bb2(v6:BasicObject): v11:ArrayExact = NewArray - v13:BasicObject = SendWithoutBlock v11, :dup - v15:BasicObject = SendWithoutBlock v13, :freeze + PatchPoint MethodRedefined(Array@0x1000, dup@0x1008, cme:0x1010) + PatchPoint NoSingletonClass(Array@0x1000) + v24:BasicObject = CallCFunc dup@0x1038, v11 + v15:BasicObject = SendWithoutBlock v24, :freeze CheckInterrupts Return v15 "); @@ -11047,8 +11079,10 @@ mod opt_tests { bb2(v6:BasicObject): v10:StringExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) v12:StringExact = StringCopy v10 - v14:BasicObject = SendWithoutBlock v12, :dup - v16:BasicObject = SendWithoutBlock v14, :freeze + PatchPoint MethodRedefined(String@0x1008, dup@0x1010, cme:0x1018) + PatchPoint NoSingletonClass(String@0x1008) + v25:BasicObject = CallCFunc dup@0x1040, v12 + v16:BasicObject = SendWithoutBlock v25, :freeze CheckInterrupts Return v16 "); @@ -11140,8 +11174,10 @@ mod opt_tests { bb2(v6:BasicObject): v10:StringExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) v12:StringExact = StringCopy v10 - v14:BasicObject = SendWithoutBlock v12, :dup - v16:BasicObject = SendWithoutBlock v14, :-@ + PatchPoint MethodRedefined(String@0x1008, dup@0x1010, cme:0x1018) + PatchPoint NoSingletonClass(String@0x1008) + v25:BasicObject = CallCFunc dup@0x1040, v12 + v16:BasicObject = SendWithoutBlock v25, :-@ CheckInterrupts Return v16 "); @@ -11279,8 +11315,11 @@ mod opt_tests { bb2(v8:BasicObject, v9:BasicObject): v13:StringExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) v25:BasicObject = GuardTypeNot v9, String - v26:BasicObject = SendWithoutBlock v9, :to_s - v17:String = AnyToString v9, str: v26 + PatchPoint MethodRedefined(Array@0x1008, to_s@0x1010, cme:0x1018) + PatchPoint NoSingletonClass(Array@0x1008) + v30:ArrayExact = GuardType v9, ArrayExact + v31:BasicObject = CallCFunc to_s@0x1040, v30 + v17:String = AnyToString v9, str: v31 v19:StringExact = StringConcat v13, v17 CheckInterrupts Return v19 diff --git a/zjit/src/state.rs b/zjit/src/state.rs index 8f88d2424436ab..409cac7e9bb421 100644 --- a/zjit/src/state.rs +++ b/zjit/src/state.rs @@ -51,8 +51,8 @@ pub struct ZJITState { /// Trampoline to call function_stub_hit function_stub_hit_trampoline: CodePtr, - /// Counter pointers for unoptimized C functions - unoptimized_cfunc_counter_pointers: HashMap>, + /// Counter pointers for full frame C functions + full_frame_cfunc_counter_pointers: HashMap>, /// Locations of side exists within generated code exit_locations: Option, @@ -97,7 +97,7 @@ impl ZJITState { exit_trampoline, function_stub_hit_trampoline, exit_trampoline_with_counter: exit_trampoline, - unoptimized_cfunc_counter_pointers: HashMap::new(), + full_frame_cfunc_counter_pointers: HashMap::new(), exit_locations, }; unsafe { ZJIT_STATE = Some(zjit_state); } @@ -162,9 +162,9 @@ impl ZJITState { &mut ZJITState::get_instance().send_fallback_counters } - /// Get a mutable reference to unoptimized cfunc counter pointers - pub fn get_unoptimized_cfunc_counter_pointers() -> &'static mut HashMap> { - &mut ZJITState::get_instance().unoptimized_cfunc_counter_pointers + /// Get a mutable reference to full frame cfunc counter pointers + pub fn get_not_inlined_cfunc_counter_pointers() -> &'static mut HashMap> { + &mut ZJITState::get_instance().full_frame_cfunc_counter_pointers } /// Was --zjit-save-compiled-iseqs specified? diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs index d1c1aa7e032a3d..3bfeb46e6c871e 100644 --- a/zjit/src/stats.rs +++ b/zjit/src/stats.rs @@ -148,6 +148,7 @@ make_counters! { send_fallback_send_without_block_cfunc_array_variadic, send_fallback_send_without_block_not_optimized_method_type, send_fallback_send_without_block_direct_too_many_args, + send_fallback_ccall_with_frame_too_many_args, send_fallback_obj_to_string_not_string, send_fallback_not_optimized_instruction, } @@ -318,6 +319,7 @@ pub fn send_fallback_counter(reason: crate::hir::SendFallbackReason) -> Counter SendWithoutBlockCfuncArrayVariadic => send_fallback_send_without_block_cfunc_array_variadic, SendWithoutBlockNotOptimizedMethodType(_) => send_fallback_send_without_block_not_optimized_method_type, SendWithoutBlockDirectTooManyArgs => send_fallback_send_without_block_direct_too_many_args, + CCallWithFrameTooManyArgs => send_fallback_ccall_with_frame_too_many_args, ObjToStringNotString => send_fallback_obj_to_string_not_string, NotOptimizedInstruction(_) => send_fallback_not_optimized_instruction, } @@ -470,10 +472,10 @@ pub extern "C" fn rb_zjit_stats(_ec: EcPtr, _self: VALUE, target_key: VALUE) -> set_stat_f64!(hash, "ratio_in_zjit", 100.0 * zjit_insn_count as f64 / total_insn_count as f64); } - // Set unoptimized cfunc counters - let unoptimized_cfuncs = ZJITState::get_unoptimized_cfunc_counter_pointers(); - for (signature, counter) in unoptimized_cfuncs.iter() { - let key_string = format!("not_optimized_cfuncs_{}", signature); + // Set not inlined cfunc counters + let not_inlined_cfuncs = ZJITState::get_not_inlined_cfunc_counter_pointers(); + for (signature, counter) in not_inlined_cfuncs.iter() { + let key_string = format!("not_inlined_cfuncs_{}", signature); set_stat_usize!(hash, &key_string, **counter); } From 8eead759c1b2a93c66d80089ad9acf166f37d507 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Fri, 3 Oct 2025 15:15:05 -0700 Subject: [PATCH 2/3] ZJIT: Relax the limit of cfunc args by 1 Follow-up on https://github.com/ruby/ruby/pull/14661 Unlike SendWithoutBlockDirect, `args` has every argument given to the C call. So there's no `+ 1` for this HIR. --- zjit/src/codegen.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 1c1bd2e07f3ebb..0c720734daaba8 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -405,7 +405,8 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio &Insn::GuardBlockParamProxy { level, state } => no_output!(gen_guard_block_param_proxy(jit, asm, level, &function.frame_state(state))), Insn::PatchPoint { invariant, state } => no_output!(gen_patch_point(jit, asm, invariant, &function.frame_state(*state))), Insn::CCall { cfunc, args, name: _, return_type: _, elidable: _ } => gen_ccall(asm, *cfunc, opnds!(args)), - Insn::CCallWithFrame { cd, state, args, .. } if args.len() + 1 > C_ARG_OPNDS.len() => // +1 for self + // Give up CCallWithFrame for 7+ args since asm.ccall() doesn't support it. + Insn::CCallWithFrame { cd, state, args, .. } if args.len() > C_ARG_OPNDS.len() => gen_send_without_block(jit, asm, *cd, &function.frame_state(*state), SendFallbackReason::CCallWithFrameTooManyArgs), Insn::CCallWithFrame { cfunc, args, cme, state, .. } => gen_ccall_with_frame(jit, asm, *cfunc, opnds!(args), *cme, &function.frame_state(*state)), Insn::CCallVariadic { cfunc, recv, args, name: _, cme, state } => { From 77331b99c606d187b6df32261bc99493484f36ac Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Fri, 3 Oct 2025 15:36:19 -0700 Subject: [PATCH 3/3] ZJIT: Count CCallWithFrame as optimized_send_count (#14722) --- zjit.rb | 1 + zjit/src/codegen.rs | 2 ++ zjit/src/stats.rs | 1 + 3 files changed, 4 insertions(+) diff --git a/zjit.rb b/zjit.rb index 1dccdefca273b1..b44154ad9cef1c 100644 --- a/zjit.rb +++ b/zjit.rb @@ -172,6 +172,7 @@ def stats_string :optimized_send_count, :iseq_optimized_send_count, :inline_cfunc_optimized_send_count, + :non_variadic_cfunc_optimized_send_count, :variadic_cfunc_optimized_send_count, ], buf:, stats:, right_align: true, base: :send_count) print_counters([ diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 0c720734daaba8..b1a2cb672641ef 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -670,6 +670,8 @@ fn gen_patch_point(jit: &mut JITState, asm: &mut Assembler, invariant: &Invarian /// Generate code for a C function call that pushes a frame fn gen_ccall_with_frame(jit: &mut JITState, asm: &mut Assembler, cfunc: *const u8, args: Vec, cme: *const rb_callable_method_entry_t, state: &FrameState) -> lir::Opnd { + gen_incr_counter(asm, Counter::non_variadic_cfunc_optimized_send_count); + gen_prepare_non_leaf_call(jit, asm, state); gen_push_frame(asm, args.len(), state, ControlFrame { diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs index 3bfeb46e6c871e..323af0f3ed2695 100644 --- a/zjit/src/stats.rs +++ b/zjit/src/stats.rs @@ -157,6 +157,7 @@ make_counters! { optimized_send { iseq_optimized_send_count, inline_cfunc_optimized_send_count, + non_variadic_cfunc_optimized_send_count, variadic_cfunc_optimized_send_count, }