diff --git a/depend b/depend index 63e73a56390056..1f9f0c31eba695 100644 --- a/depend +++ b/depend @@ -7382,8 +7382,10 @@ jit.$(OBJEXT): $(CCAN_DIR)/str/str.h jit.$(OBJEXT): $(hdrdir)/ruby/ruby.h jit.$(OBJEXT): $(top_srcdir)/internal/array.h jit.$(OBJEXT): $(top_srcdir)/internal/basic_operators.h +jit.$(OBJEXT): $(top_srcdir)/internal/bits.h jit.$(OBJEXT): $(top_srcdir)/internal/class.h jit.$(OBJEXT): $(top_srcdir)/internal/compilers.h +jit.$(OBJEXT): $(top_srcdir)/internal/fixnum.h jit.$(OBJEXT): $(top_srcdir)/internal/gc.h jit.$(OBJEXT): $(top_srcdir)/internal/imemo.h jit.$(OBJEXT): $(top_srcdir)/internal/namespace.h diff --git a/doc/contributing/concurrency_guide.md b/doc/contributing/concurrency_guide.md new file mode 100644 index 00000000000000..1fb58f7203ad8f --- /dev/null +++ b/doc/contributing/concurrency_guide.md @@ -0,0 +1,154 @@ +# Concurrency Guide + +This is a guide to thinking about concurrency in the cruby source code, whether that's contributing to Ruby +by writing C or contributing to one of the JITs. This does not touch on native extensions, only the core +language. It will go over: + +* What needs synchronizing? +* How to use the VM lock, and what you can and can't do when you've acquired this lock. +* What you can and can't do when you've acquired other native locks. +* The difference between the VM lock and the GVL. +* What a VM barrier is and when to use it. +* The lock ordering of some important locks. +* How ruby interrupt handling works. +* The timer thread and what it's responsible for. + +## What needs synchronizing? + +Before ractors, only one ruby thread could run at once. That didn't mean you could forget about concurrency issues, though. The timer thread +is a native thread that interacts with other ruby threads and changes some VM internals, so any change that both the timer +thread and a ruby thread can make in parallel needs to be synchronized. + +When you add ractors to the mix, it gets more complicated. However, ractors allow you to forget about synchronization for non-shareable objects, because +they aren't used across ractors: only one ruby thread can touch such an object at a time. Shareable objects are deeply frozen, so there isn't any +mutation on the objects themselves. However, something like reading/writing constants across ractors does need to be synchronized, because ruby threads need to +see a consistent view of the VM. If publishing an update takes two steps, or even two separate instructions as it does here, synchronization is required. + +Most synchronization is to protect VM internals. These internals include structures for the thread scheduler on each ractor, the global ractor scheduler, the +coordination between ruby threads and ractors, global tables (for `fstrings`, encodings, symbols and global vars), etc. Anything that can be mutated by one ractor +while being read or mutated by another ractor at the same time requires proper synchronization. + +## The VM Lock + +There's only one VM lock, and it is for critical sections that can only be entered by one ractor at a time. +Without ractors, the VM lock is useless. Note that it does not stop all ractors from running, as ractors can run +without trying to acquire this lock. If you're updating global (shared) data between ractors and aren't using +atomics, you need to use a lock, and this is a convenient one to use. Unlike other locks, you can allocate ruby-managed +memory with it held.
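+To make this concrete, here is a minimal, hypothetical sketch (not code from the tree) of updating a
+ractor-shared table under the VM lock. It assumes the `RB_VM_LOCK_ENTER()`/`RB_VM_LOCK_LEAVE()` macros
+from `vm_sync.h`; the lock API has shifted over time, so check `vm_sync.h` for the current spelling.
+`example_table` and the helper function are made up:
+
+```c
+#include "ruby/ruby.h"   /* VALUE, rb_ary_new_from_args */
+#include "ruby/st.h"     /* st_table, st_insert */
+#include "vm_sync.h"     /* RB_VM_LOCK_ENTER / RB_VM_LOCK_LEAVE */
+
+/* Hypothetical global table shared across ractors. */
+static st_table *example_table;
+
+static void
+example_register(VALUE key, VALUE val)
+{
+    RB_VM_LOCK_ENTER();
+    {
+        /* Allowed here (and only because no other native lock is held):
+         * creating objects and allocating ruby-managed memory. */
+        VALUE pair = rb_ary_new_from_args(2, key, val);
+        st_insert(example_table, (st_data_t)key, (st_data_t)pair);
+    }
+    RB_VM_LOCK_LEAVE();
+}
+```
+
+The rules below spell out exactly what is and isn't allowed inside such a critical section.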
When you take the VM lock, there are things you can and can't do during your critical section: + +You can (as long as no other native locks were taken before the VM lock): + +* Create ruby objects, call `ruby_xmalloc`, etc. + +You can't: + +* Context switch to another ruby thread or ractor. This is important, as many things can cause ruby-level context switches, including: + + * Calling any ruby method through, for example, `rb_funcall`. If you execute ruby code, a context switch could happen. + This also applies to ruby methods defined in C, as they can be redefined in Ruby. Things that call ruby methods, such as + `rb_obj_respond_to`, are also disallowed. + + * Calling `rb_raise`. This will call `initialize` on the new exception object. With the VM lock + held, nothing you call should be able to raise an exception. `NoMemoryError` is allowed, however. + + * Calling `rb_nogvl` or a ruby-level mechanism that can context switch, like `rb_mutex_lock`. + + * Entering any blocking operation managed by ruby. This will context switch to another ruby thread using `rb_nogvl` or + something equivalent. A blocking operation is one that blocks the thread's progress, such as `sleep` or `IO#read`. + +Internally, the VM lock is the `vm->ractor.sync.lock`. + +You need to be on a ruby thread to take the VM lock. You also can't take it inside any functions that could be called during sweeping, as MMTk sweeps +on another thread and you need a valid `ec` to grab the lock. For this same reason (among others), you can't take it from the timer thread either. + +## Other Locks + +All native locks that aren't the VM lock share a stricter set of rules for what's allowed during the critical section. By native locks, we mean +anything that uses `rb_native_mutex_lock`. Some important locks include the `interrupt_lock`, the ractor scheduling lock (protects global scheduling data structures), +the thread scheduling lock (local to each ractor, protects per-ractor scheduling data structures) and the ractor lock (local to each ractor, protects ractor data structures). + +When you acquire one of these locks: + +You can: + +* Allocate memory through non-ruby allocation such as raw `malloc` or the standard library. But be careful: some functions, like `strdup`, use +ruby allocation through macros! + +* Use `ccan` lists, as they don't allocate. + +* Do the usual things like set variables or struct fields, manipulate linked lists, signal condition variables, etc. + +You can't: + +* Allocate ruby-managed memory. This includes creating ruby objects or using `ruby_xmalloc` or `st_insert`. The reason this +is disallowed: if that allocation causes a GC, then all other ruby threads must join a VM barrier as soon as possible +(when they next check interrupts or acquire the VM lock) so that no other ractors are running during GC. If a ruby thread +is waiting (blocked) on this same native lock, it can't join the barrier, and a deadlock occurs because the barrier will never finish. + +* Raise exceptions. You also can't use `EC_JUMP_TAG` if it jumps out of the critical section. + +* Context switch. See the `VM Lock` section for more info.
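+Putting these rules together, a typical critical section under a native lock is short and allocation-free.
+In this sketch, the lock, the list and `struct example_waiter` are all hypothetical;
+`rb_native_mutex_lock`/`rb_native_mutex_unlock` and the `ccan_list` helpers are the real primitives
+(check `thread_native.h` and `ccan/list/list.h` for the exact names):
+
+```c
+#include "ruby/thread_native.h"  /* rb_nativethread_lock_t, rb_native_mutex_* */
+#include "ccan/list/list.h"      /* ccan_list_head, ccan_list_add_tail */
+
+struct example_waiter {
+    struct ccan_list_node node;
+    int fd;
+};
+
+/* Hypothetical bookkeeping guarded by a native lock. */
+static rb_nativethread_lock_t example_lock;
+static struct ccan_list_head example_waiters = CCAN_LIST_HEAD_INIT(example_waiters);
+
+static void
+example_push_waiter(struct example_waiter *w)
+{
+    rb_native_mutex_lock(&example_lock);
+    {
+        /* Fine: plain field writes and ccan list manipulation; neither allocates. */
+        ccan_list_add_tail(&example_waiters, &w->node);
+        /* Not fine: rb_ary_new(), ruby_xmalloc(), st_insert(), rb_raise(), or
+         * anything else that can allocate ruby-managed memory or jump out. */
+    }
+    rb_native_mutex_unlock(&example_lock);
+}
+```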
## Difference Between VM Lock and GVL + +The VM lock is a particular lock in the source code, and there is only one of it. The GVL, on the other hand, is more of a combination of locks. +It is "acquired" when a ruby thread is about to run or is running. Since many ruby threads can run at the same time if they're in different ractors, +there are many GVLs (1 per `SNT` + 1 for the main ractor). It can no longer be thought of as a "Global VM Lock" like it once was before ractors. + +## VM Barriers + +Sometimes, taking the VM lock isn't enough and you need a guarantee that all ractors have stopped. This happens when running `GC`, for instance. +To get a barrier, you take the VM lock and call `rb_vm_barrier()`. For the duration that the VM lock is held, no other ractors will be running. It's not used +often, as taking a barrier slows ractor performance down considerably, but it's useful to know about and is sometimes the only solution. + +## Lock Orderings + +It's a good idea not to hold more than two locks at once on the same thread. Holding multiple locks can introduce deadlocks, so do it with care. When locking +multiple locks at once, follow an ordering that is consistent across the program; otherwise you can introduce deadlocks. Here are the orderings of some important locks: + +* VM lock before ractor_sched_lock +* thread_sched_lock before ractor_sched_lock +* interrupt_lock before timer_th.waiting_lock +* timer_th.waiting_lock before ractor_sched_lock + +These orderings are subject to change, so check the source if you're not sure. On top of this: + +* The VM lock can be taken around a call to a `ubf` (unblock) function in some circumstances. This happens during VM shutdown, for example. +See the "Ruby Interrupt Handling" section for more details. + +## Ruby Interrupt Handling + +When the VM runs ruby code, ruby's threads intermittently check ruby-level interrupts. These software interrupts +are used for various things in ruby, and they can be set by other ruby threads or the timer thread. + +* Ruby threads check when they should give up their timeslice. The native thread switches to another ruby thread when their time is up. +* The timer thread sends a "trap" interrupt to the main thread if any ruby-level signal handlers are pending. +* Ruby threads can have other ruby threads run tasks for them by sending them an interrupt. For instance, ractors send +the main thread an interrupt when they need to `require` a file so that it's done on the main thread. They wait for the +main thread's result. +* During VM shutdown, a "terminate" interrupt is sent to all ractor main threads to stop them as soon as possible. +* When calling `Thread#raise`, the caller sends an interrupt to that thread telling it which exception to raise. +* Unlocking a mutex sends the next waiter (if any) an interrupt telling it to grab the lock. +* Signalling or broadcasting on a condition variable tells the waiter(s) to wake up. + +This isn't a complete list. + +When you send an interrupt to a ruby thread, that thread may be blocked. For example, it could be in the middle of a `TCPSocket#read` call. If so, +the receiving thread's `ubf` (unblock function) gets called from the thread (ruby thread or timer thread) that sent the interrupt. +Each ruby thread has a `ubf` that is set when it enters a blocking operation and is unset after returning from it. By default, this `ubf` function sends +`SIGVTALRM` to the receiving thread to try to unblock it from the kernel so it can check its interrupts. There are other `ubfs` that +aren't associated with a syscall, such as when calling `Ractor#join` or `sleep`. All `ubfs` are called with the `interrupt_lock` held, +so take that into account when using locks inside `ubfs`. + +Remember, `ubfs` can be called from the timer thread, so you cannot assume an `ec` inside them. The `ec` (execution context) is only set on ruby threads.
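+For a sense of what "checking interrupts" looks like on the C side, long-running VM code polls with the
+`RUBY_VM_CHECK_INTS()` macro from `vm_core.h`. The loop and its unit of work below are hypothetical:
+
+```c
+#include "vm_core.h"  /* rb_execution_context_t, RUBY_VM_CHECK_INTS */
+
+/* Hypothetical unit of work. */
+static void
+example_do_step(long i)
+{
+    (void)i;
+}
+
+/* Without the periodic check, this ruby thread could never give up its
+ * timeslice, run a pending trap handler or honor Thread#raise until the
+ * whole loop finished. */
+static void
+example_long_loop(rb_execution_context_t *ec, long n)
+{
+    for (long i = 0; i < n; i++) {
+        example_do_step(i);
+        RUBY_VM_CHECK_INTS(ec);  /* may context switch or raise here */
+    }
+}
+```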
## The Timer Thread + +The timer thread has a few responsibilities: + +* Send interrupts to ruby threads that have run for their whole timeslice. +* Wake up M:N ruby threads (threads in non-main ractors) blocked on IO or after a specified timeout. This +uses `kqueue` or `epoll`, depending on the OS, to receive IO events on behalf of the threads. +* Keep sending the `SIGVTALRM` signal if a thread is still blocked on a syscall after the first `ubf` call. +* Signal native threads (`SNT`s) waiting on a ractor if there are ractors waiting in the global run queue. +* Create more `SNT`s if some are blocked, like on IO or on `Ractor#join`. diff --git a/insns.def b/insns.def index 8225d1cceaf97e..ce358da28575ed 100644 --- a/insns.def +++ b/insns.def @@ -846,6 +846,7 @@ send (CALL_DATA cd, ISEQ blockiseq) (...) (VALUE val) +// attr bool zjit_profile = true; // attr rb_snum_t sp_inc = sp_inc_of_sendish(cd->ci); // attr rb_snum_t comptime_sp_inc = sp_inc_of_sendish(ci); { diff --git a/jit.c b/jit.c index 0b491f0481d875..b7cb05d1c34efd 100644 --- a/jit.c +++ b/jit.c @@ -14,6 +14,7 @@ #include "iseq.h" #include "internal/gc.h" #include "vm_sync.h" +#include "internal/fixnum.h" // Field offsets for the RObject struct enum robject_offsets { @@ -720,3 +721,9 @@ rb_jit_icache_invalidate(void *start, void *end) #error No instruction cache clear available with this compiler on Aarch64! #endif } + +VALUE +rb_jit_fix_mod_fix(VALUE recv, VALUE obj) +{ + return rb_fix_mod_fix(recv, obj); +} diff --git a/yjit.c b/yjit.c index 598fe5716704d0..d0ab367b1c7bb1 100644 --- a/yjit.c +++ b/yjit.c @@ -332,12 +332,6 @@ rb_yjit_fix_div_fix(VALUE recv, VALUE obj) return rb_fix_div_fix(recv, obj); } -VALUE -rb_yjit_fix_mod_fix(VALUE recv, VALUE obj) -{ - return rb_fix_mod_fix(recv, obj); -} - // Return non-zero when `obj` is an array and its last item is a // `ruby2_keywords` hash. We don't support this kind of splat.
size_t diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs index 0d4d57e0695941..2b4f48d73ec4bd 100644 --- a/yjit/bindgen/src/main.rs +++ b/yjit/bindgen/src/main.rs @@ -367,7 +367,7 @@ fn main() { .allowlist_function("rb_yarv_ary_entry_internal") .allowlist_function("rb_yjit_ruby2_keywords_splat_p") .allowlist_function("rb_yjit_fix_div_fix") - .allowlist_function("rb_yjit_fix_mod_fix") + .allowlist_function("rb_jit_fix_mod_fix") .allowlist_function("rb_FL_TEST") .allowlist_function("rb_FL_TEST_RAW") .allowlist_function("rb_RB_TYPE_P") diff --git a/yjit/src/cruby.rs b/yjit/src/cruby.rs index 36baecd5358031..0d9e3b74dad874 100644 --- a/yjit/src/cruby.rs +++ b/yjit/src/cruby.rs @@ -199,7 +199,7 @@ pub use rb_get_call_data_ci as get_call_data_ci; pub use rb_yarv_str_eql_internal as rb_str_eql_internal; pub use rb_yarv_ary_entry_internal as rb_ary_entry_internal; pub use rb_yjit_fix_div_fix as rb_fix_div_fix; -pub use rb_yjit_fix_mod_fix as rb_fix_mod_fix; +pub use rb_jit_fix_mod_fix as rb_fix_mod_fix; pub use rb_FL_TEST as FL_TEST; pub use rb_FL_TEST_RAW as FL_TEST_RAW; pub use rb_RB_TYPE_P as RB_TYPE_P; diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index 0a14a699284268..74661e7ade9bf8 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -1142,7 +1142,6 @@ extern "C" { pub fn rb_ary_unshift_m(argc: ::std::os::raw::c_int, argv: *mut VALUE, ary: VALUE) -> VALUE; pub fn rb_yjit_rb_ary_subseq_length(ary: VALUE, beg: ::std::os::raw::c_long) -> VALUE; pub fn rb_yjit_fix_div_fix(recv: VALUE, obj: VALUE) -> VALUE; - pub fn rb_yjit_fix_mod_fix(recv: VALUE, obj: VALUE) -> VALUE; pub fn rb_yjit_ruby2_keywords_splat_p(obj: VALUE) -> usize; pub fn rb_yjit_splat_varg_checks( sp: *mut VALUE, @@ -1275,4 +1274,5 @@ extern "C" { start: *mut ::std::os::raw::c_void, end: *mut ::std::os::raw::c_void, ); + pub fn rb_jit_fix_mod_fix(recv: VALUE, obj: VALUE) -> VALUE; } diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 1f04e61dbc9757..f7b335f1bfce89 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -398,6 +398,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::FixnumGe { left, right } => gen_fixnum_ge(asm, opnd!(left), opnd!(right)), Insn::FixnumAnd { left, right } => gen_fixnum_and(asm, opnd!(left), opnd!(right)), Insn::FixnumOr { left, right } => gen_fixnum_or(asm, opnd!(left), opnd!(right)), + &Insn::FixnumMod { left, right, state } => gen_fixnum_mod(jit, asm, opnd!(left), opnd!(right), &function.frame_state(state)), Insn::IsNil { val } => gen_isnil(asm, opnd!(val)), &Insn::IsMethodCfunc { val, cd, cfunc, state: _ } => gen_is_method_cfunc(jit, asm, opnd!(val), cd, cfunc), &Insn::IsBitEqual { left, right } => gen_is_bit_equal(asm, opnd!(left), opnd!(right)), @@ -411,7 +412,8 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio // Give up CCallWithFrame for 7+ args since asm.ccall() doesn't support it. Insn::CCallWithFrame { cd, state, args, .. } if args.len() > C_ARG_OPNDS.len() => gen_send_without_block(jit, asm, *cd, &function.frame_state(*state), SendFallbackReason::CCallWithFrameTooManyArgs), - Insn::CCallWithFrame { cfunc, args, cme, state, .. } => gen_ccall_with_frame(jit, asm, *cfunc, opnds!(args), *cme, &function.frame_state(*state)), + Insn::CCallWithFrame { cfunc, args, cme, state, blockiseq, .. 
} => + gen_ccall_with_frame(jit, asm, *cfunc, opnds!(args), *cme, *blockiseq, &function.frame_state(*state)), Insn::CCallVariadic { cfunc, recv, args, name: _, cme, state, return_type: _, elidable: _ } => { gen_ccall_variadic(jit, asm, *cfunc, opnd!(recv), opnds!(args), *cme, &function.frame_state(*state)) } @@ -446,7 +448,6 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio &Insn::LoadIvarExtended { self_val, id, index } => gen_load_ivar_extended(asm, opnd!(self_val), id, index), &Insn::ArrayMax { state, .. } | &Insn::FixnumDiv { state, .. } - | &Insn::FixnumMod { state, .. } | &Insn::Throw { state, .. } => return Err(state), }; @@ -673,20 +674,36 @@ fn gen_patch_point(jit: &mut JITState, asm: &mut Assembler, invariant: &Invarian } /// Generate code for a C function call that pushes a frame -fn gen_ccall_with_frame(jit: &mut JITState, asm: &mut Assembler, cfunc: *const u8, args: Vec, cme: *const rb_callable_method_entry_t, state: &FrameState) -> lir::Opnd { +fn gen_ccall_with_frame( + jit: &mut JITState, + asm: &mut Assembler, + cfunc: *const u8, + args: Vec, + cme: *const rb_callable_method_entry_t, + blockiseq: Option, + state: &FrameState, +) -> lir::Opnd { gen_incr_counter(asm, Counter::non_variadic_cfunc_optimized_send_count); - gen_prepare_non_leaf_call(jit, asm, state); + let caller_stack_size = state.stack_size() - args.len(); + + // Can't use gen_prepare_non_leaf_call() because we need to adjust the SP + // to account for the receiver and arguments (and block arguments if any) + gen_prepare_call_with_gc(asm, state, false); + gen_save_sp(asm, caller_stack_size); + gen_spill_stack(jit, asm, state); + gen_spill_locals(jit, asm, state); gen_push_frame(asm, args.len(), state, ControlFrame { recv: args[0], iseq: None, cme, frame_type: VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL, + block_iseq: blockiseq, }); asm_comment!(asm, "switch to new SP register"); - let sp_offset = (state.stack().len() - args.len() + VM_ENV_DATA_SIZE.as_usize()) * SIZEOF_VALUE; + let sp_offset = (caller_stack_size + VM_ENV_DATA_SIZE.as_usize()) * SIZEOF_VALUE; let new_sp = asm.add(SP, sp_offset.into()); asm.mov(SP, new_sp); @@ -738,6 +755,7 @@ fn gen_ccall_variadic( iseq: None, cme, frame_type: VM_FRAME_MAGIC_CFUNC | VM_FRAME_FLAG_CFRAME | VM_ENV_FLAG_LOCAL, + block_iseq: None, }); asm_comment!(asm, "switch to new SP register"); @@ -1130,6 +1148,7 @@ fn gen_send_without_block_direct( iseq: Some(iseq), cme, frame_type: VM_FRAME_MAGIC_METHOD | VM_ENV_FLAG_LOCAL, + block_iseq: None, }); asm_comment!(asm, "switch to new SP register"); @@ -1441,6 +1460,13 @@ fn gen_fixnum_or(asm: &mut Assembler, left: lir::Opnd, right: lir::Opnd) -> lir: asm.or(left, right) } +fn gen_fixnum_mod(jit: &mut JITState, asm: &mut Assembler, left: lir::Opnd, right: lir::Opnd, state: &FrameState) -> lir::Opnd { + // Check for left % 0, which raises ZeroDivisionError + asm.cmp(right, Opnd::from(VALUE::fixnum_from_usize(0))); + asm.je(side_exit(jit, state, FixnumModByZero)); + asm_ccall!(asm, rb_fix_mod_fix, left, right) +} + // Compile val == nil fn gen_isnil(asm: &mut Assembler, val: lir::Opnd) -> lir::Opnd { asm.cmp(val, Qnil.into()); @@ -1719,6 +1745,7 @@ struct ControlFrame { iseq: Option, cme: *const rb_callable_method_entry_t, frame_type: u32, + block_iseq: Option, } /// Compile an interpreter frame @@ -1735,9 +1762,20 @@ fn gen_push_frame(asm: &mut Assembler, argc: usize, state: &FrameState, frame: C }; let ep_offset = state.stack().len() as i32 + local_size - argc as i32 + 
VM_ENV_DATA_SIZE as i32 - 1; asm.store(Opnd::mem(64, SP, (ep_offset - 2) * SIZEOF_VALUE_I32), VALUE::from(frame.cme).into()); + + let block_handler_opnd = if let Some(block_iseq) = frame.block_iseq { + // Change cfp->block_code in the current frame. See vm_caller_setup_arg_block(). + // VM_CFP_TO_CAPTURED_BLOCK does &cfp->self, rb_captured_block->code.iseq aliases + // with cfp->block_code. + asm.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_BLOCK_CODE), VALUE::from(block_iseq).into()); + let cfp_self_addr = asm.lea(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SELF)); + asm.or(cfp_self_addr, Opnd::Imm(1)) + } else { + VM_BLOCK_HANDLER_NONE.into() + }; + // ep[-1]: block_handler or prev EP - // block_handler is not supported for now - asm.store(Opnd::mem(64, SP, (ep_offset - 1) * SIZEOF_VALUE_I32), VM_BLOCK_HANDLER_NONE.into()); + asm.store(Opnd::mem(64, SP, (ep_offset - 1) * SIZEOF_VALUE_I32), block_handler_opnd); // ep[0]: ENV_FLAGS asm.store(Opnd::mem(64, SP, ep_offset * SIZEOF_VALUE_I32), frame.frame_type.into()); diff --git a/zjit/src/cruby.rs b/zjit/src/cruby.rs index 5f4eac1db5ed9e..a84e408861fc54 100644 --- a/zjit/src/cruby.rs +++ b/zjit/src/cruby.rs @@ -134,6 +134,7 @@ unsafe extern "C" { pub fn rb_str_setbyte(str: VALUE, index: VALUE, value: VALUE) -> VALUE; pub fn rb_str_getbyte(str: VALUE, index: VALUE) -> VALUE; pub fn rb_vm_splat_array(flag: VALUE, ary: VALUE) -> VALUE; + pub fn rb_jit_fix_mod_fix(x: VALUE, y: VALUE) -> VALUE; pub fn rb_vm_concat_array(ary1: VALUE, ary2st: VALUE) -> VALUE; pub fn rb_vm_get_special_object(reg_ep: *const VALUE, value_type: vm_special_object_type) -> VALUE; pub fn rb_vm_concat_to_array(ary1: VALUE, ary2st: VALUE) -> VALUE; @@ -219,6 +220,7 @@ pub use rb_vm_ci_kwarg as vm_ci_kwarg; pub use rb_METHOD_ENTRY_VISI as METHOD_ENTRY_VISI; pub use rb_RCLASS_ORIGIN as RCLASS_ORIGIN; pub use rb_vm_get_special_object as vm_get_special_object; +pub use rb_jit_fix_mod_fix as rb_fix_mod_fix; /// Helper so we can get a Rust string for insn_name() pub fn insn_name(opcode: usize) -> String { diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index c67e229a8009e7..af604661b299b3 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -680,32 +680,33 @@ pub const YARVINSN_trace_setlocal_WC_1: ruby_vminsn_type = 215; pub const YARVINSN_trace_putobject_INT2FIX_0_: ruby_vminsn_type = 216; pub const YARVINSN_trace_putobject_INT2FIX_1_: ruby_vminsn_type = 217; pub const YARVINSN_zjit_getinstancevariable: ruby_vminsn_type = 218; -pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 219; -pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 220; -pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 221; -pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 222; -pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 223; -pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 224; -pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 225; -pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 226; -pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 227; -pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 228; -pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 229; -pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 230; -pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 231; -pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 232; -pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 233; -pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 234; -pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 235; -pub const 
YARVINSN_zjit_opt_aref: ruby_vminsn_type = 236; -pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 237; -pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 238; -pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 239; -pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 240; -pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 241; -pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 242; -pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 243; -pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 244; +pub const YARVINSN_zjit_send: ruby_vminsn_type = 219; +pub const YARVINSN_zjit_opt_send_without_block: ruby_vminsn_type = 220; +pub const YARVINSN_zjit_objtostring: ruby_vminsn_type = 221; +pub const YARVINSN_zjit_opt_nil_p: ruby_vminsn_type = 222; +pub const YARVINSN_zjit_opt_plus: ruby_vminsn_type = 223; +pub const YARVINSN_zjit_opt_minus: ruby_vminsn_type = 224; +pub const YARVINSN_zjit_opt_mult: ruby_vminsn_type = 225; +pub const YARVINSN_zjit_opt_div: ruby_vminsn_type = 226; +pub const YARVINSN_zjit_opt_mod: ruby_vminsn_type = 227; +pub const YARVINSN_zjit_opt_eq: ruby_vminsn_type = 228; +pub const YARVINSN_zjit_opt_neq: ruby_vminsn_type = 229; +pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 230; +pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 231; +pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 232; +pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 233; +pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 234; +pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 235; +pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 236; +pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 237; +pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 238; +pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 239; +pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 240; +pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 241; +pub const YARVINSN_zjit_opt_succ: ruby_vminsn_type = 242; +pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 243; +pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 244; +pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 245; pub type ruby_vminsn_type = u32; pub type rb_iseq_callback = ::std::option::Option< unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void), diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 370ed568579e0c..7083a082fba1a8 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -468,6 +468,7 @@ pub enum SideExitReason { BlockParamProxyModified, BlockParamProxyNotIseqOrIfunc, StackOverflow, + FixnumModByZero, } #[derive(Debug, Clone, Copy)] @@ -668,6 +669,7 @@ pub enum Insn { state: InsnId, return_type: Type, elidable: bool, + blockiseq: Option, }, /// Call a variadic C function with signature: func(int argc, VALUE *argv, VALUE recv) @@ -1063,11 +1065,14 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { } Ok(()) }, - Insn::CCallWithFrame { cfunc, args, name, .. } => { + Insn::CCallWithFrame { cfunc, args, name, blockiseq, .. } => { write!(f, "CCallWithFrame {}@{:p}", name.contents_lossy(), self.ptr_map.map_ptr(cfunc))?; for arg in args { write!(f, ", {arg}")?; } + if let Some(blockiseq) = blockiseq { + write!(f, ", block={:p}", self.ptr_map.map_ptr(blockiseq))?; + } Ok(()) }, Insn::CCallVariadic { cfunc, recv, args, name, .. 
} => { @@ -1598,7 +1603,17 @@ impl Function { &ObjectAlloc { val, state } => ObjectAlloc { val: find!(val), state }, &ObjectAllocClass { class, state } => ObjectAllocClass { class, state: find!(state) }, &CCall { cfunc, ref args, name, return_type, elidable } => CCall { cfunc, args: find_vec!(args), name, return_type, elidable }, - &CCallWithFrame { cd, cfunc, ref args, cme, name, state, return_type, elidable } => CCallWithFrame { cd, cfunc, args: find_vec!(args), cme, name, state: find!(state), return_type, elidable }, + &CCallWithFrame { cd, cfunc, ref args, cme, name, state, return_type, elidable, blockiseq } => CCallWithFrame { + cd, + cfunc, + args: find_vec!(args), + cme, + name, + state: find!(state), + return_type, + elidable, + blockiseq, + }, &CCallVariadic { cfunc, recv, ref args, cme, name, state, return_type, elidable } => CCallVariadic { cfunc, recv: find!(recv), args: find_vec!(args), cme, name, state, return_type, elidable }, @@ -2134,7 +2149,7 @@ impl Function { } } // This doesn't actually optimize Send yet, just replaces the fallback reason to be more precise. - // TODO: Optimize Send + // The actual optimization is done in reduce_send_to_ccall. Insn::Send { recv, cd, state, .. } => { let frame_state = self.frame_state(state); let klass = if let Some(klass) = self.type_of(recv).runtime_exact_ruby_class() { @@ -2338,8 +2353,111 @@ impl Function { fun.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass: recv_class, method: method_id, cme: method }, state }); } - // Try to reduce one SendWithoutBlock to a CCall - fn reduce_to_ccall( + // Try to reduce a Send insn to a CCallWithFrame + fn reduce_send_to_ccall( + fun: &mut Function, + block: BlockId, + self_type: Type, + send: Insn, + send_insn_id: InsnId, + ) -> Result<(), ()> { + let Insn::Send { mut recv, cd, blockiseq, mut args, state, .. } = send else { + return Err(()); + }; + + let call_info = unsafe { (*cd).ci }; + let argc = unsafe { vm_ci_argc(call_info) }; + let method_id = unsafe { rb_vm_ci_mid(call_info) }; + + // If we have info about the class of the receiver + let (recv_class, profiled_type) = if let Some(class) = self_type.runtime_exact_ruby_class() { + (class, None) + } else { + let iseq_insn_idx = fun.frame_state(state).insn_idx; + let Some(recv_type) = fun.profiled_type_of_at(recv, iseq_insn_idx) else { return Err(()) }; + (recv_type.class(), Some(recv_type)) + }; + + // Do method lookup + let method: *const rb_callable_method_entry_struct = unsafe { rb_callable_method_entry(recv_class, method_id) }; + if method.is_null() { + return Err(()); + } + + // Filter for C methods + let def_type = unsafe { get_cme_def_type(method) }; + if def_type != VM_METHOD_TYPE_CFUNC { + return Err(()); + } + + // Find the `argc` (arity) of the C method, which describes the parameters it expects + let cfunc = unsafe { get_cme_def_body_cfunc(method) }; + let cfunc_argc = unsafe { get_mct_argc(cfunc) }; + match cfunc_argc { + 0.. => { + // (self, arg0, arg1, ..., argc) form + // + // Bail on argc mismatch + if argc != cfunc_argc as u32 { + return Err(()); + } + + let ci_flags = unsafe { vm_ci_flag(call_info) }; + + // When seeing &block argument, fall back to dynamic dispatch for now + // TODO: Support block forwarding + if ci_flags & VM_CALL_ARGS_BLOCKARG != 0 { + return Err(()); + } + + // Commit to the replacement. Put PatchPoint. 
+ gen_patch_points_for_optimized_ccall(fun, block, recv_class, method_id, method, state); + if recv_class.instance_can_have_singleton_class() { + fun.push_insn(block, Insn::PatchPoint { invariant: Invariant::NoSingletonClass { klass: recv_class }, state }); + } + + if let Some(profiled_type) = profiled_type { + // Guard receiver class + recv = fun.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state }); + fun.insn_types[recv.0] = fun.infer_type(recv); + } + + let blockiseq = if blockiseq.is_null() { None } else { Some(blockiseq) }; + + // Emit a call + let cfunc = unsafe { get_mct_func(cfunc) }.cast(); + let mut cfunc_args = vec![recv]; + cfunc_args.append(&mut args); + + let ccall = fun.push_insn(block, Insn::CCallWithFrame { + cd, + cfunc, + args: cfunc_args, + cme: method, + name: method_id, + state, + return_type: types::BasicObject, + elidable: false, + blockiseq, + }); + fun.make_equal_to(send_insn_id, ccall); + return Ok(()); + } + // Variadic method + -1 => { + // func(int argc, VALUE *argv, VALUE recv) + return Err(()); + } + -2 => { + // (self, args_ruby_array) + return Err(()); + } + _ => unreachable!("unknown cfunc kind: argc={argc}") + } + } + + // Try to reduce a SendWithoutBlock insn to a CCall/CCallWithFrame + fn reduce_send_without_block_to_ccall( fun: &mut Function, block: BlockId, self_type: Type, @@ -2440,7 +2558,17 @@ impl Function { if get_option!(stats) { count_not_inlined_cfunc(fun, block, method); } - let ccall = fun.push_insn(block, Insn::CCallWithFrame { cd, cfunc, args: cfunc_args, cme: method, name: method_id, state, return_type, elidable }); + let ccall = fun.push_insn(block, Insn::CCallWithFrame { + cd, + cfunc, + args: cfunc_args, + cme: method, + name: method_id, + state, + return_type, + elidable, + blockiseq: None, + }); fun.make_equal_to(send_insn_id, ccall); } @@ -2555,11 +2683,21 @@ impl Function { let old_insns = std::mem::take(&mut self.blocks[block.0].insns); assert!(self.blocks[block.0].insns.is_empty()); for insn_id in old_insns { - if let send @ Insn::SendWithoutBlock { recv, .. } = self.find(insn_id) { - let recv_type = self.type_of(recv); - if reduce_to_ccall(self, block, recv_type, send, insn_id).is_ok() { - continue; + let send = self.find(insn_id); + match send { + send @ Insn::SendWithoutBlock { recv, .. } => { + let recv_type = self.type_of(recv); + if reduce_send_without_block_to_ccall(self, block, recv_type, send, insn_id).is_ok() { + continue; + } + } + send @ Insn::Send { recv, .. 
} => { + let recv_type = self.type_of(recv); + if reduce_send_to_ccall(self, block, recv_type, send, insn_id).is_ok() { + continue; + } } + _ => {} } self.push_insn_id(block, insn_id); } @@ -12583,6 +12721,108 @@ mod opt_tests { "); } + #[test] + fn test_optimize_send_with_block() { + eval(r#" + def test = [1, 2, 3].map { |x| x * 2 } + test; test + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v10:ArrayExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) + v12:ArrayExact = ArrayDup v10 + PatchPoint MethodRedefined(Array@0x1008, map@0x1010, cme:0x1018) + PatchPoint NoSingletonClass(Array@0x1008) + v23:BasicObject = CCallWithFrame map@0x1040, v12, block=0x1048 + CheckInterrupts + Return v23 + "); + } + + #[test] + fn test_do_not_optimize_send_variadic_with_block() { + eval(r#" + def test = [1, 2, 3].index { |x| x == 2 } + test; test + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v10:ArrayExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) + v12:ArrayExact = ArrayDup v10 + v14:BasicObject = Send v12, 0x1008, :index + CheckInterrupts + Return v14 + "); + } + + #[test] + fn test_do_not_optimize_send_with_block_forwarding() { + eval(r#" + def test(&block) = [].map(&block) + test; test + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v14:ArrayExact = NewArray + GuardBlockParamProxy l0 + v17:HeapObject[BlockParamProxy] = Const Value(VALUE(0x1000)) + v19:BasicObject = Send v14, 0x1008, :map, v17 + CheckInterrupts + Return v19 + "); + } + + #[test] + fn test_do_not_optimize_send_to_iseq_method_with_block() { + eval(r#" + def foo + yield 1 + end + + def test = foo {} + test; test + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:6: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v11:BasicObject = Send v6, 0x1000, :foo + CheckInterrupts + Return v11 + "); + } + #[test] fn test_inline_attr_reader_constant() { eval(" diff --git a/zjit/src/profile.rs b/zjit/src/profile.rs index e935ec9731f383..a6c837df5a48ff 100644 --- a/zjit/src/profile.rs +++ b/zjit/src/profile.rs @@ -83,7 +83,7 @@ fn profile_insn(bare_opcode: ruby_vminsn_type, ec: EcPtr) { YARVINSN_opt_length => profile_operands(profiler, profile, 1), YARVINSN_opt_size => profile_operands(profiler, profile, 1), YARVINSN_opt_succ => profile_operands(profiler, profile, 1), - YARVINSN_opt_send_without_block => { + YARVINSN_opt_send_without_block | YARVINSN_send => { let cd: *const rb_call_data = profiler.insn_opnd(0).as_ptr(); let argc = unsafe { vm_ci_argc((*cd).ci) }; // Profile all the arguments and self (+1). diff --git a/zjit/src/stats.rs b/zjit/src/stats.rs index 50f6e61f5c242e..33f29fb3aaed22 100644 --- a/zjit/src/stats.rs +++ b/zjit/src/stats.rs @@ -137,6 +137,7 @@ make_counters! 
{ exit_fixnum_add_overflow, exit_fixnum_sub_overflow, exit_fixnum_mult_overflow, + exit_fixnum_mod_by_zero, exit_guard_type_failure, exit_guard_type_not_failure, exit_guard_bit_equals_failure, @@ -332,6 +333,7 @@ pub fn side_exit_counter(reason: crate::hir::SideExitReason) -> Counter { FixnumAddOverflow => exit_fixnum_add_overflow, FixnumSubOverflow => exit_fixnum_sub_overflow, FixnumMultOverflow => exit_fixnum_mult_overflow, + FixnumModByZero => exit_fixnum_mod_by_zero, GuardType(_) => exit_guard_type_failure, GuardTypeNot(_) => exit_guard_type_not_failure, GuardBitEquals(_) => exit_guard_bit_equals_failure,