From 8baf170e936525bbda4838db5bbfb138cf724229 Mon Sep 17 00:00:00 2001 From: Alan Wu Date: Tue, 14 Oct 2025 14:21:59 -0400 Subject: [PATCH 1/6] ZJIT: `mem::take` instead of `drain` then `collect` Gets rid of one transient vec copy/allocation. --- zjit/src/hir.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index a032d9ec8a30a7..e32c15702eb36e 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -2398,7 +2398,7 @@ impl Function { if let Some(replacement) = (props.inline)(fun, tmp_block, recv, &args, state) { // Copy contents of tmp_block to block assert_ne!(block, tmp_block); - let insns = fun.blocks[tmp_block.0].insns.drain(..).collect::>(); + let insns = std::mem::take(&mut fun.blocks[tmp_block.0].insns); fun.blocks[block.0].insns.extend(insns); fun.make_equal_to(send_insn_id, replacement); fun.remove_block(tmp_block); @@ -2453,7 +2453,7 @@ impl Function { if let Some(replacement) = (props.inline)(fun, tmp_block, recv, &args, state) { // Copy contents of tmp_block to block assert_ne!(block, tmp_block); - let insns = fun.blocks[tmp_block.0].insns.drain(..).collect::>(); + let insns = std::mem::take(&mut fun.blocks[tmp_block.0].insns); fun.blocks[block.0].insns.extend(insns); fun.make_equal_to(send_insn_id, replacement); fun.remove_block(tmp_block); From d75207d0043c56034e68c306f6dfe5e7b4f09438 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Tue, 14 Oct 2025 15:09:53 -0400 Subject: [PATCH 2/6] ZJIT: Profile opt_ltlt and opt_aset (#14834) These bring `send_without_block_no_profiles` numbers down dramatically. On lobsters: Before: send_without_block_no_profiles: 3,466,375 After: send_without_block_no_profiles: 1,293,375 all stats before: ``` ***ZJIT: Printing ZJIT statistics on exit*** Top-20 not inlined C methods (70.4% of total 14,174,061): Hash#[]: 4,519,771 (31.9%) Kernel#is_a?: 1,030,757 ( 7.3%) Regexp#match?: 399,885 ( 2.8%) String#empty?: 353,775 ( 2.5%) Hash#key?: 349,125 ( 2.5%) Hash#[]=: 344,348 ( 2.4%) String#start_with?: 334,961 ( 2.4%) Kernel#respond_to?: 316,527 ( 2.2%) ObjectSpace::WeakKeyMap#[]: 238,978 ( 1.7%) TrueClass#===: 235,770 ( 1.7%) FalseClass#===: 231,143 ( 1.6%) Array#include?: 211,383 ( 1.5%) Hash#fetch: 204,702 ( 1.4%) Kernel#block_given?: 181,793 ( 1.3%) ActiveSupport::OrderedOptions#_get: 181,272 ( 1.3%) Kernel#dup: 179,341 ( 1.3%) BasicObject#!=: 175,996 ( 1.2%) Class#new: 168,079 ( 1.2%) Kernel#kind_of?: 165,600 ( 1.2%) String#==: 157,734 ( 1.1%) Top-20 not annotated C methods (71.1% of total 14,336,035): Hash#[]: 4,519,781 (31.5%) Kernel#is_a?: 1,212,647 ( 8.5%) Regexp#match?: 399,885 ( 2.8%) String#empty?: 361,013 ( 2.5%) Hash#key?: 349,125 ( 2.4%) Hash#[]=: 344,348 ( 2.4%) String#start_with?: 334,961 ( 2.3%) Kernel#respond_to?: 316,527 ( 2.2%) ObjectSpace::WeakKeyMap#[]: 238,978 ( 1.7%) TrueClass#===: 235,770 ( 1.6%) FalseClass#===: 231,143 ( 1.6%) Array#include?: 211,383 ( 1.5%) Hash#fetch: 204,702 ( 1.4%) Kernel#block_given?: 191,662 ( 1.3%) ActiveSupport::OrderedOptions#_get: 181,272 ( 1.3%) Kernel#dup: 179,348 ( 1.3%) BasicObject#!=: 176,180 ( 1.2%) Class#new: 168,079 ( 1.2%) Kernel#kind_of?: 165,634 ( 1.2%) String#==: 163,666 ( 1.1%) Top-2 not optimized method types for send (100.0% of total 72,318): cfunc: 48,055 (66.4%) iseq: 24,263 (33.6%) Top-6 not optimized method types for send_without_block (100.0% of total 4,536,895): iseq: 2,281,897 (50.3%) bmethod: 985,679 (21.7%) optimized: 952,914 (21.0%) alias: 310,745 ( 6.8%) null: 5,106 ( 0.1%) cfunc: 554 ( 0.0%) Top-13 not optimized 
instructions (100.0% of total 4,293,123): invokesuper: 2,373,396 (55.3%) invokeblock: 811,891 (18.9%) sendforward: 505,449 (11.8%) opt_eq: 451,754 (10.5%) opt_plus: 74,403 ( 1.7%) opt_minus: 36,227 ( 0.8%) opt_send_without_block: 21,792 ( 0.5%) opt_neq: 7,231 ( 0.2%) opt_mult: 6,752 ( 0.2%) opt_or: 3,753 ( 0.1%) opt_lt: 348 ( 0.0%) opt_ge: 91 ( 0.0%) opt_gt: 36 ( 0.0%) Top-9 send fallback reasons (100.0% of total 27,795,022): send_without_block_polymorphic: 9,505,835 (34.2%) send_no_profiles: 5,894,763 (21.2%) send_without_block_not_optimized_method_type: 4,536,895 (16.3%) not_optimized_instruction: 4,293,123 (15.4%) send_without_block_no_profiles: 3,466,407 (12.5%) send_not_optimized_method_type: 72,318 ( 0.3%) send_without_block_cfunc_array_variadic: 15,134 ( 0.1%) obj_to_string_not_string: 9,918 ( 0.0%) send_without_block_direct_too_many_args: 629 ( 0.0%) Top-9 unhandled YARV insns (100.0% of total 690,482): expandarray: 328,490 (47.6%) checkkeyword: 190,694 (27.6%) getclassvariable: 59,901 ( 8.7%) invokesuperforward: 49,503 ( 7.2%) getblockparam: 48,651 ( 7.0%) opt_duparray_send: 11,978 ( 1.7%) getconstant: 952 ( 0.1%) checkmatch: 290 ( 0.0%) once: 23 ( 0.0%) Top-3 compile error reasons (100.0% of total 3,752,391): register_spill_on_alloc: 3,457,680 (92.1%) register_spill_on_ccall: 176,348 ( 4.7%) exception_handler: 118,363 ( 3.2%) Top-14 side exit reasons (100.0% of total 10,852,021): compile_error: 3,752,391 (34.6%) guard_type_failure: 2,630,877 (24.2%) guard_shape_failure: 1,917,208 (17.7%) unhandled_yarv_insn: 690,482 ( 6.4%) block_param_proxy_not_iseq_or_ifunc: 535,784 ( 4.9%) unhandled_kwarg: 421,989 ( 3.9%) patchpoint: 369,799 ( 3.4%) unknown_newarray_send: 314,786 ( 2.9%) unhandled_splat: 122,062 ( 1.1%) unhandled_hir_insn: 76,394 ( 0.7%) block_param_proxy_modified: 19,193 ( 0.2%) obj_to_string_fallback: 566 ( 0.0%) interrupt: 468 ( 0.0%) guard_type_not_failure: 22 ( 0.0%) send_count: 66,989,407 dynamic_send_count: 27,795,022 (41.5%) optimized_send_count: 39,194,385 (58.5%) iseq_optimized_send_count: 18,060,194 (27.0%) inline_cfunc_optimized_send_count: 6,960,130 (10.4%) non_variadic_cfunc_optimized_send_count: 11,523,682 (17.2%) variadic_cfunc_optimized_send_count: 2,650,379 ( 4.0%) dynamic_getivar_count: 7,365,982 dynamic_setivar_count: 7,245,929 compiled_iseq_count: 4,795 failed_iseq_count: 449 compile_time: 846ms profile_time: 12ms gc_time: 9ms invalidation_time: 61ms vm_write_pc_count: 64,326,442 vm_write_sp_count: 62,982,524 vm_write_locals_count: 62,982,524 vm_write_stack_count: 62,982,524 vm_write_to_parent_iseq_local_count: 292,448 vm_read_from_parent_iseq_local_count: 6,471,353 code_region_bytes: 22,708,224 side_exit_count: 10,852,021 total_insn_count: 517,550,288 vm_insn_count: 162,946,459 zjit_insn_count: 354,603,829 ratio_in_zjit: 68.5% ``` all stats after: ``` ***ZJIT: Printing ZJIT statistics on exit*** Top-20 not inlined C methods (71.1% of total 15,575,343): Hash#[]: 4,519,778 (29.0%) Kernel#is_a?: 1,030,758 ( 6.6%) String#<<: 851,931 ( 5.5%) Hash#[]=: 742,938 ( 4.8%) Regexp#match?: 399,886 ( 2.6%) String#empty?: 353,775 ( 2.3%) Hash#key?: 349,127 ( 2.2%) String#start_with?: 334,961 ( 2.2%) Kernel#respond_to?: 316,529 ( 2.0%) ObjectSpace::WeakKeyMap#[]: 238,978 ( 1.5%) TrueClass#===: 235,771 ( 1.5%) FalseClass#===: 231,144 ( 1.5%) Array#include?: 211,380 ( 1.4%) Hash#fetch: 204,701 ( 1.3%) Kernel#block_given?: 181,792 ( 1.2%) ActiveSupport::OrderedOptions#_get: 181,272 ( 1.2%) Kernel#dup: 179,341 ( 1.2%) BasicObject#!=: 175,997 ( 1.1%) Class#new: 168,079 ( 
1.1%) Kernel#kind_of?: 165,600 ( 1.1%) Top-20 not annotated C methods (71.6% of total 15,737,486): Hash#[]: 4,519,788 (28.7%) Kernel#is_a?: 1,212,649 ( 7.7%) String#<<: 851,931 ( 5.4%) Hash#[]=: 743,117 ( 4.7%) Regexp#match?: 399,886 ( 2.5%) String#empty?: 361,013 ( 2.3%) Hash#key?: 349,127 ( 2.2%) String#start_with?: 334,961 ( 2.1%) Kernel#respond_to?: 316,529 ( 2.0%) ObjectSpace::WeakKeyMap#[]: 238,978 ( 1.5%) TrueClass#===: 235,771 ( 1.5%) FalseClass#===: 231,144 ( 1.5%) Array#include?: 211,380 ( 1.3%) Hash#fetch: 204,701 ( 1.3%) Kernel#block_given?: 191,661 ( 1.2%) ActiveSupport::OrderedOptions#_get: 181,272 ( 1.2%) Kernel#dup: 179,348 ( 1.1%) BasicObject#!=: 176,181 ( 1.1%) Class#new: 168,079 ( 1.1%) Kernel#kind_of?: 165,634 ( 1.1%) Top-2 not optimized method types for send (100.0% of total 72,318): cfunc: 48,055 (66.4%) iseq: 24,263 (33.6%) Top-6 not optimized method types for send_without_block (100.0% of total 4,523,650): iseq: 2,271,911 (50.2%) bmethod: 985,636 (21.8%) optimized: 949,696 (21.0%) alias: 310,747 ( 6.9%) null: 5,106 ( 0.1%) cfunc: 554 ( 0.0%) Top-13 not optimized instructions (100.0% of total 4,293,126): invokesuper: 2,373,395 (55.3%) invokeblock: 811,894 (18.9%) sendforward: 505,449 (11.8%) opt_eq: 451,754 (10.5%) opt_plus: 74,403 ( 1.7%) opt_minus: 36,228 ( 0.8%) opt_send_without_block: 21,792 ( 0.5%) opt_neq: 7,231 ( 0.2%) opt_mult: 6,752 ( 0.2%) opt_or: 3,753 ( 0.1%) opt_lt: 348 ( 0.0%) opt_ge: 91 ( 0.0%) opt_gt: 36 ( 0.0%) Top-9 send fallback reasons (100.0% of total 25,824,512): send_without_block_polymorphic: 9,721,725 (37.6%) send_no_profiles: 5,894,761 (22.8%) send_without_block_not_optimized_method_type: 4,523,650 (17.5%) not_optimized_instruction: 4,293,126 (16.6%) send_without_block_no_profiles: 1,293,404 ( 5.0%) send_not_optimized_method_type: 72,318 ( 0.3%) send_without_block_cfunc_array_variadic: 15,134 ( 0.1%) obj_to_string_not_string: 9,765 ( 0.0%) send_without_block_direct_too_many_args: 629 ( 0.0%) Top-9 unhandled YARV insns (100.0% of total 690,482): expandarray: 328,490 (47.6%) checkkeyword: 190,694 (27.6%) getclassvariable: 59,901 ( 8.7%) invokesuperforward: 49,503 ( 7.2%) getblockparam: 48,651 ( 7.0%) opt_duparray_send: 11,978 ( 1.7%) getconstant: 952 ( 0.1%) checkmatch: 290 ( 0.0%) once: 23 ( 0.0%) Top-3 compile error reasons (100.0% of total 3,752,504): register_spill_on_alloc: 3,457,793 (92.1%) register_spill_on_ccall: 176,348 ( 4.7%) exception_handler: 118,363 ( 3.2%) Top-14 side exit reasons (100.0% of total 10,860,754): compile_error: 3,752,504 (34.6%) guard_type_failure: 2,638,901 (24.3%) guard_shape_failure: 1,917,198 (17.7%) unhandled_yarv_insn: 690,482 ( 6.4%) block_param_proxy_not_iseq_or_ifunc: 535,785 ( 4.9%) unhandled_kwarg: 421,947 ( 3.9%) patchpoint: 370,447 ( 3.4%) unknown_newarray_send: 314,786 ( 2.9%) unhandled_splat: 122,065 ( 1.1%) unhandled_hir_insn: 76,395 ( 0.7%) block_param_proxy_modified: 19,193 ( 0.2%) obj_to_string_fallback: 566 ( 0.0%) interrupt: 463 ( 0.0%) guard_type_not_failure: 22 ( 0.0%) send_count: 66,945,926 dynamic_send_count: 25,824,512 (38.6%) optimized_send_count: 41,121,414 (61.4%) iseq_optimized_send_count: 18,587,430 (27.8%) inline_cfunc_optimized_send_count: 6,958,641 (10.4%) non_variadic_cfunc_optimized_send_count: 12,911,166 (19.3%) variadic_cfunc_optimized_send_count: 2,664,177 ( 4.0%) dynamic_getivar_count: 7,365,985 dynamic_setivar_count: 7,245,942 compiled_iseq_count: 4,794 failed_iseq_count: 450 compile_time: 852ms profile_time: 13ms gc_time: 11ms invalidation_time: 63ms vm_write_pc_count: 
64,284,194 vm_write_sp_count: 62,940,427 vm_write_locals_count: 62,940,427 vm_write_stack_count: 62,940,427 vm_write_to_parent_iseq_local_count: 292,447 vm_read_from_parent_iseq_local_count: 6,470,931 code_region_bytes: 23,019,520 side_exit_count: 10,860,754 total_insn_count: 517,576,267 vm_insn_count: 163,188,187 zjit_insn_count: 354,388,080 ratio_in_zjit: 68.5% ``` --- insns.def | 2 ++ zjit/src/cruby_bindings.inc.rs | 14 ++++---- zjit/src/hir.rs | 59 ++++++++++++++++++++++++++++++++++ zjit/src/profile.rs | 2 ++ 4 files changed, 71 insertions(+), 6 deletions(-) diff --git a/insns.def b/insns.def index eef0d3f5dc1124..b895bffe222f37 100644 --- a/insns.def +++ b/insns.def @@ -1470,6 +1470,7 @@ opt_ltlt * string. Then what happens if that codepoint does not exist in the * string's encoding? Of course an exception. That's not a leaf. */ // attr bool leaf = false; /* has "invalid codepoint" exception */ +// attr bool zjit_profile = true; { val = vm_opt_ltlt(recv, obj); @@ -1537,6 +1538,7 @@ opt_aset /* This is another story than opt_aref. When vm_opt_aset() resorts * to rb_hash_aset(), which should call #hash for `obj`. */ // attr bool leaf = false; /* has rb_funcall() */ /* calls #hash */ +// attr bool zjit_profile = true; { val = vm_opt_aset(recv, obj, set); diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index ea1bf68acc4173..6e3ae05194fa3b 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -694,12 +694,14 @@ pub const YARVINSN_zjit_opt_lt: ruby_vminsn_type = 229; pub const YARVINSN_zjit_opt_le: ruby_vminsn_type = 230; pub const YARVINSN_zjit_opt_gt: ruby_vminsn_type = 231; pub const YARVINSN_zjit_opt_ge: ruby_vminsn_type = 232; -pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 233; -pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 234; -pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 235; -pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 236; -pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 237; -pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 238; +pub const YARVINSN_zjit_opt_ltlt: ruby_vminsn_type = 233; +pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 234; +pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 235; +pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 236; +pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 237; +pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 238; +pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 239; +pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 240; pub type ruby_vminsn_type = u32; pub type rb_iseq_callback = ::std::option::Option< unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void), diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index e32c15702eb36e..022451e8ab4c77 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -12817,4 +12817,63 @@ mod opt_tests { Return v25 "); } + + #[test] + fn test_optimize_array_aset() { + eval(" + def test(arr) + arr[1] = 10 + end + test([]) + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:3: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v14:Fixnum[1] = Const Value(1) + v15:Fixnum[10] = Const Value(10) + PatchPoint MethodRedefined(Array@0x1000, []=@0x1008, cme:0x1010) + PatchPoint NoSingletonClass(Array@0x1000) + v28:ArrayExact = GuardType v9, ArrayExact + v29:BasicObject = CCallVariadic 
[]=@0x1038, v28, v14, v15 + CheckInterrupts + Return v15 + "); + } + + #[test] + fn test_optimize_array_ltlt() { + eval(" + def test(arr) + arr << 1 + end + test([]) + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:3: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v13:Fixnum[1] = Const Value(1) + PatchPoint MethodRedefined(Array@0x1000, <<@0x1008, cme:0x1010) + PatchPoint NoSingletonClass(Array@0x1000) + v26:ArrayExact = GuardType v9, ArrayExact + v27:BasicObject = CCallWithFrame <<@0x1038, v26, v13 + CheckInterrupts + Return v27 + "); + } } diff --git a/zjit/src/profile.rs b/zjit/src/profile.rs index 67f2fdc7403822..c2c35f687b37d4 100644 --- a/zjit/src/profile.rs +++ b/zjit/src/profile.rs @@ -74,6 +74,8 @@ fn profile_insn(bare_opcode: ruby_vminsn_type, ec: EcPtr) { YARVINSN_opt_or => profile_operands(profiler, profile, 2), YARVINSN_opt_empty_p => profile_operands(profiler, profile, 1), YARVINSN_opt_aref => profile_operands(profiler, profile, 2), + YARVINSN_opt_ltlt => profile_operands(profiler, profile, 2), + YARVINSN_opt_aset => profile_operands(profiler, profile, 3), YARVINSN_opt_not => profile_operands(profiler, profile, 1), YARVINSN_getinstancevariable => profile_self(profiler, profile), YARVINSN_objtostring => profile_operands(profiler, profile, 1), From de9298635dc7dd212c9d80db404f20855b1426af Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Tue, 14 Oct 2025 16:17:54 -0400 Subject: [PATCH 3/6] ZJIT: Profile opt_size, opt_length, opt_regexpmatch2 (#14837) These bring `send_without_block_no_profiles` numbers down more. On lobsters: Before: send_without_block_no_profiles: 1,293,375 After: send_without_block_no_profiles: 998,724 all stats before: ``` ***ZJIT: Printing ZJIT statistics on exit*** Top-20 not inlined C methods (71.1% of total 15,575,335): Hash#[]: 4,519,774 (29.0%) Kernel#is_a?: 1,030,758 ( 6.6%) String#<<: 851,929 ( 5.5%) Hash#[]=: 742,941 ( 4.8%) Regexp#match?: 399,889 ( 2.6%) String#empty?: 353,775 ( 2.3%) Hash#key?: 349,129 ( 2.2%) String#start_with?: 334,961 ( 2.2%) Kernel#respond_to?: 316,527 ( 2.0%) ObjectSpace::WeakKeyMap#[]: 238,978 ( 1.5%) TrueClass#===: 235,771 ( 1.5%) FalseClass#===: 231,144 ( 1.5%) Array#include?: 211,381 ( 1.4%) Hash#fetch: 204,702 ( 1.3%) Kernel#block_given?: 181,792 ( 1.2%) ActiveSupport::OrderedOptions#_get: 181,272 ( 1.2%) Kernel#dup: 179,340 ( 1.2%) BasicObject#!=: 175,997 ( 1.1%) Class#new: 168,078 ( 1.1%) Kernel#kind_of?: 165,600 ( 1.1%) Top-20 not annotated C methods (71.6% of total 15,737,478): Hash#[]: 4,519,784 (28.7%) Kernel#is_a?: 1,212,649 ( 7.7%) String#<<: 851,929 ( 5.4%) Hash#[]=: 743,120 ( 4.7%) Regexp#match?: 399,889 ( 2.5%) String#empty?: 361,013 ( 2.3%) Hash#key?: 349,129 ( 2.2%) String#start_with?: 334,961 ( 2.1%) Kernel#respond_to?: 316,527 ( 2.0%) ObjectSpace::WeakKeyMap#[]: 238,978 ( 1.5%) TrueClass#===: 235,771 ( 1.5%) FalseClass#===: 231,144 ( 1.5%) Array#include?: 211,381 ( 1.3%) Hash#fetch: 204,702 ( 1.3%) Kernel#block_given?: 191,661 ( 1.2%) ActiveSupport::OrderedOptions#_get: 181,272 ( 1.2%) Kernel#dup: 179,347 ( 1.1%) BasicObject#!=: 176,181 ( 1.1%) Class#new: 168,078 ( 1.1%) Kernel#kind_of?: 165,634 ( 1.1%) Top-2 not optimized method types for send (100.0% of total 72,318): cfunc: 48,055 (66.4%) iseq: 24,263 (33.6%) Top-6 not optimized method types for send_without_block (100.0% of total 4,523,648): iseq: 
2,271,904 (50.2%) bmethod: 985,636 (21.8%) optimized: 949,702 (21.0%) alias: 310,746 ( 6.9%) null: 5,106 ( 0.1%) cfunc: 554 ( 0.0%) Top-13 not optimized instructions (100.0% of total 4,293,096): invokesuper: 2,373,391 (55.3%) invokeblock: 811,872 (18.9%) sendforward: 505,448 (11.8%) opt_eq: 451,754 (10.5%) opt_plus: 74,403 ( 1.7%) opt_minus: 36,225 ( 0.8%) opt_send_without_block: 21,792 ( 0.5%) opt_neq: 7,231 ( 0.2%) opt_mult: 6,752 ( 0.2%) opt_or: 3,753 ( 0.1%) opt_lt: 348 ( 0.0%) opt_ge: 91 ( 0.0%) opt_gt: 36 ( 0.0%) Top-9 send fallback reasons (100.0% of total 25,824,463): send_without_block_polymorphic: 9,721,727 (37.6%) send_no_profiles: 5,894,760 (22.8%) send_without_block_not_optimized_method_type: 4,523,648 (17.5%) not_optimized_instruction: 4,293,096 (16.6%) send_without_block_no_profiles: 1,293,386 ( 5.0%) send_not_optimized_method_type: 72,318 ( 0.3%) send_without_block_cfunc_array_variadic: 15,134 ( 0.1%) obj_to_string_not_string: 9,765 ( 0.0%) send_without_block_direct_too_many_args: 629 ( 0.0%) Top-9 unhandled YARV insns (100.0% of total 690,482): expandarray: 328,490 (47.6%) checkkeyword: 190,694 (27.6%) getclassvariable: 59,901 ( 8.7%) invokesuperforward: 49,503 ( 7.2%) getblockparam: 48,651 ( 7.0%) opt_duparray_send: 11,978 ( 1.7%) getconstant: 952 ( 0.1%) checkmatch: 290 ( 0.0%) once: 23 ( 0.0%) Top-3 compile error reasons (100.0% of total 3,752,502): register_spill_on_alloc: 3,457,791 (92.1%) register_spill_on_ccall: 176,348 ( 4.7%) exception_handler: 118,363 ( 3.2%) Top-14 side exit reasons (100.0% of total 10,860,787): compile_error: 3,752,502 (34.6%) guard_type_failure: 2,638,903 (24.3%) guard_shape_failure: 1,917,195 (17.7%) unhandled_yarv_insn: 690,482 ( 6.4%) block_param_proxy_not_iseq_or_ifunc: 535,787 ( 4.9%) unhandled_kwarg: 421,943 ( 3.9%) patchpoint: 370,449 ( 3.4%) unknown_newarray_send: 314,785 ( 2.9%) unhandled_splat: 122,060 ( 1.1%) unhandled_hir_insn: 76,396 ( 0.7%) block_param_proxy_modified: 19,193 ( 0.2%) obj_to_string_fallback: 566 ( 0.0%) interrupt: 504 ( 0.0%) guard_type_not_failure: 22 ( 0.0%) send_count: 66,945,801 dynamic_send_count: 25,824,463 (38.6%) optimized_send_count: 41,121,338 (61.4%) iseq_optimized_send_count: 18,587,368 (27.8%) inline_cfunc_optimized_send_count: 6,958,635 (10.4%) non_variadic_cfunc_optimized_send_count: 12,911,155 (19.3%) variadic_cfunc_optimized_send_count: 2,664,180 ( 4.0%) dynamic_getivar_count: 7,365,975 dynamic_setivar_count: 7,245,897 compiled_iseq_count: 4,794 failed_iseq_count: 450 compile_time: 760ms profile_time: 9ms gc_time: 8ms invalidation_time: 55ms vm_write_pc_count: 64,284,053 vm_write_sp_count: 62,940,297 vm_write_locals_count: 62,940,297 vm_write_stack_count: 62,940,297 vm_write_to_parent_iseq_local_count: 292,446 vm_read_from_parent_iseq_local_count: 6,470,923 code_region_bytes: 23,019,520 side_exit_count: 10,860,787 total_insn_count: 517,576,320 vm_insn_count: 163,188,910 zjit_insn_count: 354,387,410 ratio_in_zjit: 68.5% ``` all stats after: ``` ***ZJIT: Printing ZJIT statistics on exit*** Top-20 not inlined C methods (70.4% of total 15,740,856): Hash#[]: 4,519,792 (28.7%) Kernel#is_a?: 1,030,776 ( 6.5%) String#<<: 851,940 ( 5.4%) Hash#[]=: 742,914 ( 4.7%) Regexp#match?: 399,887 ( 2.5%) String#empty?: 353,775 ( 2.2%) Hash#key?: 349,139 ( 2.2%) String#start_with?: 334,961 ( 2.1%) Kernel#respond_to?: 316,529 ( 2.0%) ObjectSpace::WeakKeyMap#[]: 238,978 ( 1.5%) TrueClass#===: 235,771 ( 1.5%) FalseClass#===: 231,144 ( 1.5%) Array#include?: 211,381 ( 1.3%) Hash#fetch: 204,702 ( 1.3%) Kernel#block_given?: 
181,788 ( 1.2%) ActiveSupport::OrderedOptions#_get: 181,272 ( 1.2%) Kernel#dup: 179,341 ( 1.1%) BasicObject#!=: 175,996 ( 1.1%) Class#new: 168,079 ( 1.1%) Kernel#kind_of?: 165,600 ( 1.1%) Top-20 not annotated C methods (70.9% of total 15,902,999): Hash#[]: 4,519,802 (28.4%) Kernel#is_a?: 1,212,667 ( 7.6%) String#<<: 851,940 ( 5.4%) Hash#[]=: 743,093 ( 4.7%) Regexp#match?: 399,887 ( 2.5%) String#empty?: 361,013 ( 2.3%) Hash#key?: 349,139 ( 2.2%) String#start_with?: 334,961 ( 2.1%) Kernel#respond_to?: 316,529 ( 2.0%) ObjectSpace::WeakKeyMap#[]: 238,978 ( 1.5%) TrueClass#===: 235,771 ( 1.5%) FalseClass#===: 231,144 ( 1.5%) Array#include?: 211,381 ( 1.3%) Hash#fetch: 204,702 ( 1.3%) Kernel#block_given?: 191,657 ( 1.2%) ActiveSupport::OrderedOptions#_get: 181,272 ( 1.1%) Kernel#dup: 179,348 ( 1.1%) BasicObject#!=: 176,180 ( 1.1%) Class#new: 168,079 ( 1.1%) Kernel#kind_of?: 165,634 ( 1.0%) Top-2 not optimized method types for send (100.0% of total 72,318): cfunc: 48,055 (66.4%) iseq: 24,263 (33.6%) Top-6 not optimized method types for send_without_block (100.0% of total 4,523,637): iseq: 2,271,900 (50.2%) bmethod: 985,636 (21.8%) optimized: 949,695 (21.0%) alias: 310,746 ( 6.9%) null: 5,106 ( 0.1%) cfunc: 554 ( 0.0%) Top-13 not optimized instructions (100.0% of total 4,293,128): invokesuper: 2,373,401 (55.3%) invokeblock: 811,890 (18.9%) sendforward: 505,449 (11.8%) opt_eq: 451,754 (10.5%) opt_plus: 74,403 ( 1.7%) opt_minus: 36,228 ( 0.8%) opt_send_without_block: 21,792 ( 0.5%) opt_neq: 7,231 ( 0.2%) opt_mult: 6,752 ( 0.2%) opt_or: 3,753 ( 0.1%) opt_lt: 348 ( 0.0%) opt_ge: 91 ( 0.0%) opt_gt: 36 ( 0.0%) Top-9 send fallback reasons (100.0% of total 25,530,605): send_without_block_polymorphic: 9,722,499 (38.1%) send_no_profiles: 5,894,763 (23.1%) send_without_block_not_optimized_method_type: 4,523,637 (17.7%) not_optimized_instruction: 4,293,128 (16.8%) send_without_block_no_profiles: 998,732 ( 3.9%) send_not_optimized_method_type: 72,318 ( 0.3%) send_without_block_cfunc_array_variadic: 15,134 ( 0.1%) obj_to_string_not_string: 9,765 ( 0.0%) send_without_block_direct_too_many_args: 629 ( 0.0%) Top-9 unhandled YARV insns (100.0% of total 690,482): expandarray: 328,490 (47.6%) checkkeyword: 190,694 (27.6%) getclassvariable: 59,901 ( 8.7%) invokesuperforward: 49,503 ( 7.2%) getblockparam: 48,651 ( 7.0%) opt_duparray_send: 11,978 ( 1.7%) getconstant: 952 ( 0.1%) checkmatch: 290 ( 0.0%) once: 23 ( 0.0%) Top-3 compile error reasons (100.0% of total 3,752,500): register_spill_on_alloc: 3,457,792 (92.1%) register_spill_on_ccall: 176,348 ( 4.7%) exception_handler: 118,360 ( 3.2%) Top-14 side exit reasons (100.0% of total 10,860,797): compile_error: 3,752,500 (34.6%) guard_type_failure: 2,638,909 (24.3%) guard_shape_failure: 1,917,203 (17.7%) unhandled_yarv_insn: 690,482 ( 6.4%) block_param_proxy_not_iseq_or_ifunc: 535,784 ( 4.9%) unhandled_kwarg: 421,947 ( 3.9%) patchpoint: 370,474 ( 3.4%) unknown_newarray_send: 314,786 ( 2.9%) unhandled_splat: 122,067 ( 1.1%) unhandled_hir_insn: 76,395 ( 0.7%) block_param_proxy_modified: 19,193 ( 0.2%) obj_to_string_fallback: 566 ( 0.0%) interrupt: 469 ( 0.0%) guard_type_not_failure: 22 ( 0.0%) send_count: 66,945,326 dynamic_send_count: 25,530,605 (38.1%) optimized_send_count: 41,414,721 (61.9%) iseq_optimized_send_count: 18,587,439 (27.8%) inline_cfunc_optimized_send_count: 7,086,426 (10.6%) non_variadic_cfunc_optimized_send_count: 13,076,682 (19.5%) variadic_cfunc_optimized_send_count: 2,664,174 ( 4.0%) dynamic_getivar_count: 7,365,985 dynamic_setivar_count: 7,245,954 
compiled_iseq_count: 4,794 failed_iseq_count: 450 compile_time: 748ms profile_time: 9ms gc_time: 8ms invalidation_time: 58ms vm_write_pc_count: 64,155,801 vm_write_sp_count: 62,812,041 vm_write_locals_count: 62,812,041 vm_write_stack_count: 62,812,041 vm_write_to_parent_iseq_local_count: 292,448 vm_read_from_parent_iseq_local_count: 6,470,939 code_region_bytes: 23,052,288 side_exit_count: 10,860,797 total_insn_count: 517,576,915 vm_insn_count: 163,192,099 zjit_insn_count: 354,384,816 ratio_in_zjit: 68.5% ``` --- insns.def | 3 ++ zjit/src/cruby_bindings.inc.rs | 9 ++-- zjit/src/hir.rs | 85 +++++++++++++++++++++++++++++++++- zjit/src/profile.rs | 3 ++ 4 files changed, 96 insertions(+), 4 deletions(-) diff --git a/insns.def b/insns.def index b895bffe222f37..69a8210d7d6f99 100644 --- a/insns.def +++ b/insns.def @@ -1553,6 +1553,7 @@ opt_length (CALL_DATA cd) (VALUE recv) (VALUE val) +// attr bool zjit_profile = true; { val = vm_opt_length(recv, BOP_LENGTH); @@ -1567,6 +1568,7 @@ opt_size (CALL_DATA cd) (VALUE recv) (VALUE val) +// attr bool zjit_profile = true; { val = vm_opt_length(recv, BOP_SIZE); @@ -1626,6 +1628,7 @@ opt_regexpmatch2 (VALUE obj2, VALUE obj1) (VALUE val) // attr bool leaf = false; /* match_at() has rb_thread_check_ints() */ +// attr bool zjit_profile = true; { val = vm_opt_regexpmatch2(obj2, obj1); diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index 6e3ae05194fa3b..56b569e064c0b0 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -699,9 +699,12 @@ pub const YARVINSN_zjit_opt_and: ruby_vminsn_type = 234; pub const YARVINSN_zjit_opt_or: ruby_vminsn_type = 235; pub const YARVINSN_zjit_opt_aref: ruby_vminsn_type = 236; pub const YARVINSN_zjit_opt_aset: ruby_vminsn_type = 237; -pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 238; -pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 239; -pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 240; +pub const YARVINSN_zjit_opt_length: ruby_vminsn_type = 238; +pub const YARVINSN_zjit_opt_size: ruby_vminsn_type = 239; +pub const YARVINSN_zjit_opt_empty_p: ruby_vminsn_type = 240; +pub const YARVINSN_zjit_opt_not: ruby_vminsn_type = 241; +pub const YARVINSN_zjit_opt_regexpmatch2: ruby_vminsn_type = 242; +pub const VM_INSTRUCTION_SIZE: ruby_vminsn_type = 243; pub type ruby_vminsn_type = u32; pub type rb_iseq_callback = ::std::option::Option< unsafe extern "C" fn(arg1: *const rb_iseq_t, arg2: *mut ::std::os::raw::c_void), diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 022451e8ab4c77..33c034b819a7ec 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -4957,7 +4957,7 @@ mod tests { } #[track_caller] - fn assert_contains_opcode(method: &str, opcode: u32) { + pub fn assert_contains_opcode(method: &str, opcode: u32) { let iseq = crate::cruby::with_rubyvm(|| get_method_iseq("self", method)); unsafe { crate::cruby::rb_zjit_profile_disable(iseq) }; assert!(iseq_contains_opcode(iseq, opcode), "iseq {method} does not contain {}", insn_name(opcode as usize)); @@ -7999,6 +7999,7 @@ mod opt_tests { use super::*; use crate::{hir_strings, options::*}; use insta::assert_snapshot; + use super::tests::assert_contains_opcode; #[track_caller] fn hir_string(method: &str) -> String { @@ -12876,4 +12877,86 @@ mod opt_tests { Return v27 "); } + + #[test] + fn test_optimize_array_length() { + eval(" + def test(arr) = arr.length + test([]) + "); + assert_contains_opcode("test", YARVINSN_opt_length); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter 
+ v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + PatchPoint MethodRedefined(Array@0x1000, length@0x1008, cme:0x1010) + PatchPoint NoSingletonClass(Array@0x1000) + v25:ArrayExact = GuardType v9, ArrayExact + v26:Fixnum = CCall length@0x1038, v25 + CheckInterrupts + Return v26 + "); + } + + #[test] + fn test_optimize_array_size() { + eval(" + def test(arr) = arr.size + test([]) + "); + assert_contains_opcode("test", YARVINSN_opt_size); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + PatchPoint MethodRedefined(Array@0x1000, size@0x1008, cme:0x1010) + PatchPoint NoSingletonClass(Array@0x1000) + v25:ArrayExact = GuardType v9, ArrayExact + v26:Fixnum = CCall size@0x1038, v25 + CheckInterrupts + Return v26 + "); + } + + #[test] + fn test_optimize_regexpmatch2() { + eval(r#" + def test(s) = s =~ /a/ + test("foo") + "#); + assert_contains_opcode("test", YARVINSN_opt_regexpmatch2); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2) + bb1(v5:BasicObject, v6:BasicObject): + EntryPoint JIT(0) + Jump bb2(v5, v6) + bb2(v8:BasicObject, v9:BasicObject): + v13:RegexpExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) + PatchPoint MethodRedefined(String@0x1008, =~@0x1010, cme:0x1018) + PatchPoint NoSingletonClass(String@0x1008) + v26:StringExact = GuardType v9, StringExact + v27:BasicObject = CCallWithFrame =~@0x1040, v26, v13 + CheckInterrupts + Return v27 + "); + } } diff --git a/zjit/src/profile.rs b/zjit/src/profile.rs index c2c35f687b37d4..e7db47142bcf65 100644 --- a/zjit/src/profile.rs +++ b/zjit/src/profile.rs @@ -78,7 +78,10 @@ fn profile_insn(bare_opcode: ruby_vminsn_type, ec: EcPtr) { YARVINSN_opt_aset => profile_operands(profiler, profile, 3), YARVINSN_opt_not => profile_operands(profiler, profile, 1), YARVINSN_getinstancevariable => profile_self(profiler, profile), + YARVINSN_opt_regexpmatch2 => profile_operands(profiler, profile, 2), YARVINSN_objtostring => profile_operands(profiler, profile, 1), + YARVINSN_opt_length => profile_operands(profiler, profile, 1), + YARVINSN_opt_size => profile_operands(profiler, profile, 1), YARVINSN_opt_send_without_block => { let cd: *const rb_call_data = profiler.insn_opnd(0).as_ptr(); let argc = unsafe { vm_ci_argc((*cd).ci) }; From ed94e543515cad8624120c09500ff38fe1b56160 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Wed, 15 Oct 2025 04:36:47 +0800 Subject: [PATCH 4/6] ZJIT: Centralize the allocation of scratch registers (#14815) --- zjit/src/asm/mod.rs | 9 +- zjit/src/backend/arm64/mod.rs | 270 ++++++++++++++++++----------- zjit/src/backend/lir.rs | 91 +++++----- zjit/src/backend/x86_64/mod.rs | 301 ++++++++++++++++++++------------- zjit/src/codegen.rs | 20 +-- 5 files changed, 412 insertions(+), 279 deletions(-) diff --git a/zjit/src/asm/mod.rs b/zjit/src/asm/mod.rs index aeb429382d88c0..32dc633a2941ce 100644 --- a/zjit/src/asm/mod.rs +++ b/zjit/src/asm/mod.rs @@ -20,7 +20,6 @@ pub mod arm64; pub struct Label(pub usize); /// Reference to an ASM label -#[derive(Clone)] pub struct LabelRef { // Position in the code block where the label 
reference exists pos: usize, @@ -34,7 +33,7 @@ pub struct LabelRef { num_bytes: usize, /// The object that knows how to encode the branch instruction. - encode: fn(&mut CodeBlock, i64, i64) + encode: Box, } /// Block of memory into which instructions can be assembled @@ -223,11 +222,11 @@ impl CodeBlock { } // Add a label reference at the current write position - pub fn label_ref(&mut self, label: Label, num_bytes: usize, encode: fn(&mut CodeBlock, i64, i64)) { + pub fn label_ref(&mut self, label: Label, num_bytes: usize, encode: impl Fn(&mut CodeBlock, i64, i64) + 'static) { assert!(label.0 < self.label_addrs.len()); // Keep track of the reference - self.label_refs.push(LabelRef { pos: self.write_pos, label, num_bytes, encode }); + self.label_refs.push(LabelRef { pos: self.write_pos, label, num_bytes, encode: Box::new(encode) }); // Move past however many bytes the instruction takes up if self.write_pos + num_bytes < self.mem_size { @@ -251,7 +250,7 @@ impl CodeBlock { assert!(label_addr < self.mem_size); self.write_pos = ref_pos; - (label_ref.encode)(self, (ref_pos + label_ref.num_bytes) as i64, label_addr as i64); + (label_ref.encode.as_ref())(self, (ref_pos + label_ref.num_bytes) as i64, label_addr as i64); // Assert that we've written the same number of bytes that we // expected to have written. diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs index 5ac62c059986d5..6750926b35daa1 100644 --- a/zjit/src/backend/arm64/mod.rs +++ b/zjit/src/backend/arm64/mod.rs @@ -113,8 +113,8 @@ fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr, padding: bool) { b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32)); 1 } else { - let num_insns = emit_load_value(cb, Assembler::SCRATCH0, dst_addr as u64); - br(cb, Assembler::SCRATCH0); + let num_insns = emit_load_value(cb, Assembler::EMIT0_OPND, dst_addr as u64); + br(cb, Assembler::EMIT0_OPND); num_insns + 1 }; @@ -181,7 +181,7 @@ fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize { /// List of registers that can be used for register allocation. /// This has the same number of registers for x86_64 and arm64. -/// SCRATCH0 and SCRATCH1 are excluded. +/// SCRATCH_OPND, EMIT0_OPND, and EMIT1_OPND are excluded. pub const ALLOC_REGS: &[Reg] = &[ X0_REG, X1_REG, @@ -193,17 +193,33 @@ pub const ALLOC_REGS: &[Reg] = &[ X12_REG, ]; -impl Assembler -{ - /// Special scratch registers for intermediate processing. - /// This register is call-clobbered (so we don't have to save it before using it). - /// Avoid using if you can since this is used to lower [Insn] internally and - /// so conflicts are possible. - pub const SCRATCH_REG: Reg = X16_REG; - const SCRATCH0_REG: Reg = Self::SCRATCH_REG; - const SCRATCH1_REG: Reg = X17_REG; - const SCRATCH0: A64Opnd = A64Opnd::Reg(Self::SCRATCH0_REG); - const SCRATCH1: A64Opnd = A64Opnd::Reg(Self::SCRATCH1_REG); +/// Special scratch register for intermediate processing. It should be used only by +/// [`Assembler::arm64_split_with_scratch_reg`] or [`Assembler::new_with_scratch_reg`]. +const SCRATCH_OPND: Opnd = Opnd::Reg(X15_REG); + +impl Assembler { + /// Special registers for intermediate processing in arm64_emit. It should be used only by arm64_emit. + /// TODO: Remove the use of these registers by splitting instructions in arm64_split_with_scratch_reg. 
+ const EMIT0_REG: Reg = X16_REG; + const EMIT1_REG: Reg = X17_REG; + const EMIT0_OPND: A64Opnd = A64Opnd::Reg(Self::EMIT0_REG); + const EMIT1_OPND: A64Opnd = A64Opnd::Reg(Self::EMIT1_REG); + + /// Return an Assembler with scratch registers disabled in the backend, and a scratch register. + pub fn new_with_scratch_reg() -> (Self, Opnd) { + (Self::new_with_label_names(Vec::default(), 0, true), SCRATCH_OPND) + } + + /// Return true if opnd contains a scratch reg + pub fn has_scratch_reg(opnd: Opnd) -> bool { + match opnd { + Opnd::Reg(_) => opnd == SCRATCH_OPND, + Opnd::Mem(Mem { base: MemBase::Reg(reg_no), .. }) => { + reg_no == SCRATCH_OPND.unwrap_reg().reg_no + } + _ => false, + } + } /// Get the list of registers from which we will allocate on this platform pub fn get_alloc_regs() -> Vec { @@ -372,7 +388,7 @@ impl Assembler let live_ranges: Vec = take(&mut self.live_ranges); let mut iterator = self.insns.into_iter().enumerate().peekable(); - let mut asm_local = Assembler::new_with_label_names(take(&mut self.label_names), live_ranges.len()); + let mut asm_local = Assembler::new_with_label_names(take(&mut self.label_names), live_ranges.len(), self.accept_scratch_reg); let asm = &mut asm_local; while let Some((index, mut insn)) = iterator.next() { @@ -555,14 +571,6 @@ impl Assembler } } }, - Insn::IncrCounter { mem, value } => { - let counter_addr = match mem { - Opnd::Mem(_) => asm.lea(*mem), - _ => *mem - }; - - asm.incr_counter(counter_addr, *value); - }, Insn::JmpOpnd(opnd) => { if let Opnd::Mem(_) = opnd { let opnd0 = split_load_operand(asm, *opnd); @@ -679,6 +687,58 @@ impl Assembler asm_local } + /// Split instructions using scratch registers. To maximize the use of the register pool for + /// VRegs, most splits should happen in [`Self::arm64_split`]. However, some instructions + /// need to be split with registers after `alloc_regs`, e.g. for `compile_side_exits`, so this + /// splits them and uses scratch registers for it. + fn arm64_split_with_scratch_reg(mut self) -> Assembler { + let mut iterator = self.insns.into_iter().enumerate().peekable(); + let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), self.live_ranges.len(), true); + + while let Some((_, mut insn)) = iterator.next() { + match &mut insn { + // For compile_side_exits, support splitting simple C arguments here + Insn::CCall { opnds, .. } if !opnds.is_empty() => { + for (i, opnd) in opnds.iter().enumerate() { + asm.load_into(C_ARG_OPNDS[i], *opnd); + } + *opnds = vec![]; + asm.push_insn(insn); + } + &mut Insn::Lea { opnd, out } => { + match (opnd, out) { + // Split here for compile_side_exits + (Opnd::Mem(_), Opnd::Mem(_)) => { + asm.lea_into(SCRATCH_OPND, opnd); + asm.store(out, SCRATCH_OPND); + } + _ => { + asm.push_insn(insn); + } + } + } + // Convert Opnd::const_ptr into Opnd::Mem. It's split here compile_side_exits. + &mut Insn::IncrCounter { mem, value } => { + assert!(matches!(mem, Opnd::UImm(_))); + asm.load_into(SCRATCH_OPND, mem); + asm.lea_into(SCRATCH_OPND, Opnd::mem(64, SCRATCH_OPND, 0)); + asm.incr_counter(SCRATCH_OPND, value); + } + // Resolve ParallelMov that couldn't be handled without a scratch register. + Insn::ParallelMov { moves } => { + for (reg, opnd) in Self::resolve_parallel_moves(moves, Some(SCRATCH_OPND)).unwrap() { + asm.load_into(Opnd::Reg(reg), opnd); + } + } + _ => { + asm.push_insn(insn); + } + } + } + + asm + } + /// Emit platform-specific machine code /// Returns a list of GC offsets. Can return failure to signal caller to retry. 
fn arm64_emit(&mut self, cb: &mut CodeBlock) -> Option> { @@ -739,8 +799,8 @@ impl Assembler // that if it doesn't match it will skip over the // instructions used for branching. bcond(cb, Condition::inverse(CONDITION), (load_insns + 2).into()); - emit_load_value(cb, Assembler::SCRATCH0, dst_addr as u64); - br(cb, Assembler::SCRATCH0); + emit_load_value(cb, Assembler::EMIT0_OPND, dst_addr as u64); + br(cb, Assembler::EMIT0_OPND); // Here we'll return the number of instructions that it // took to write out the destination address + 1 for the @@ -806,8 +866,8 @@ impl Assembler } else { cbz(cb, reg, InstructionOffset::from_insns(load_insns + 2)); } - emit_load_value(cb, Assembler::SCRATCH0, dst_addr); - br(cb, Assembler::SCRATCH0); + emit_load_value(cb, Assembler::EMIT0_OPND, dst_addr); + br(cb, Assembler::EMIT0_OPND); } */ } else { @@ -979,7 +1039,7 @@ impl Assembler (Some(Insn::JoMul(_)), _) | (Some(Insn::PosMarker(_)), Some(Insn::JoMul(_))) => { // Compute the high 64 bits - smulh(cb, Self::SCRATCH0, left.into(), right.into()); + smulh(cb, Self::EMIT0_OPND, left.into(), right.into()); // Compute the low 64 bits // This may clobber one of the input registers, @@ -988,11 +1048,11 @@ impl Assembler // Produce a register that is all zeros or all ones // Based on the sign bit of the 64-bit mul result - asr(cb, Self::SCRATCH1, out.into(), A64Opnd::UImm(63)); + asr(cb, Self::EMIT1_OPND, out.into(), A64Opnd::UImm(63)); // If the high 64-bits are not all zeros or all ones, // matching the sign bit, then we have an overflow - cmp(cb, Self::SCRATCH0, Self::SCRATCH1); + cmp(cb, Self::EMIT0_OPND, Self::EMIT1_OPND); // Insn::JoMul will emit_conditional_jump::<{Condition::NE}> } _ => { @@ -1021,77 +1081,57 @@ impl Assembler Insn::LShift { opnd, shift, out } => { lsl(cb, out.into(), opnd.into(), shift.into()); }, - store_insn @ Insn::Store { dest, src } => { - // With minor exceptions, as long as `dest` is a Mem, all forms of `src` are - // accepted. As a rule of thumb, avoid using Assembler::SCRATCH as a memory - // base register to gurantee things will work. + Insn::Store { dest, src } => { + // With minor exceptions, as long as `dest` is a Mem, all forms of `src` are accepted. let &Opnd::Mem(Mem { num_bits: dest_num_bits, base: MemBase::Reg(base_reg_no), disp }) = dest else { panic!("Unexpected Insn::Store destination in arm64_emit: {dest:?}"); }; - // This kind of tricky clobber can only happen for explicit use of SCRATCH_REG, - // so we panic to get the author to change their code. 
- #[track_caller] - fn assert_no_clobber(store_insn: &Insn, user_use: u8, backend_use: Reg) { - assert_ne!( - backend_use.reg_no, - user_use, - "Emitting {store_insn:?} would clobber {user_use:?}, in conflict with its semantics" - ); - } - - // Split src into SCRATCH0 if necessary + // Split src into EMIT0_OPND if necessary let src_reg: A64Reg = match src { Opnd::Reg(reg) => *reg, // Use zero register when possible Opnd::UImm(0) | Opnd::Imm(0) => XZR_REG, // Immediates &Opnd::Imm(imm) => { - assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG); - emit_load_value(cb, Self::SCRATCH0, imm as u64); - Self::SCRATCH0_REG + emit_load_value(cb, Self::EMIT0_OPND, imm as u64); + Self::EMIT0_REG } &Opnd::UImm(imm) => { - assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG); - emit_load_value(cb, Self::SCRATCH0, imm); - Self::SCRATCH0_REG + emit_load_value(cb, Self::EMIT0_OPND, imm); + Self::EMIT0_REG } &Opnd::Value(value) => { - assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG); - emit_load_gc_value(cb, &mut gc_offsets, Self::SCRATCH0, value); - Self::SCRATCH0_REG + emit_load_gc_value(cb, &mut gc_offsets, Self::EMIT0_OPND, value); + Self::EMIT0_REG } src_mem @ &Opnd::Mem(Mem { num_bits: src_num_bits, base: MemBase::Reg(src_base_reg_no), disp: src_disp }) => { - // For mem-to-mem store, load the source into SCRATCH0 - assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG); + // For mem-to-mem store, load the source into EMIT0_OPND let src_mem = if mem_disp_fits_bits(src_disp) { src_mem.into() } else { - // Split the load address into SCRATCH0 first if necessary - assert_no_clobber(store_insn, src_base_reg_no, Self::SCRATCH0_REG); - load_effective_address(cb, Self::SCRATCH0, src_base_reg_no, src_disp); - A64Opnd::new_mem(dest_num_bits, Self::SCRATCH0, 0) + // Split the load address into EMIT0_OPND first if necessary + load_effective_address(cb, Self::EMIT0_OPND, src_base_reg_no, src_disp); + A64Opnd::new_mem(dest_num_bits, Self::EMIT0_OPND, 0) }; match src_num_bits { - 64 | 32 => ldur(cb, Self::SCRATCH0, src_mem), - 16 => ldurh(cb, Self::SCRATCH0, src_mem), - 8 => ldurb(cb, Self::SCRATCH0, src_mem), + 64 | 32 => ldur(cb, Self::EMIT0_OPND, src_mem), + 16 => ldurh(cb, Self::EMIT0_OPND, src_mem), + 8 => ldurb(cb, Self::EMIT0_OPND, src_mem), num_bits => panic!("unexpected num_bits: {num_bits}") }; - Self::SCRATCH0_REG + Self::EMIT0_REG } src @ (Opnd::Mem(_) | Opnd::None | Opnd::VReg { .. }) => panic!("Unexpected source operand during arm64_emit: {src:?}") }; let src = A64Opnd::Reg(src_reg); - // Split dest into SCRATCH1 if necessary. + // Split dest into EMIT1_OPND if necessary. let dest = if mem_disp_fits_bits(disp) { dest.into() } else { - assert_no_clobber(store_insn, src_reg.reg_no, Self::SCRATCH1_REG); - assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH1_REG); - load_effective_address(cb, Self::SCRATCH1, base_reg_no, disp); - A64Opnd::new_mem(dest_num_bits, Self::SCRATCH1, 0) + load_effective_address(cb, Self::EMIT1_OPND, base_reg_no, disp); + A64Opnd::new_mem(dest_num_bits, Self::EMIT1_OPND, 0) }; // This order may be surprising but it is correct. 
The way @@ -1169,10 +1209,10 @@ impl Assembler if let Target::Label(label_idx) = target { // Set output to the raw address of the label cb.label_ref(*label_idx, 4, |cb, end_addr, dst_addr| { - adr(cb, Self::SCRATCH0, A64Opnd::new_imm(dst_addr - (end_addr - 4))); + adr(cb, Self::EMIT0_OPND, A64Opnd::new_imm(dst_addr - (end_addr - 4))); }); - mov(cb, out.into(), Self::SCRATCH0); + mov(cb, out.into(), Self::EMIT0_OPND); } else { // Set output to the jump target's raw address let target_code = target.unwrap_code_ptr(); @@ -1197,15 +1237,15 @@ impl Assembler } // Push the flags/state register - mrs(cb, Self::SCRATCH0, SystemRegister::NZCV); - emit_push(cb, Self::SCRATCH0); + mrs(cb, Self::EMIT0_OPND, SystemRegister::NZCV); + emit_push(cb, Self::EMIT0_OPND); }, Insn::CPopAll => { let regs = Assembler::get_caller_save_regs(); // Pop the state/flags register - msr(cb, SystemRegister::NZCV, Self::SCRATCH0); - emit_pop(cb, Self::SCRATCH0); + msr(cb, SystemRegister::NZCV, Self::EMIT0_OPND); + emit_pop(cb, Self::EMIT0_OPND); for reg in regs.into_iter().rev() { emit_pop(cb, A64Opnd::Reg(reg)); @@ -1221,8 +1261,8 @@ impl Assembler if b_offset_fits_bits((dst_addr - src_addr) / 4) { bl(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32)); } else { - emit_load_value(cb, Self::SCRATCH0, dst_addr as u64); - blr(cb, Self::SCRATCH0); + emit_load_value(cb, Self::EMIT0_OPND, dst_addr as u64); + blr(cb, Self::EMIT0_OPND); } }, Insn::CRet { .. } => { @@ -1302,17 +1342,17 @@ impl Assembler let label = cb.new_label("incr_counter_loop".to_string()); cb.write_label(label); - ldaxr(cb, Self::SCRATCH0, mem.into()); - add(cb, Self::SCRATCH0, Self::SCRATCH0, value.into()); + ldaxr(cb, Self::EMIT0_OPND, mem.into()); + add(cb, Self::EMIT0_OPND, Self::EMIT0_OPND, value.into()); // The status register that gets used to track whether or // not the store was successful must be 32 bytes. Since we - // store the SCRATCH registers as their 64-bit versions, we + // store the EMIT registers as their 64-bit versions, we // need to rewrap it here. - let status = A64Opnd::Reg(Self::SCRATCH1.unwrap_reg().with_num_bits(32)); - stlxr(cb, status, Self::SCRATCH0, mem.into()); + let status = A64Opnd::Reg(Self::EMIT1_REG.with_num_bits(32)); + stlxr(cb, status, Self::EMIT0_OPND, mem.into()); - cmp(cb, Self::SCRATCH1, A64Opnd::new_uimm(0)); + cmp(cb, Self::EMIT1_OPND, A64Opnd::new_uimm(0)); emit_conditional_jump::<{Condition::NE}>(cb, Target::Label(label)); }, Insn::Breakpoint => { @@ -1363,9 +1403,16 @@ impl Assembler /// Optimize and compile the stored instructions pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec) -> Result<(CodePtr, Vec), CompileError> { + // The backend is allowed to use scratch registers only if it has not accepted them so far. + let use_scratch_reg = !self.accept_scratch_reg; + let asm = self.arm64_split(); let mut asm = asm.alloc_regs(regs)?; + // We put compile_side_exits after alloc_regs to avoid extending live ranges for VRegs spilled on side exits. 
asm.compile_side_exits(); + if use_scratch_reg { + asm = asm.arm64_split_with_scratch_reg(); + } // Create label instances in the code block for (idx, name) in asm.label_names.iter().enumerate() { @@ -1809,12 +1856,39 @@ mod tests { assert_snapshot!(cb.hexdump(), @"50000058030000140010000000000000b00200f8"); } + #[test] + fn test_store_with_valid_scratch_reg() { + let (mut asm, scratch_reg) = Assembler::new_with_scratch_reg(); + let mut cb = CodeBlock::new_dummy(); + asm.store(Opnd::mem(64, scratch_reg, 0), 0x83902.into()); + + asm.compile_with_num_regs(&mut cb, 0); + assert_disasm_snapshot!(cb.disasm(), @r" + 0x0: mov x16, #0x3902 + 0x4: movk x16, #8, lsl #16 + 0x8: stur x16, [x15] + "); + assert_snapshot!(cb.hexdump(), @"502087d21001a0f2f00100f8"); + } + + #[test] + #[should_panic] + fn test_store_with_invalid_scratch_reg() { + let (_, scratch_reg) = Assembler::new_with_scratch_reg(); + let (mut asm, mut cb) = setup_asm(); + // This would put the source into scratch_reg, messing up the destination + asm.store(Opnd::mem(64, scratch_reg, 0), 0x83902.into()); + + asm.compile_with_num_regs(&mut cb, 0); + } + #[test] #[should_panic] - fn test_store_unserviceable() { + fn test_load_into_with_invalid_scratch_reg() { + let (_, scratch_reg) = Assembler::new_with_scratch_reg(); let (mut asm, mut cb) = setup_asm(); - // This would put the source into SCRATCH_REG, messing up the destination - asm.store(Opnd::mem(64, SCRATCH_OPND, 0), 0x83902.into()); + // This would put the source into scratch_reg, messing up the destination + asm.load_into(scratch_reg, 0x83902.into()); asm.compile_with_num_regs(&mut cb, 0); } @@ -2300,13 +2374,13 @@ mod tests { asm.compile_with_num_regs(&mut cb, ALLOC_REGS.len()); assert_disasm_snapshot!(cb.disasm(), @" - 0x0: mov x16, x0 + 0x0: mov x15, x0 0x4: mov x0, x1 - 0x8: mov x1, x16 + 0x8: mov x1, x15 0xc: mov x16, #0 0x10: blr x16 "); - assert_snapshot!(cb.hexdump(), @"f00300aae00301aae10310aa100080d200023fd6"); + assert_snapshot!(cb.hexdump(), @"ef0300aae00301aae1030faa100080d200023fd6"); } #[test] @@ -2324,16 +2398,16 @@ mod tests { asm.compile_with_num_regs(&mut cb, ALLOC_REGS.len()); assert_disasm_snapshot!(cb.disasm(), @" - 0x0: mov x16, x2 + 0x0: mov x15, x2 0x4: mov x2, x3 - 0x8: mov x3, x16 - 0xc: mov x16, x0 + 0x8: mov x3, x15 + 0xc: mov x15, x0 0x10: mov x0, x1 - 0x14: mov x1, x16 + 0x14: mov x1, x15 0x18: mov x16, #0 0x1c: blr x16 "); - assert_snapshot!(cb.hexdump(), @"f00302aae20303aae30310aaf00300aae00301aae10310aa100080d200023fd6"); + assert_snapshot!(cb.hexdump(), @"ef0302aae20303aae3030faaef0300aae00301aae1030faa100080d200023fd6"); } #[test] @@ -2350,13 +2424,13 @@ mod tests { asm.compile_with_num_regs(&mut cb, ALLOC_REGS.len()); assert_disasm_snapshot!(cb.disasm(), @" - 0x0: mov x16, x0 + 0x0: mov x15, x0 0x4: mov x0, x1 0x8: mov x1, x2 - 0xc: mov x2, x16 + 0xc: mov x2, x15 0x10: mov x16, #0 0x14: blr x16 "); - assert_snapshot!(cb.hexdump(), @"f00300aae00301aae10302aae20310aa100080d200023fd6"); + assert_snapshot!(cb.hexdump(), @"ef0300aae00301aae10302aae2030faa100080d200023fd6"); } } diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs index 76a53c66d6b652..aad5600f569767 100644 --- a/zjit/src/backend/lir.rs +++ b/zjit/src/backend/lir.rs @@ -17,7 +17,6 @@ pub use crate::backend::current::{ NATIVE_STACK_PTR, NATIVE_BASE_PTR, C_ARG_OPNDS, C_RET_REG, C_RET_OPND, }; -pub const SCRATCH_OPND: Opnd = Opnd::Reg(Assembler::SCRATCH_REG); pub static JIT_PRESERVED_REGS: &[Opnd] = &[CFP, SP, EC]; @@ -1173,6 +1172,10 @@ pub struct Assembler { /// Names of 
labels pub(super) label_names: Vec, + /// If true, `push_insn` is allowed to use scratch registers. + /// On `compile`, it also disables the backend's use of them. + pub(super) accept_scratch_reg: bool, + /// If Some, the next ccall should verify its leafness leaf_ccall_stack_size: Option } @@ -1181,12 +1184,12 @@ impl Assembler { /// Create an Assembler pub fn new() -> Self { - Self::new_with_label_names(Vec::default(), 0) + Self::new_with_label_names(Vec::default(), 0, false) } /// Create an Assembler with parameters that are populated by another Assembler instance. /// This API is used for copying an Assembler for the next compiler pass. - pub fn new_with_label_names(label_names: Vec, num_vregs: usize) -> Self { + pub fn new_with_label_names(label_names: Vec, num_vregs: usize, accept_scratch_reg: bool) -> Self { let mut live_ranges = Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY); live_ranges.resize(num_vregs, LiveRange { start: None, end: None }); @@ -1194,6 +1197,7 @@ impl Assembler insns: Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY), live_ranges, label_names, + accept_scratch_reg, leaf_ccall_stack_size: None, } } @@ -1255,6 +1259,14 @@ impl Assembler } } + // If this Assembler should not accept scratch registers, assert no use of them. + if !self.accept_scratch_reg { + let opnd_iter = insn.opnd_iter(); + for opnd in opnd_iter { + assert!(!Self::has_scratch_reg(*opnd), "should not use scratch register: {opnd:?}"); + } + } + self.insns.push(insn); } @@ -1268,9 +1280,9 @@ impl Assembler Target::Label(label) } - // Shuffle register moves, sometimes adding extra moves using SCRATCH_REG, + // Shuffle register moves, sometimes adding extra moves using scratch_reg, // so that they will not rewrite each other before they are used. - pub fn resolve_parallel_moves(old_moves: &[(Reg, Opnd)]) -> Vec<(Reg, Opnd)> { + pub fn resolve_parallel_moves(old_moves: &[(Reg, Opnd)], scratch_reg: Option) -> Option> { // Return the index of a move whose destination is not used as a source if any. fn find_safe_move(moves: &[(Reg, Opnd)]) -> Option { moves.iter().enumerate().find(|&(_, &(dest_reg, _))| { @@ -1289,19 +1301,21 @@ impl Assembler new_moves.push(old_moves.remove(index)); } - // No safe move. Load the source of one move into SCRATCH_REG, and - // then load SCRATCH_REG into the destination when it's safe. + // No safe move. Load the source of one move into scratch_reg, and + // then load scratch_reg into the destination when it's safe. if !old_moves.is_empty() { - // Make sure it's safe to use SCRATCH_REG - assert!(old_moves.iter().all(|&(_, opnd)| opnd != SCRATCH_OPND)); + // If scratch_reg is None, return None and leave it to *_split_with_scratch_regs to resolve it. + let scratch_reg = scratch_reg?.unwrap_reg(); + // Make sure it's safe to use scratch_reg + assert!(old_moves.iter().all(|&(_, opnd)| opnd != Opnd::Reg(scratch_reg))); - // Move SCRATCH <- opnd, and delay reg <- SCRATCH + // Move scratch_reg <- opnd, and delay reg <- scratch_reg let (reg, opnd) = old_moves.remove(0); - new_moves.push((Assembler::SCRATCH_REG, opnd)); - old_moves.push((reg, SCRATCH_OPND)); + new_moves.push((scratch_reg, opnd)); + old_moves.push((reg, Opnd::Reg(scratch_reg))); } } - new_moves + Some(new_moves) } /// Sets the out field on the various instructions that require allocated @@ -1345,7 +1359,7 @@ impl Assembler // live_ranges is indexed by original `index` given by the iterator. 
let live_ranges: Vec = take(&mut self.live_ranges); let mut iterator = self.insns.into_iter().enumerate().peekable(); - let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), live_ranges.len()); + let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), live_ranges.len(), self.accept_scratch_reg); while let Some((index, mut insn)) = iterator.next() { let before_ccall = match (&insn, iterator.peek().map(|(_, insn)| insn)) { @@ -1510,9 +1524,14 @@ impl Assembler let is_ccall = matches!(insn, Insn::CCall { .. }); match insn { Insn::ParallelMov { moves } => { - // Now that register allocation is done, it's ready to resolve parallel moves. - for (reg, opnd) in Self::resolve_parallel_moves(&moves) { - asm.load_into(Opnd::Reg(reg), opnd); + // For trampolines that use scratch registers, attempt to lower ParallelMov without scratch_reg. + if let Some(moves) = Self::resolve_parallel_moves(&moves, None) { + for (reg, opnd) in moves { + asm.load_into(Opnd::Reg(reg), opnd); + } + } else { + // If it needs a scratch_reg, leave it to *_split_with_scratch_regs to handle it. + asm.push_insn(Insn::ParallelMov { moves }); } } Insn::CCall { opnds, fptr, start_marker, end_marker, out } => { @@ -1586,7 +1605,7 @@ impl Assembler for (idx, target) in targets { // Compile a side exit. Note that this is past the split pass and alloc_regs(), - // so you can't use a VReg or an instruction that needs to be split. + // so you can't use an instruction that returns a VReg. if let Target::SideExit { pc, stack, locals, reason, label } = target { asm_comment!(self, "Exit: {reason}"); let side_exit_label = if let Some(label) = label { @@ -1609,35 +1628,24 @@ impl Assembler } asm_comment!(self, "save cfp->pc"); - self.load_into(SCRATCH_OPND, Opnd::const_ptr(pc)); - self.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC), SCRATCH_OPND); + self.store(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_PC), Opnd::const_ptr(pc)); asm_comment!(self, "save cfp->sp"); - self.lea_into(SCRATCH_OPND, Opnd::mem(64, SP, stack.len() as i32 * SIZEOF_VALUE_I32)); - let cfp_sp = Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP); - self.store(cfp_sp, SCRATCH_OPND); + self.lea_into(Opnd::mem(64, CFP, RUBY_OFFSET_CFP_SP), Opnd::mem(64, SP, stack.len() as i32 * SIZEOF_VALUE_I32)); // Using C_RET_OPND as an additional scratch register, which is no longer used if get_option!(stats) { asm_comment!(self, "increment a side exit counter"); - self.load_into(SCRATCH_OPND, Opnd::const_ptr(exit_counter_ptr(reason))); - self.incr_counter_with_reg(Opnd::mem(64, SCRATCH_OPND, 0), 1.into(), C_RET_OPND); + self.incr_counter(Opnd::const_ptr(exit_counter_ptr(reason)), 1.into()); if let SideExitReason::UnhandledYARVInsn(opcode) = reason { asm_comment!(self, "increment an unhandled YARV insn counter"); - self.load_into(SCRATCH_OPND, Opnd::const_ptr(exit_counter_ptr_for_opcode(opcode))); - self.incr_counter_with_reg(Opnd::mem(64, SCRATCH_OPND, 0), 1.into(), C_RET_OPND); + self.incr_counter(Opnd::const_ptr(exit_counter_ptr_for_opcode(opcode)), 1.into()); } } if get_option!(trace_side_exits) { - // Use `load_into` with `C_ARG_OPNDS` instead of `opnds` argument for ccall, since `compile_side_exits` - // is after the split pass, which would allow use of `opnds`. 
- self.load_into(C_ARG_OPNDS[0], Opnd::const_ptr(pc as *const u8)); - self.ccall( - rb_zjit_record_exit_stack as *const u8, - vec![] - ); + asm_ccall!(self, rb_zjit_record_exit_stack, Opnd::const_ptr(pc as *const u8)); } asm_comment!(self, "exit to the interpreter"); @@ -1823,19 +1831,6 @@ impl Assembler { self.push_insn(Insn::IncrCounter { mem, value }); } - /// incr_counter() but uses a specific register to split Insn::Lea - pub fn incr_counter_with_reg(&mut self, mem: Opnd, value: Opnd, reg: Opnd) { - assert!(matches!(reg, Opnd::Reg(_)), "incr_counter_with_reg should take a register, got: {reg:?}"); - let counter_opnd = if cfg!(target_arch = "aarch64") { // See arm64_split() - assert_ne!(reg, SCRATCH_OPND, "SCRATCH_REG should be reserved for IncrCounter"); - self.lea_into(reg, mem); - reg - } else { // x86_emit() expects Opnd::Mem - mem - }; - self.incr_counter(counter_opnd, value); - } - pub fn jbe(&mut self, target: Target) { self.push_insn(Insn::Jbe(target)); } @@ -1898,7 +1893,7 @@ impl Assembler { } pub fn lea_into(&mut self, out: Opnd, opnd: Opnd) { - assert!(matches!(out, Opnd::Reg(_)), "Destination of lea_into must be a register, got: {out:?}"); + assert!(matches!(out, Opnd::Reg(_) | Opnd::Mem(_)), "Destination of lea_into must be a register or memory, got: {out:?}"); self.push_insn(Insn::Lea { opnd, out }); } diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs index 2edd15380871e1..b6c4658463048a 100644 --- a/zjit/src/backend/x86_64/mod.rs +++ b/zjit/src/backend/x86_64/mod.rs @@ -83,7 +83,7 @@ impl From<&Opnd> for X86Opnd { /// List of registers that can be used for register allocation. /// This has the same number of registers for x86_64 and arm64. -/// SCRATCH_REG is excluded. +/// SCRATCH_OPND is excluded. pub const ALLOC_REGS: &[Reg] = &[ RDI_REG, RSI_REG, @@ -95,14 +95,26 @@ pub const ALLOC_REGS: &[Reg] = &[ RAX_REG, ]; -impl Assembler -{ - /// Special scratch registers for intermediate processing. - /// This register is call-clobbered (so we don't have to save it before using it). - /// Avoid using if you can since this is used to lower [Insn] internally and - /// so conflicts are possible. - pub const SCRATCH_REG: Reg = R11_REG; - const SCRATCH0: X86Opnd = X86Opnd::Reg(Assembler::SCRATCH_REG); +/// Special scratch register for intermediate processing. It should be used only by +/// [`Assembler::x86_split_with_scratch_reg`] or [`Assembler::new_with_scratch_reg`]. +const SCRATCH_OPND: Opnd = Opnd::Reg(R11_REG); + +impl Assembler { + /// Return an Assembler with scratch registers disabled in the backend, and a scratch register. + pub fn new_with_scratch_reg() -> (Self, Opnd) { + (Self::new_with_label_names(Vec::default(), 0, true), SCRATCH_OPND) + } + + /// Return true if opnd contains a scratch reg + pub fn has_scratch_reg(opnd: Opnd) -> bool { + match opnd { + Opnd::Reg(_) => opnd == SCRATCH_OPND, + Opnd::Mem(Mem { base: MemBase::Reg(reg_no), .. 
}) => { + reg_no == SCRATCH_OPND.unwrap_reg().reg_no + } + _ => false, + } + } /// Get the list of registers from which we can allocate on this platform pub fn get_alloc_regs() -> Vec { @@ -127,7 +139,7 @@ impl Assembler { let live_ranges: Vec = take(&mut self.live_ranges); let mut iterator = self.insns.into_iter().enumerate().peekable(); - let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), live_ranges.len()); + let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), live_ranges.len(), self.accept_scratch_reg); while let Some((index, mut insn)) = iterator.next() { let is_load = matches!(insn, Insn::Load { .. } | Insn::LoadInto { .. }); @@ -374,36 +386,165 @@ impl Assembler asm } - /// Emit platform-specific machine code - pub fn x86_emit(&mut self, cb: &mut CodeBlock) -> Option> { + /// Split instructions using scratch registers. To maximize the use of the register pool + /// for VRegs, most splits should happen in [`Self::x86_split`]. However, some instructions + /// need to be split with registers after `alloc_regs`, e.g. for `compile_side_exits`, so + /// this splits them and uses scratch registers for it. + pub fn x86_split_with_scratch_reg(mut self) -> Assembler { /// For some instructions, we want to be able to lower a 64-bit operand /// without requiring more registers to be available in the register - /// allocator. So we just use the SCRATCH0 register temporarily to hold + /// allocator. So we just use the SCRATCH_OPND register temporarily to hold /// the value before we immediately use it. - fn emit_64bit_immediate(cb: &mut CodeBlock, opnd: &Opnd) -> X86Opnd { + fn split_64bit_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd { match opnd { Opnd::Imm(value) => { // 32-bit values will be sign-extended - if imm_num_bits(*value) > 32 { - mov(cb, Assembler::SCRATCH0, opnd.into()); - Assembler::SCRATCH0 + if imm_num_bits(value) > 32 { + asm.mov(SCRATCH_OPND, opnd); + SCRATCH_OPND } else { - opnd.into() + opnd } }, Opnd::UImm(value) => { // 32-bit values will be sign-extended - if imm_num_bits(*value as i64) > 32 { - mov(cb, Assembler::SCRATCH0, opnd.into()); - Assembler::SCRATCH0 + if imm_num_bits(value as i64) > 32 { + asm.mov(SCRATCH_OPND, opnd); + SCRATCH_OPND } else { - imm_opnd(*value as i64) + Opnd::Imm(value as i64) } }, - _ => opnd.into() + _ => opnd } } + let mut iterator = self.insns.into_iter().enumerate().peekable(); + let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), self.live_ranges.len(), true); + + while let Some((_, mut insn)) = iterator.next() { + match &mut insn { + Insn::Add { right, .. } | + Insn::Sub { right, .. } | + Insn::Mul { right, .. } | + Insn::And { right, .. } | + Insn::Or { right, .. } | + Insn::Xor { right, .. } | + Insn::Test { right, .. } => { + *right = split_64bit_immediate(&mut asm, *right); + asm.push_insn(insn); + } + Insn::Cmp { left, right } => { + let num_bits = match right { + Opnd::Imm(value) => Some(imm_num_bits(*value)), + Opnd::UImm(value) => Some(uimm_num_bits(*value)), + _ => None + }; + + // If the immediate is less than 64 bits (like 32, 16, 8), and the operand + // sizes match, then we can represent it as an immediate in the instruction + // without moving it to a register first. + // IOW, 64 bit immediates must always be moved to a register + // before comparisons, where other sizes may be encoded + // directly in the instruction. 
+ let use_imm = num_bits.is_some() && left.num_bits() == num_bits && num_bits.unwrap() < 64; + if !use_imm { + *right = split_64bit_immediate(&mut asm, *right); + } + asm.push_insn(insn); + } + // For compile_side_exits, support splitting simple C arguments here + Insn::CCall { opnds, .. } if !opnds.is_empty() => { + for (i, opnd) in opnds.iter().enumerate() { + asm.load_into(C_ARG_OPNDS[i], *opnd); + } + *opnds = vec![]; + asm.push_insn(insn); + } + &mut Insn::Lea { opnd, out } => { + match (opnd, out) { + // Split here for compile_side_exits + (Opnd::Mem(_), Opnd::Mem(_)) => { + asm.lea_into(SCRATCH_OPND, opnd); + asm.store(out, SCRATCH_OPND); + } + _ => { + asm.push_insn(insn); + } + } + } + Insn::LeaJumpTarget { target, out } => { + if let Target::Label(_) = target { + asm.push_insn(Insn::LeaJumpTarget { out: SCRATCH_OPND, target: target.clone() }); + asm.mov(*out, SCRATCH_OPND); + } + } + // Convert Opnd::const_ptr into Opnd::Mem. This split is done here to give + // a register for compile_side_exits. + &mut Insn::IncrCounter { mem, value } => { + assert!(matches!(mem, Opnd::UImm(_))); + asm.load_into(SCRATCH_OPND, mem); + asm.incr_counter(Opnd::mem(64, SCRATCH_OPND, 0), value); + } + // Resolve ParallelMov that couldn't be handled without a scratch register. + Insn::ParallelMov { moves } => { + for (reg, opnd) in Self::resolve_parallel_moves(&moves, Some(SCRATCH_OPND)).unwrap() { + asm.load_into(Opnd::Reg(reg), opnd); + } + } + // Handle various operand combinations for spills on compile_side_exits. + &mut Insn::Store { dest, src } => { + let Opnd::Mem(Mem { num_bits, .. }) = dest else { + panic!("Unexpected Insn::Store destination in x86_split_with_scratch_reg: {dest:?}"); + }; + + let src = match src { + Opnd::Reg(_) => src, + Opnd::Mem(_) => { + asm.mov(SCRATCH_OPND, src); + SCRATCH_OPND + } + Opnd::Imm(imm) => { + // For 64 bit destinations, 32-bit values will be sign-extended + if num_bits == 64 && imm_num_bits(imm) > 32 { + asm.mov(SCRATCH_OPND, src); + SCRATCH_OPND + } else if uimm_num_bits(imm as u64) <= num_bits { + // If the bit string is short enough for the destination, use the unsigned representation. + // Note that 64-bit and negative values are ruled out. + Opnd::UImm(imm as u64) + } else { + src + } + } + Opnd::UImm(imm) => { + // For 64 bit destinations, 32-bit values will be sign-extended + if num_bits == 64 && imm_num_bits(imm as i64) > 32 { + asm.mov(SCRATCH_OPND, src); + SCRATCH_OPND + } else { + src.into() + } + } + Opnd::Value(_) => { + asm.load_into(SCRATCH_OPND, src); + SCRATCH_OPND + } + src @ (Opnd::None | Opnd::VReg { .. }) => panic!("Unexpected source operand during x86_split_with_scratch_reg: {src:?}"), + }; + asm.store(dest, src); + } + _ => { + asm.push_insn(insn); + } + } + } + + asm + } + + /// Emit platform-specific machine code + pub fn x86_emit(&mut self, cb: &mut CodeBlock) -> Option> { fn emit_csel( cb: &mut CodeBlock, truthy: Opnd, @@ -506,33 +647,27 @@ impl Assembler } Insn::Add { left, right, .. } => { - let opnd1 = emit_64bit_immediate(cb, right); - add(cb, left.into(), opnd1); + add(cb, left.into(), right.into()); }, Insn::Sub { left, right, .. } => { - let opnd1 = emit_64bit_immediate(cb, right); - sub(cb, left.into(), opnd1); + sub(cb, left.into(), right.into()); }, Insn::Mul { left, right, .. } => { - let opnd1 = emit_64bit_immediate(cb, right); - imul(cb, left.into(), opnd1); + imul(cb, left.into(), right.into()); }, Insn::And { left, right, .. 
} => { - let opnd1 = emit_64bit_immediate(cb, right); - and(cb, left.into(), opnd1); + and(cb, left.into(), right.into()); }, Insn::Or { left, right, .. } => { - let opnd1 = emit_64bit_immediate(cb, right); - or(cb, left.into(), opnd1); + or(cb, left.into(), right.into()); }, Insn::Xor { left, right, .. } => { - let opnd1 = emit_64bit_immediate(cb, right); - xor(cb, left.into(), opnd1); + xor(cb, left.into(), right.into()); }, Insn::Not { opnd, .. } => { @@ -551,64 +686,6 @@ impl Assembler shr(cb, opnd.into(), shift.into()) }, - store_insn @ Insn::Store { dest, src } => { - let &Opnd::Mem(Mem { num_bits, base: MemBase::Reg(base_reg_no), disp: _ }) = dest else { - panic!("Unexpected Insn::Store destination in x64_emit: {dest:?}"); - }; - - // This kind of tricky clobber can only happen for explicit use of SCRATCH_REG, - // so we panic to get the author to change their code. - #[track_caller] - fn assert_no_clobber(store_insn: &Insn, user_use: u8, backend_use: Reg) { - assert_ne!( - backend_use.reg_no, - user_use, - "Emitting {store_insn:?} would clobber {user_use:?}, in conflict with its semantics" - ); - } - - let scratch = X86Opnd::Reg(Self::SCRATCH_REG); - let src = match src { - Opnd::Reg(_) => src.into(), - &Opnd::Mem(_) => { - assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG); - mov(cb, scratch, src.into()); - scratch - } - &Opnd::Imm(imm) => { - // For 64 bit destinations, 32-bit values will be sign-extended - if num_bits == 64 && imm_num_bits(imm) > 32 { - assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG); - mov(cb, scratch, src.into()); - scratch - } else if uimm_num_bits(imm as u64) <= num_bits { - // If the bit string is short enough for the destination, use the unsigned representation. - // Note that 64-bit and negative values are ruled out. - uimm_opnd(imm as u64) - } else { - src.into() - } - } - &Opnd::UImm(imm) => { - // For 64 bit destinations, 32-bit values will be sign-extended - if num_bits == 64 && imm_num_bits(imm as i64) > 32 { - assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG); - mov(cb, scratch, src.into()); - scratch - } else { - src.into() - } - } - &Opnd::Value(value) => { - assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG); - emit_load_gc_value(cb, &mut gc_offsets, scratch, value); - scratch - } - src @ (Opnd::None | Opnd::VReg { .. }) => panic!("Unexpected source operand during x86_emit: {src:?}") - }; - mov(cb, dest.into(), src); - } - // This assumes only load instructions can contain references to GC'd Value operands Insn::Load { opnd, out } | Insn::LoadInto { dest: out, opnd } => { @@ -626,6 +703,7 @@ impl Assembler Insn::ParallelMov { .. 
} => unreachable!("{insn:?} should have been lowered at alloc_regs()"), + Insn::Store { dest, src } | Insn::Mov { dest, src } => { mov(cb, dest.into(), src.into()); }, @@ -638,13 +716,12 @@ impl Assembler // Load address of jump target Insn::LeaJumpTarget { target, out } => { if let Target::Label(label) = target { + let out = *out; // Set output to the raw address of the label - cb.label_ref(*label, 7, |cb, src_addr, dst_addr| { + cb.label_ref(*label, 7, move |cb, src_addr, dst_addr| { let disp = dst_addr - src_addr; - lea(cb, Self::SCRATCH0, mem_opnd(8, RIP, disp.try_into().unwrap())); + lea(cb, out.into(), mem_opnd(8, RIP, disp.try_into().unwrap())); }); - - mov(cb, out.into(), Self::SCRATCH0); } else { // Set output to the jump target's raw address let target_code = target.unwrap_code_ptr(); @@ -700,30 +777,12 @@ impl Assembler // Compare Insn::Cmp { left, right } => { - let num_bits = match right { - Opnd::Imm(value) => Some(imm_num_bits(*value)), - Opnd::UImm(value) => Some(uimm_num_bits(*value)), - _ => None - }; - - // If the immediate is less than 64 bits (like 32, 16, 8), and the operand - // sizes match, then we can represent it as an immediate in the instruction - // without moving it to a register first. - // IOW, 64 bit immediates must always be moved to a register - // before comparisons, where other sizes may be encoded - // directly in the instruction. - if num_bits.is_some() && left.num_bits() == num_bits && num_bits.unwrap() < 64 { - cmp(cb, left.into(), right.into()); - } else { - let emitted = emit_64bit_immediate(cb, right); - cmp(cb, left.into(), emitted); - } + cmp(cb, left.into(), right.into()); } // Test and set flags Insn::Test { left, right } => { - let emitted = emit_64bit_immediate(cb, right); - test(cb, left.into(), emitted); + test(cb, left.into(), right.into()); } Insn::JmpOpnd(opnd) => { @@ -893,9 +952,16 @@ impl Assembler /// Optimize and compile the stored instructions pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec) -> Result<(CodePtr, Vec), CompileError> { + // The backend is allowed to use scratch registers only if it has not accepted them so far. + let use_scratch_regs = !self.accept_scratch_reg; + let asm = self.x86_split(); let mut asm = asm.alloc_regs(regs)?; + // We put compile_side_exits after alloc_regs to avoid extending live ranges for VRegs spilled on side exits. 
asm.compile_side_exits(); + if use_scratch_regs { + asm = asm.x86_split_with_scratch_reg(); + } // Create label instances in the code block for (idx, name) in asm.label_names.iter().enumerate() { @@ -1527,6 +1593,7 @@ mod tests { assert!(imitation_heap_value.heap_object_p()); asm.store(Opnd::mem(VALUE_BITS, SP, 0), imitation_heap_value.into()); + asm = asm.x86_split_with_scratch_reg(); let gc_offsets = asm.x86_emit(&mut cb).unwrap(); assert_eq!(1, gc_offsets.len(), "VALUE source operand should be reported as gc offset"); diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 262a0361b4a74e..c5bdbcfe0a5a83 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -18,7 +18,7 @@ use crate::state::ZJITState; use crate::stats::{send_fallback_counter, exit_counter_for_compile_error, incr_counter, incr_counter_by, send_fallback_counter_for_method_type, send_without_block_fallback_counter_for_method_type, send_fallback_counter_ptr_for_opcode, CompileError}; use crate::stats::{counter_ptr, with_time_stat, Counter, Counter::{compile_time_ns, exit_compile_error}}; use crate::{asm::CodeBlock, cruby::*, options::debug, virtualmem::CodePtr}; -use crate::backend::lir::{self, asm_comment, asm_ccall, Assembler, Opnd, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_STACK_PTR, NATIVE_BASE_PTR, SCRATCH_OPND, SP}; +use crate::backend::lir::{self, asm_comment, asm_ccall, Assembler, Opnd, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_STACK_PTR, NATIVE_BASE_PTR, SP}; use crate::hir::{iseq_to_hir, BlockId, BranchEdge, Invariant, RangeType, SideExitReason::{self, *}, SpecialBackrefSymbol, SpecialObjectType}; use crate::hir::{Const, FrameState, Function, Insn, InsnId, SendFallbackReason}; use crate::hir_type::{types, Type}; @@ -1592,9 +1592,7 @@ fn gen_guard_bit_equals(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, /// Generate code that records unoptimized C functions if --zjit-stats is enabled fn gen_incr_counter_ptr(asm: &mut Assembler, counter_ptr: *mut u64) { if get_option!(stats) { - let ptr_reg = asm.load(Opnd::const_ptr(counter_ptr as *const u8)); - let counter_opnd = Opnd::mem(64, ptr_reg, 0); - asm.incr_counter(counter_opnd, Opnd::UImm(1)); + asm.incr_counter(Opnd::const_ptr(counter_ptr as *const u8), Opnd::UImm(1)); } } @@ -1963,12 +1961,12 @@ fn function_stub_hit_body(cb: &mut CodeBlock, iseq_call: &IseqCallRef) -> Result /// Compile a stub for an ISEQ called by SendWithoutBlockDirect fn gen_function_stub(cb: &mut CodeBlock, iseq_call: IseqCallRef) -> Result { - let mut asm = Assembler::new(); + let (mut asm, scratch_reg) = Assembler::new_with_scratch_reg(); asm_comment!(asm, "Stub: {}", iseq_get_location(iseq_call.iseq.get(), 0)); // Call function_stub_hit using the shared trampoline. See `gen_function_stub_hit_trampoline`. // Use load_into instead of mov, which is split on arm64, to avoid clobbering ALLOC_REGS. 
- asm.load_into(SCRATCH_OPND, Opnd::const_ptr(Rc::into_raw(iseq_call))); + asm.load_into(scratch_reg, Opnd::const_ptr(Rc::into_raw(iseq_call))); asm.jmp(ZJITState::get_function_stub_hit_trampoline().into()); asm.compile(cb).map(|(code_ptr, gc_offsets)| { @@ -1979,7 +1977,7 @@ fn gen_function_stub(cb: &mut CodeBlock, iseq_call: IseqCallRef) -> Result Result { - let mut asm = Assembler::new(); + let (mut asm, scratch_reg) = Assembler::new_with_scratch_reg(); asm_comment!(asm, "function_stub_hit trampoline"); // Maintain alignment for x86_64, and set up a frame for arm64 properly @@ -1992,8 +1990,8 @@ pub fn gen_function_stub_hit_trampoline(cb: &mut CodeBlock) -> Result Result Date: Tue, 14 Oct 2025 18:16:07 -0400 Subject: [PATCH 5/6] YJIT: Use `mem::take` over `drain(..).collect()` --- yjit/src/backend/ir.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 40df3ae4d5830b..8205d6de76bd2f 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -1602,7 +1602,7 @@ impl Assembler if c_args.len() > 0 { // Resolve C argument dependencies let c_args_len = c_args.len() as isize; - let moves = Self::reorder_reg_moves(&c_args.drain(..).collect()); + let moves = Self::reorder_reg_moves(&std::mem::take(&mut c_args)); shift_live_ranges(&mut shifted_live_ranges, asm.insns.len(), moves.len() as isize - c_args_len); // Push batched C arguments From df5d63cfa2af8902526d83775bec8192e29fcd1b Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Tue, 14 Oct 2025 18:26:08 -0400 Subject: [PATCH 6/6] [DOC] Fix typo in String#partition --- doc/string/partition.rdoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/string/partition.rdoc b/doc/string/partition.rdoc index ece034ee66225e..ee445bd21f32ba 100644 --- a/doc/string/partition.rdoc +++ b/doc/string/partition.rdoc @@ -29,7 +29,7 @@ If +pattern+ is a Regexp, performs the equivalent of self.match(pattern) ["hello", "", ""] If +pattern+ is not a Regexp, converts it to a string (if it is not already one), -then performs the equivalet of self.index(pattern) +then performs the equivalent of self.index(pattern) (and does _not_ set {pattern-matching global variables}[rdoc-ref:globals.md@Pattern+Matching]): 'hello'.partition('h') # => ["", "h", "ello"]
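
The optional-scratch form of `resolve_parallel_moves` shown earlier in this series follows the standard parallel-move sequencing scheme: keep emitting any move whose destination is not read by a remaining move, and once only cycles are left, route one source through a scratch register (or report failure when no scratch register is available, as the new `Option` return does). The following is a minimal standalone sketch of that approach, not the ZJIT implementation: registers are reduced to plain `u8` ids and the function name is illustrative.

```
// Illustrative sketch only: a move (dest, src) means "dest <- src".
type RegId = u8;

/// Order parallel moves so no destination is clobbered before it is read.
/// Cycles are broken through `scratch`; returns None if a cycle remains
/// but no scratch register was provided.
fn sequence_parallel_moves(
    mut moves: Vec<(RegId, RegId)>,
    scratch: Option<RegId>,
) -> Option<Vec<(RegId, RegId)>> {
    let mut ordered = Vec::with_capacity(moves.len());
    while !moves.is_empty() {
        // A move is safe when its destination is not the source of any pending move.
        let safe = (0..moves.len()).find(|&i| {
            let dest = moves[i].0;
            moves.iter().all(|&(_, src)| src != dest)
        });
        match safe {
            Some(i) => ordered.push(moves.remove(i)),
            None => {
                // Only cycles remain; route one source through the scratch register.
                let scratch = scratch?;
                assert!(moves.iter().all(|&(_, src)| src != scratch));
                let (dest, src) = moves.remove(0);
                ordered.push((scratch, src)); // scratch <- src now
                moves.push((dest, scratch));  // dest <- scratch once dest is free
            }
        }
    }
    Some(ordered)
}

fn main() {
    // Swap r0 and r1, plus a chained copy into r2: {r0<-r1, r1<-r0, r2<-r0}.
    let moves = vec![(0, 1), (1, 0), (2, 0)];
    assert_eq!(sequence_parallel_moves(moves.clone(), None), None);
    assert_eq!(
        sequence_parallel_moves(moves, Some(7)),
        Some(vec![(2, 0), (7, 1), (1, 0), (0, 7)])
    );
}
```

With no scratch register the swap cycle cannot be sequenced, which is why the trampoline path above retries the lowering later in a pass that does have one.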
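
Patch 5 swaps `drain(..).collect()` for `std::mem::take`. `take` swaps an empty `Vec` (which owns no heap buffer) into place and hands back the original allocation, whereas `drain(..).collect::<Vec<_>>()` copies every element into a second, freshly allocated vector before the first one is left empty. A small sketch of the difference, outside any YJIT/ZJIT types:

```
fn main() {
    // drain(..).collect() leaves `insns` empty, but only after copying the
    // elements into a new allocation.
    let mut insns = vec!["mov", "add", "ret"];
    let copied: Vec<&str> = insns.drain(..).collect();
    assert!(insns.is_empty());
    assert_eq!(copied, ["mov", "add", "ret"]);

    // mem::take moves the original buffer out unchanged: same contents,
    // same allocation, no element copy.
    let mut insns = vec!["mov", "add", "ret"];
    let buf_before = insns.as_ptr();
    let taken = std::mem::take(&mut insns);
    assert!(insns.is_empty());
    assert_eq!(taken, ["mov", "add", "ret"]);
    assert_eq!(taken.as_ptr(), buf_before); // the original buffer just moved
}
```

Either way the source vector ends up empty, so the surrounding code behaves the same; only the transient copy and allocation go away.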