diff --git a/depend b/depend index fa61de77a00e62..5ed27d04e0c38d 100644 --- a/depend +++ b/depend @@ -7393,6 +7393,7 @@ jit.$(OBJEXT): $(top_srcdir)/internal/sanitizers.h jit.$(OBJEXT): $(top_srcdir)/internal/serial.h jit.$(OBJEXT): $(top_srcdir)/internal/set_table.h jit.$(OBJEXT): $(top_srcdir)/internal/static_assert.h +jit.$(OBJEXT): $(top_srcdir)/internal/string.h jit.$(OBJEXT): $(top_srcdir)/internal/variable.h jit.$(OBJEXT): $(top_srcdir)/internal/vm.h jit.$(OBJEXT): $(top_srcdir)/internal/warnings.h diff --git a/doc/string/split.rdoc b/doc/string/split.rdoc index 131c14b83fcbda..9e61bc5bab3751 100644 --- a/doc/string/split.rdoc +++ b/doc/string/split.rdoc @@ -1,99 +1,103 @@ -Returns an array of substrings of +self+ -that are the result of splitting +self+ +Creates an array of substrings by splitting +self+ at each occurrence of the given field separator +field_sep+. -When +field_sep+ is $;: +With no arguments given, +splits using the field separator $;, +whose default value is +nil+. -- If $; is +nil+ (its default value), - the split occurs just as if +field_sep+ were given as a space character - (see below). +With no block given, returns the array of substrings: -- If $; is a string, - the split occurs just as if +field_sep+ were given as that string - (see below). 
+ 'abracadabra'.split('a') # => ["", "br", "c", "d", "br"] -When +field_sep+ is ' ' and +limit+ is +0+ (its default value), -the split occurs at each sequence of whitespace: +When +field_sep+ is +nil+ or ' ' (a single space), +splits at each sequence of whitespace: - 'abc def ghi'.split(' ') # => ["abc", "def", "ghi"] - "abc \n\tdef\t\n ghi".split(' ') # => ["abc", "def", "ghi"] - 'abc def ghi'.split(' ') # => ["abc", "def", "ghi"] + 'foo bar baz'.split(nil) # => ["foo", "bar", "baz"] + 'foo bar baz'.split(' ') # => ["foo", "bar", "baz"] + "foo \n\tbar\t\n baz".split(' ') # => ["foo", "bar", "baz"] + 'foo bar baz'.split(' ') # => ["foo", "bar", "baz"] ''.split(' ') # => [] -When +field_sep+ is a string different from ' ' -and +limit+ is +0+, -the split occurs at each occurrence of +field_sep+; -trailing empty substrings are not returned: +When +field_sep+ is an empty string, +splits at every character: - 'abracadabra'.split('ab') # => ["", "racad", "ra"] - 'aaabcdaaa'.split('a') # => ["", "", "", "bcd"] - ''.split('a') # => [] - '3.14159'.split('1') # => ["3.", "4", "59"] - '!@#$%^$&*($)_+'.split('$') # => ["!@#", "%^", "&*(", ")_+"] - 'тест'.split('т') # => ["", "ес"] - 'こんにちは'.split('に') # => ["こん", "ちは"] + 'abracadabra'.split('') # => ["a", "b", "r", "a", "c", "a", "d", "a", "b", "r", "a"] + ''.split('') # => [] + 'тест'.split('') # => ["т", "е", "с", "т"] + 'こんにちは'.split('') # => ["こ", "ん", "に", "ち", "は"] -When +field_sep+ is a Regexp and +limit+ is +0+, -the split occurs at each occurrence of a match; -trailing empty substrings are not returned: +When +field_sep+ is a non-empty string and different from ' ' (a single space), +uses that string as the separator: + + 'abracadabra'.split('a') # => ["", "br", "c", "d", "br"] + 'abracadabra'.split('ab') # => ["", "racad", "ra"] + ''.split('a') # => [] + 'тест'.split('т') # => ["", "ес"] + 'こんにちは'.split('に') # => ["こん", "ちは"] + +When +field_sep+ is a Regexp, +splits at each occurrence of a matching substring: 
'abracadabra'.split(/ab/) # => ["", "racad", "ra"] - 'aaabcdaaa'.split(/a/) # => ["", "", "", "bcd"] - 'aaabcdaaa'.split(//) # => ["a", "a", "a", "b", "c", "d", "a", "a", "a"] '1 + 1 == 2'.split(/\W+/) # => ["1", "1", "2"] + 'abracadabra'.split(//) # => ["a", "b", "r", "a", "c", "a", "d", "a", "b", "r", "a"] -If the \Regexp contains groups, their matches are also included +If the \Regexp contains groups, their matches are included in the returned array: '1:2:3'.split(/(:)()()/, 2) # => ["1", ":", "", "", "2:3"] -As seen above, if +limit+ is +0+, -trailing empty substrings are not returned: +Argument +limit+ sets a limit on the size of the returned array; +it also determines whether trailing empty strings are included in the returned array. - 'aaabcdaaa'.split('a') # => ["", "", "", "bcd"] +When +limit+ is zero, +there is no limit on the size of the array, +but trailing empty strings are omitted: -If +limit+ is positive integer +n+, no more than n - 1- -splits occur, so that at most +n+ substrings are returned, -and trailing empty substrings are included: + 'abracadabra'.split('', 0) # => ["a", "b", "r", "a", "c", "a", "d", "a", "b", "r", "a"] + 'abracadabra'.split('a', 0) # => ["", "br", "c", "d", "br"] # Empty string after last 'a' omitted. - 'aaabcdaaa'.split('a', 1) # => ["aaabcdaaa"] - 'aaabcdaaa'.split('a', 2) # => ["", "aabcdaaa"] - 'aaabcdaaa'.split('a', 5) # => ["", "", "", "bcd", "aa"] - 'aaabcdaaa'.split('a', 7) # => ["", "", "", "bcd", "", "", ""] - 'aaabcdaaa'.split('a', 8) # => ["", "", "", "bcd", "", "", ""] +When +limit+ is a positive integer, +there is a limit on the size of the array (no more than n - 1 splits occur), +and trailing empty strings are included: -Note that if +field_sep+ is a \Regexp containing groups, -their matches are in the returned array, but do not count toward the limit. 
+ 'abracadabra'.split('', 3) # => ["a", "b", "racadabra"] + 'abracadabra'.split('a', 3) # => ["", "br", "cadabra"] + 'abracadabra'.split('', 30) # => ["a", "b", "r", "a", "c", "a", "d", "a", "b", "r", "a", ""] + 'abracadabra'.split('a', 30) # => ["", "br", "c", "d", "br", ""] + 'abracadabra'.split('', 1) # => ["abracadabra"] + 'abracadabra'.split('a', 1) # => ["abracadabra"] -If +limit+ is negative, it behaves the same as if +limit+ was zero, -meaning that there is no limit, -and trailing empty substrings are included: +When +limit+ is negative, +there is no limit on the size of the array, +and trailing empty strings are included: - 'aaabcdaaa'.split('a', -1) # => ["", "", "", "bcd", "", "", ""] + 'abracadabra'.split('', -1) # => ["a", "b", "r", "a", "c", "a", "d", "a", "b", "r", "a", ""] + 'abracadabra'.split('a', -1) # => ["", "br", "c", "d", "br", ""] If a block is given, it is called with each substring and returns +self+: - 'abc def ghi'.split(' ') {|substring| p substring } + 'foo bar baz'.split(' ') {|substring| p substring } + +Output: + + "foo" + "bar" + "baz" -Output: +Note that the above example is functionally equivalent to: - "abc" - "def" - "ghi" - => "abc def ghi" + 'foo bar baz'.split(' ').each {|substring| p substring } -Note that the above example is functionally the same as calling +#each+ after -+#split+ and giving the same block. However, the above example has better -performance because it avoids the creation of an intermediate array. Also, -note the different return values. +Output: - 'abc def ghi'.split(' ').each {|substring| p substring } + "foo" + "bar" + "baz" -Output: +But the latter: - "abc" - "def" - "ghi" - => ["abc", "def", "ghi"] +- Has poorer performance because it creates an intermediate array. +- Returns an array (instead of +self+). -Related: String#partition, String#rpartition. +Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non--5CString]. 
diff --git a/doc/string/squeeze.rdoc b/doc/string/squeeze.rdoc new file mode 100644 index 00000000000000..1a38c08b3274a9 --- /dev/null +++ b/doc/string/squeeze.rdoc @@ -0,0 +1,33 @@ +Returns a copy of +self+ with each tuple (doubling, tripling, etc.) of specified characters +"squeezed" down to a single character. + +The tuples to be squeezed are specified by arguments +selectors+, +each of which is a string; +see {Character Selectors}[rdoc-ref:character_selectors.rdoc@Character+Selectors]. + +A single argument may be a single character: + + 'Noooooo!'.squeeze('o') # => "No!" + 'foo bar baz'.squeeze(' ') # => "foo bar baz" + 'Mississippi'.squeeze('s') # => "Misisippi" + 'Mississippi'.squeeze('p') # => "Mississipi" + 'Mississippi'.squeeze('x') # => "Mississippi" # Unused selector character is ignored. + 'бессонница'.squeeze('с') # => "бесонница" + 'бессонница'.squeeze('н') # => "бессоница" + +A single argument may be a string of characters: + + 'Mississippi'.squeeze('sp') # => "Misisipi" + 'Mississippi'.squeeze('ps') # => "Misisipi" # Order doesn't matter. + 'Mississippi'.squeeze('nonsense') # => "Misisippi" # Unused selector characters are ignored. + +A single argument may be a range of characters: + + 'Mississippi'.squeeze('a-p') # => "Mississipi" + 'Mississippi'.squeeze('q-z') # => "Misisippi" + 'Mississippi'.squeeze('a-z') # => "Misisipi" + +Multiple arguments are allowed; +see {Multiple Character Selectors}[rdoc-ref:character_selectors.rdoc@Multiple+Character+Selectors]. + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. 
diff --git a/doc/zjit.md b/doc/zjit.md index f3db94448d4513..a45128adbdd438 100644 --- a/doc/zjit.md +++ b/doc/zjit.md @@ -162,6 +162,16 @@ A file called `zjit_exits_{pid}.dump` will be created in the same directory as ` stackprof path/to/zjit_exits_{pid}.dump ``` +### Printing ZJIT Errors + +`--zjit-debug` prints ZJIT compilation errors and other diagnostics: + +```bash +./miniruby --zjit-debug script.rb +``` + +As you might guess from the name, this option is intended mostly for ZJIT developers. + ## Useful dev commands To view YARV output for code snippets: diff --git a/ext/socket/tcpsocket.c b/ext/socket/tcpsocket.c index 22c9f28ab71703..300a426eda8471 100644 --- a/ext/socket/tcpsocket.c +++ b/ext/socket/tcpsocket.c @@ -12,7 +12,7 @@ /* * call-seq: - * TCPSocket.new(remote_host, remote_port, local_host=nil, local_port=nil, resolv_timeout: nil, connect_timeout: nil, fast_fallback: true) + * TCPSocket.new(remote_host, remote_port, local_host=nil, local_port=nil, resolv_timeout: nil, connect_timeout: nil, open_timeout: nil, fast_fallback: true) * * Opens a TCP connection to +remote_host+ on +remote_port+. 
If +local_host+ * and +local_port+ are specified, then those parameters are used on the local diff --git a/jit.c b/jit.c index 2ff38c28e2d6d3..43c932e5a00ffa 100644 --- a/jit.c +++ b/jit.c @@ -15,6 +15,7 @@ #include "internal/gc.h" #include "vm_sync.h" #include "internal/fixnum.h" +#include "internal/string.h" enum jit_bindgen_constants { // Field offsets for the RObject struct @@ -180,6 +181,12 @@ rb_jit_get_proc_ptr(VALUE procv) return proc; } +unsigned int +rb_jit_iseq_builtin_attrs(const rb_iseq_t *iseq) +{ + return iseq->body->builtin_attrs; +} + int rb_get_mct_argc(const rb_method_cfunc_t *mct) { @@ -750,3 +757,11 @@ rb_jit_fix_mod_fix(VALUE recv, VALUE obj) { return rb_fix_mod_fix(recv, obj); } + +// YJIT/ZJIT need this function to never allocate and never raise +VALUE +rb_yarv_str_eql_internal(VALUE str1, VALUE str2) +{ + // We wrap this since it's static inline + return rb_str_eql_internal(str1, str2); +} diff --git a/ruby.c b/ruby.c index 05a9fd4191d8bb..0f5e6d60f7f22b 100644 --- a/ruby.c +++ b/ruby.c @@ -916,7 +916,9 @@ moreswitches(const char *s, ruby_cmdline_options_t *opt, int envopt) argc = RSTRING_LEN(argary) / sizeof(ap); ap = 0; rb_str_cat(argary, (char *)&ap, sizeof(ap)); - argv = ptr = ALLOC_N(char *, argc); + + VALUE ptr_obj; + argv = ptr = RB_ALLOCV_N(char *, ptr_obj, argc); MEMMOVE(argv, RSTRING_PTR(argary), char *, argc); while ((i = proc_options(argc, argv, opt, envopt)) > 1 && envopt && (argc -= i) > 0) { @@ -948,7 +950,8 @@ moreswitches(const char *s, ruby_cmdline_options_t *opt, int envopt) opt->crash_report = crash_report; } - ruby_xfree(ptr); + RB_ALLOCV_END(ptr_obj); + /* get rid of GC */ rb_str_resize(argary, 0); rb_str_resize(argstr, 0); diff --git a/string.c b/string.c index 92541622633211..1236057ad177ed 100644 --- a/string.c +++ b/string.c @@ -8971,16 +8971,7 @@ rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str) * call-seq: * squeeze(*selectors) -> new_string * - * Returns a copy of +self+ with characters specified by 
+selectors+ "squeezed" - * (see {Multiple Character Selectors}[rdoc-ref:character_selectors.rdoc@Multiple+Character+Selectors]): - * - * "Squeezed" means that each multiple-character run of a selected character - * is squeezed down to a single character; - * with no arguments given, squeezes all characters: - * - * "yellow moon".squeeze #=> "yelow mon" - * " now is the".squeeze(" ") #=> " now is the" - * "putters shoot balls".squeeze("m-z") #=> "puters shot balls" + * :include: doc/string/squeeze.rdoc * */ @@ -9201,7 +9192,7 @@ literal_split_pattern(VALUE spat, split_type_t default_type) /* * call-seq: - * split(field_sep = $;, limit = 0) -> array + * split(field_sep = $;, limit = 0) -> array_of_substrings * split(field_sep = $;, limit = 0) {|substring| ... } -> self * * :include: doc/string/split.rdoc diff --git a/test/ruby/test_zjit.rb b/test/ruby/test_zjit.rb index e151a022d1bc61..44f010d0561002 100644 --- a/test/ruby/test_zjit.rb +++ b/test/ruby/test_zjit.rb @@ -1962,6 +1962,27 @@ def test }, call_threshold: 2 end + def test_block_given_p + assert_compiles "false", "block_given?" + assert_compiles '[false, false, true]', %q{ + def test = block_given? + [test, test, test{}] + }, call_threshold: 2, insns: [:opt_send_without_block] + end + + def test_block_given_p_from_block + # This will do some EP hopping to find the local EP, + # so it's slightly different than doing it outside of a block. + + assert_compiles '[false, false, true]', %q{ + def test + yield_self { yield_self { block_given? 
} } + end + + [test, test, test{}] + }, call_threshold: 2 + end + def test_invokeblock_without_block_after_jit_call assert_compiles '"no block given (yield)"', %q{ def test(*arr, &b) diff --git a/yjit.c b/yjit.c index 807aec9e391172..4b78cfbae25a25 100644 --- a/yjit.c +++ b/yjit.c @@ -244,12 +244,6 @@ rb_optimized_call(VALUE *recv, rb_execution_context_t *ec, int argc, VALUE *argv return rb_vm_invoke_proc(ec, proc, argc, argv, kw_splat, block_handler); } -unsigned int -rb_yjit_iseq_builtin_attrs(const rb_iseq_t *iseq) -{ - return iseq->body->builtin_attrs; -} - // If true, the iseq has only opt_invokebuiltin_delegate(_leave) and leave insns. static bool invokebuiltin_delegate_leave_p(const rb_iseq_t *iseq) @@ -283,14 +277,6 @@ rb_yjit_str_simple_append(VALUE str1, VALUE str2) extern VALUE *rb_vm_base_ptr(struct rb_control_frame_struct *cfp); -// YJIT needs this function to never allocate and never raise -VALUE -rb_yarv_str_eql_internal(VALUE str1, VALUE str2) -{ - // We wrap this since it's static inline - return rb_str_eql_internal(str1, str2); -} - VALUE rb_str_neq_internal(VALUE str1, VALUE str2) { diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs index df287e1bf84a2e..100abbb33fc8cb 100644 --- a/yjit/bindgen/src/main.rs +++ b/yjit/bindgen/src/main.rs @@ -249,7 +249,7 @@ fn main() { .allowlist_function("rb_jit_mark_executable") .allowlist_function("rb_jit_mark_unused") .allowlist_function("rb_jit_get_page_size") - .allowlist_function("rb_yjit_iseq_builtin_attrs") + .allowlist_function("rb_jit_iseq_builtin_attrs") .allowlist_function("rb_yjit_iseq_inspect") .allowlist_function("rb_yjit_builtin_function") .allowlist_function("rb_set_cfp_(pc|sp)") diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 231655826109ab..3f6f1bb46e31ec 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -438,7 +438,7 @@ impl<'a> JITState<'a> { fn flush_perf_symbols(&self, cb: &CodeBlock) { assert_eq!(0, self.perf_stack.len()); let path = 
format!("/tmp/perf-{}.map", std::process::id()); - let mut f = std::fs::File::options().create(true).append(true).open(path).unwrap(); + let mut f = std::io::BufWriter::new(std::fs::File::options().create(true).append(true).open(path).unwrap()); for sym in self.perf_map.borrow().iter() { if let (start, Some(end), name) = sym { // In case the code straddles two pages, part of it belongs to the symbol. @@ -7694,7 +7694,7 @@ fn gen_send_iseq( gen_counter_incr(jit, asm, Counter::num_send_iseq); // Shortcut for special `Primitive.attr! :leaf` builtins - let builtin_attrs = unsafe { rb_yjit_iseq_builtin_attrs(iseq) }; + let builtin_attrs = unsafe { rb_jit_iseq_builtin_attrs(iseq) }; let builtin_func_raw = unsafe { rb_yjit_builtin_function(iseq) }; let builtin_func = if builtin_func_raw.is_null() { None } else { Some(builtin_func_raw) }; let opt_send_call = flags & VM_CALL_OPT_SEND != 0; // .send call is not currently supported for builtins @@ -9635,7 +9635,7 @@ fn gen_invokeblock_specialized( // If the current ISEQ is annotated to be inlined but it's not being inlined here, // generate a dynamic dispatch to avoid making this yield megamorphic. 
- if unsafe { rb_yjit_iseq_builtin_attrs(jit.iseq) } & BUILTIN_ATTR_INLINE_BLOCK != 0 && !asm.ctx.inline() { + if unsafe { rb_jit_iseq_builtin_attrs(jit.iseq) } & BUILTIN_ATTR_INLINE_BLOCK != 0 && !asm.ctx.inline() { gen_counter_incr(jit, asm, Counter::invokeblock_iseq_not_inlined); return None; } diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs index 272586a79f3fb5..f6e4e0e22f4d22 100644 --- a/yjit/src/cruby_bindings.inc.rs +++ b/yjit/src/cruby_bindings.inc.rs @@ -1130,11 +1130,9 @@ extern "C" { kw_splat: ::std::os::raw::c_int, block_handler: VALUE, ) -> VALUE; - pub fn rb_yjit_iseq_builtin_attrs(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; pub fn rb_yjit_builtin_function(iseq: *const rb_iseq_t) -> *const rb_builtin_function; pub fn rb_yjit_str_simple_append(str1: VALUE, str2: VALUE) -> VALUE; pub fn rb_vm_base_ptr(cfp: *mut rb_control_frame_struct) -> *mut VALUE; - pub fn rb_yarv_str_eql_internal(str1: VALUE, str2: VALUE) -> VALUE; pub fn rb_str_neq_internal(str1: VALUE, str2: VALUE) -> VALUE; pub fn rb_ary_unshift_m(argc: ::std::os::raw::c_int, argv: *mut VALUE, ary: VALUE) -> VALUE; pub fn rb_yjit_rb_ary_subseq_length(ary: VALUE, beg: ::std::os::raw::c_long) -> VALUE; @@ -1199,6 +1197,7 @@ extern "C" { pub fn rb_get_def_original_id(def: *const rb_method_definition_t) -> ID; pub fn rb_get_def_bmethod_proc(def: *mut rb_method_definition_t) -> VALUE; pub fn rb_jit_get_proc_ptr(procv: VALUE) -> *mut rb_proc_t; + pub fn rb_jit_iseq_builtin_attrs(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; pub fn rb_get_mct_argc(mct: *const rb_method_cfunc_t) -> ::std::os::raw::c_int; pub fn rb_get_mct_func(mct: *const rb_method_cfunc_t) -> *mut ::std::os::raw::c_void; pub fn rb_get_def_iseq_ptr(def: *mut rb_method_definition_t) -> *const rb_iseq_t; @@ -1274,4 +1273,5 @@ extern "C" { end: *mut ::std::os::raw::c_void, ); pub fn rb_jit_fix_mod_fix(recv: VALUE, obj: VALUE) -> VALUE; + pub fn rb_yarv_str_eql_internal(str1: VALUE, str2: VALUE) -> 
VALUE; } diff --git a/zjit.rb b/zjit.rb index 88c572849c7529..8b44330b36043a 100644 --- a/zjit.rb +++ b/zjit.rb @@ -103,9 +103,6 @@ def exit_locations # These values are mandatory to include for stackprof, but we don't use them. results[:missed_samples] = 0 results[:gc_samples] = 0 - - results[:frames].reject! { |k, v| v[:samples] == 0 } - results end @@ -156,7 +153,8 @@ def stats_string # Show counters independent from exit_* or dynamic_send_* print_counters_with_prefix(prefix: 'not_inlined_cfuncs_', prompt: 'not inlined C methods', buf:, stats:, limit: 20) - print_counters_with_prefix(prefix: 'not_annotated_cfuncs_', prompt: 'not annotated C methods', buf:, stats:, limit: 20) + # Don't show not_annotated_cfuncs right now because it mostly duplicates not_inlined_cfuncs + # print_counters_with_prefix(prefix: 'not_annotated_cfuncs_', prompt: 'not annotated C methods', buf:, stats:, limit: 20) # Show fallback counters, ordered by the typical amount of fallbacks for the prefix at the time print_counters_with_prefix(prefix: 'unspecialized_send_def_type_', prompt: 'not optimized method types for send', buf:, stats:, limit: 20) diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs index 92f7a10e56f97c..75dbd46794abe4 100644 --- a/zjit/bindgen/src/main.rs +++ b/zjit/bindgen/src/main.rs @@ -274,7 +274,7 @@ fn main() { .allowlist_function("rb_jit_mark_unused") .allowlist_function("rb_jit_get_page_size") .allowlist_function("rb_jit_array_len") - .allowlist_function("rb_zjit_iseq_builtin_attrs") + .allowlist_function("rb_jit_iseq_builtin_attrs") .allowlist_function("rb_zjit_iseq_inspect") .allowlist_function("rb_zjit_iseq_insn_set") .allowlist_function("rb_zjit_local_id") diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 50a7295bbe2672..c00bdb474ecbeb 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -156,10 +156,11 @@ fn gen_iseq_call(cb: &mut CodeBlock, caller_iseq: IseqPtr, iseq_call: &IseqCallR fn register_with_perf(iseq_name: String, 
start_ptr: usize, code_size: usize) { use std::io::Write; let perf_map = format!("/tmp/perf-{}.map", std::process::id()); - let Ok(mut file) = std::fs::OpenOptions::new().create(true).append(true).open(&perf_map) else { + let Ok(file) = std::fs::OpenOptions::new().create(true).append(true).open(&perf_map) else { debug!("Failed to open perf map file: {perf_map}"); return; }; + let mut file = std::io::BufWriter::new(file); let Ok(_) = writeln!(file, "{:#x} {:#x} zjit::{}", start_ptr, code_size, iseq_name) else { debug!("Failed to write {iseq_name} to perf map file: {perf_map}"); return; @@ -385,7 +386,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio // Ensure we have enough room fit ec, self, and arguments // TODO remove this check when we have stack args (we can use Time.new to test it) Insn::InvokeBuiltin { bf, state, .. } if bf.argc + 2 > (C_ARG_OPNDS.len() as i32) => return Err(*state), - Insn::InvokeBuiltin { bf, args, state, .. } => gen_invokebuiltin(jit, asm, &function.frame_state(*state), bf, opnds!(args)), + Insn::InvokeBuiltin { bf, leaf, args, state, .. } => gen_invokebuiltin(jit, asm, &function.frame_state(*state), bf, *leaf, opnds!(args)), &Insn::EntryPoint { jit_entry_idx } => no_output!(gen_entry_point(jit, asm, jit_entry_idx)), Insn::Return { val } => no_output!(gen_return(asm, opnd!(val))), Insn::FixnumAdd { left, right, state } => gen_fixnum_add(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state)), @@ -448,6 +449,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio Insn::LoadSelf => gen_load_self(), &Insn::LoadIvarEmbedded { self_val, id, index } => gen_load_ivar_embedded(asm, opnd!(self_val), id, index), &Insn::LoadIvarExtended { self_val, id, index } => gen_load_ivar_extended(asm, opnd!(self_val), id, index), + &Insn::IsBlockGiven => gen_is_block_given(jit, asm), &Insn::ArrayMax { state, .. } | &Insn::FixnumDiv { state, .. } | &Insn::Throw { state, .. 
} @@ -524,6 +526,8 @@ fn gen_defined(jit: &JITState, asm: &mut Assembler, op_type: usize, obj: VALUE, // `yield` goes to the block handler stowed in the "local" iseq which is // the current iseq or a parent. Only the "method" iseq type can be passed a // block handler. (e.g. `yield` in the top level script is a syntax error.) + // + // Similar to gen_is_block_given let local_iseq = unsafe { rb_get_iseq_body_local_iseq(jit.iseq) }; if unsafe { rb_get_iseq_body_type(local_iseq) } == ISEQ_TYPE_METHOD { let lep = gen_get_lep(jit, asm); @@ -549,6 +553,19 @@ fn gen_defined(jit: &JITState, asm: &mut Assembler, op_type: usize, obj: VALUE, } } +/// Similar to gen_defined for DEFINED_YIELD +fn gen_is_block_given(jit: &JITState, asm: &mut Assembler) -> Opnd { + let local_iseq = unsafe { rb_get_iseq_body_local_iseq(jit.iseq) }; + if unsafe { rb_get_iseq_body_type(local_iseq) } == ISEQ_TYPE_METHOD { + let lep = gen_get_lep(jit, asm); + let block_handler = asm.load(Opnd::mem(64, lep, SIZEOF_VALUE_I32 * VM_ENV_DATA_INDEX_SPECVAL)); + asm.cmp(block_handler, VM_BLOCK_HANDLER_NONE.into()); + asm.csel_e(Qfalse.into(), Qtrue.into()) + } else { + Qfalse.into() + } +} + /// Get a local variable from a higher scope or the heap. `local_ep_offset` is in number of VALUEs. /// We generate this instruction with level=0 only when the local variable is on the heap, so we /// can't optimize the level=0 case using the SP register. 
@@ -623,13 +640,17 @@ fn gen_get_constant_path(jit: &JITState, asm: &mut Assembler, ic: *const iseq_in asm_ccall!(asm, rb_vm_opt_getconstant_path, EC, CFP, Opnd::const_ptr(ic)) } -fn gen_invokebuiltin(jit: &JITState, asm: &mut Assembler, state: &FrameState, bf: &rb_builtin_function, args: Vec) -> lir::Opnd { +fn gen_invokebuiltin(jit: &JITState, asm: &mut Assembler, state: &FrameState, bf: &rb_builtin_function, leaf: bool, args: Vec) -> lir::Opnd { assert!(bf.argc + 2 <= C_ARG_OPNDS.len() as i32, "gen_invokebuiltin should not be called for builtin function {} with too many arguments: {}", unsafe { std::ffi::CStr::from_ptr(bf.name).to_str().unwrap() }, bf.argc); - // Anything can happen inside builtin functions - gen_prepare_non_leaf_call(jit, asm, state); + if leaf { + gen_prepare_leaf_call_with_gc(asm, state); + } else { + // Anything can happen inside builtin functions + gen_prepare_non_leaf_call(jit, asm, state); + } let mut cargs = vec![EC]; cargs.extend(args); @@ -1611,7 +1632,7 @@ fn gen_guard_type(jit: &mut JITState, asm: &mut Assembler, val: lir::Opnd, guard let tag = asm.and(flags, Opnd::UImm(RUBY_T_MASK as u64)); asm.cmp(tag, Opnd::UImm(RUBY_T_STRING as u64)); asm.jne(side); - } else if guard_type.bit_equal(types::HeapObject) { + } else if guard_type.bit_equal(types::HeapBasicObject) { let side_exit = side_exit(jit, state, GuardType(guard_type)); asm.cmp(val, Opnd::Value(Qfalse)); asm.je(side_exit.clone()); diff --git a/zjit/src/cruby.rs b/zjit/src/cruby.rs index 645891496edbae..d4e4079b5c3c9b 100644 --- a/zjit/src/cruby.rs +++ b/zjit/src/cruby.rs @@ -1175,6 +1175,11 @@ pub mod test_utils { get_proc_iseq(&format!("{}.method(:{})", recv, name)) } + /// Get IseqPtr for a specified instance method + pub fn get_instance_method_iseq(recv: &str, name: &str) -> *const rb_iseq_t { + get_proc_iseq(&format!("{}.instance_method(:{})", recv, name)) + } + /// Get IseqPtr for a specified Proc object pub fn get_proc_iseq(obj: &str) -> *const rb_iseq_t { let wrapped_iseq 
= eval(&format!("RubyVM::InstructionSequence.of({obj})")); @@ -1342,6 +1347,7 @@ pub(crate) mod ids { name: NULL content: b"" name: respond_to_missing content: b"respond_to_missing?" name: eq content: b"==" + name: string_eq content: b"String#==" name: include_p content: b"include?" name: to_ary name: to_s diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs index c9e5bc8fd1ebcb..f7e6cdde9419b3 100644 --- a/zjit/src/cruby_bindings.inc.rs +++ b/zjit/src/cruby_bindings.inc.rs @@ -1363,6 +1363,7 @@ unsafe extern "C" { pub fn rb_get_def_original_id(def: *const rb_method_definition_t) -> ID; pub fn rb_get_def_bmethod_proc(def: *mut rb_method_definition_t) -> VALUE; pub fn rb_jit_get_proc_ptr(procv: VALUE) -> *mut rb_proc_t; + pub fn rb_jit_iseq_builtin_attrs(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint; pub fn rb_get_mct_argc(mct: *const rb_method_cfunc_t) -> ::std::os::raw::c_int; pub fn rb_get_mct_func(mct: *const rb_method_cfunc_t) -> *mut ::std::os::raw::c_void; pub fn rb_get_def_iseq_ptr(def: *mut rb_method_definition_t) -> *const rb_iseq_t; @@ -1437,4 +1438,5 @@ unsafe extern "C" { start: *mut ::std::os::raw::c_void, end: *mut ::std::os::raw::c_void, ); + pub fn rb_yarv_str_eql_internal(str1: VALUE, str2: VALUE) -> VALUE; } diff --git a/zjit/src/cruby_methods.rs b/zjit/src/cruby_methods.rs index 9d3f5a756b4a47..ee10eaa681c7e4 100644 --- a/zjit/src/cruby_methods.rs +++ b/zjit/src/cruby_methods.rs @@ -188,11 +188,13 @@ pub fn init() -> Annotations { } annotate!(rb_mKernel, "itself", inline_kernel_itself); + annotate!(rb_mKernel, "block_given?", inline_kernel_block_given_p); annotate!(rb_cString, "bytesize", types::Fixnum, no_gc, leaf); annotate!(rb_cString, "to_s", types::StringExact); annotate!(rb_cString, "getbyte", inline_string_getbyte); annotate!(rb_cString, "empty?", types::BoolExact, no_gc, leaf, elidable); annotate!(rb_cString, "<<", inline_string_append); + annotate!(rb_cString, "==", inline_string_eq); annotate!(rb_cModule, 
"name", types::StringExact.union(types::NilClass), no_gc, leaf, elidable); annotate!(rb_cModule, "===", types::BoolExact, no_gc, leaf); annotate!(rb_cArray, "length", types::Fixnum, no_gc, leaf, elidable); @@ -246,6 +248,13 @@ fn inline_kernel_itself(_fun: &mut hir::Function, _block: hir::BlockId, recv: hi None } +fn inline_kernel_block_given_p(fun: &mut hir::Function, block: hir::BlockId, _recv: hir::InsnId, args: &[hir::InsnId], _state: hir::InsnId) -> Option { + let &[] = args else { return None; }; + // TODO(max): In local iseq types that are not ISEQ_TYPE_METHOD, rewrite to Constant false. + let result = fun.push_insn(block, hir::Insn::IsBlockGiven); + return Some(result); +} + fn inline_array_aref(fun: &mut hir::Function, block: hir::BlockId, recv: hir::InsnId, args: &[hir::InsnId], state: hir::InsnId) -> Option { if let &[index] = args { if fun.likely_a(index, types::Fixnum, state) { @@ -291,6 +300,26 @@ fn inline_string_append(fun: &mut hir::Function, block: hir::BlockId, recv: hir: } } +fn inline_string_eq(fun: &mut hir::Function, block: hir::BlockId, recv: hir::InsnId, args: &[hir::InsnId], state: hir::InsnId) -> Option { + let &[other] = args else { return None; }; + if fun.likely_a(recv, types::String, state) && fun.likely_a(other, types::String, state) { + let recv = fun.coerce_to(block, recv, types::String, state); + let other = fun.coerce_to(block, other, types::String, state); + let return_type = types::BoolExact; + let elidable = true; + // TODO(max): Make StringEqual its own opcode so that we can later constant-fold StringEqual(a, a) => true + let result = fun.push_insn(block, hir::Insn::CCall { + cfunc: rb_yarv_str_eql_internal as *const u8, + args: vec![recv, other], + name: ID!(string_eq), + return_type, + elidable, + }); + return Some(result); + } + None +} + fn inline_integer_succ(fun: &mut hir::Function, block: hir::BlockId, recv: hir::InsnId, args: &[hir::InsnId], state: hir::InsnId) -> Option { if !args.is_empty() { return None; } if 
fun.likely_a(recv, types::Fixnum, state) { diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index e8a366ca6c45f2..834a33d23c33d2 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -610,8 +610,12 @@ pub enum Insn { IsMethodCfunc { val: InsnId, cd: *const rb_call_data, cfunc: *const u8, state: InsnId }, /// Return C `true` if left == right IsBitEqual { left: InsnId, right: InsnId }, + // TODO(max): In iseq body types that are not ISEQ_TYPE_METHOD, rewrite to Constant false. Defined { op_type: usize, obj: VALUE, pushval: VALUE, v: InsnId, state: InsnId }, GetConstantPath { ic: *const iseq_inline_constant_cache, state: InsnId }, + /// Kernel#block_given? but without pushing a frame. Similar to [`Insn::Defined`] with + /// `DEFINED_YIELD` + IsBlockGiven, /// Get a global variable named `id` GetGlobal { id: ID, state: InsnId }, @@ -741,6 +745,7 @@ pub enum Insn { bf: rb_builtin_function, args: Vec, state: InsnId, + leaf: bool, return_type: Option, // None for unannotated builtins }, @@ -870,6 +875,7 @@ impl Insn { Insn::NewRange { .. } => true, Insn::NewRangeFixnum { .. } => false, Insn::StringGetbyteFixnum { .. } => false, + Insn::IsBlockGiven => false, _ => true, } } @@ -1034,8 +1040,10 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { } Ok(()) } - Insn::InvokeBuiltin { bf, args, .. } => { - write!(f, "InvokeBuiltin {}", unsafe { CStr::from_ptr(bf.name) }.to_str().unwrap())?; + Insn::InvokeBuiltin { bf, args, leaf, .. } => { + write!(f, "InvokeBuiltin{} {}", + if *leaf { " leaf" } else { "" }, + unsafe { CStr::from_ptr(bf.name) }.to_str().unwrap())?; for arg in args { write!(f, ", {arg}")?; } @@ -1065,6 +1073,7 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> { Insn::GuardBlockParamProxy { level, .. } => write!(f, "GuardBlockParamProxy l{level}"), Insn::PatchPoint { invariant, .. } => { write!(f, "PatchPoint {}", invariant.print(self.ptr_map)) }, Insn::GetConstantPath { ic, .. 
} => { write!(f, "GetConstantPath {:p}", self.ptr_map.map_ptr(ic)) }, + Insn::IsBlockGiven => { write!(f, "IsBlockGiven") }, Insn::CCall { cfunc, args, name, return_type: _, elidable: _ } => { write!(f, "CCall {}@{:p}", name.contents_lossy(), self.ptr_map.map_ptr(cfunc))?; for arg in args { @@ -1562,6 +1571,7 @@ impl Function { result@(Const {..} | Param {..} | GetConstantPath {..} + | IsBlockGiven | PatchPoint {..} | PutSpecialObject {..} | GetGlobal {..} @@ -1671,7 +1681,7 @@ impl Function { state, reason, }, - &InvokeBuiltin { bf, ref args, state, return_type } => InvokeBuiltin { bf, args: find_vec!(args), state, return_type }, + &InvokeBuiltin { bf, ref args, state, leaf, return_type } => InvokeBuiltin { bf, args: find_vec!(args), state, leaf, return_type }, &ArrayDup { val, state } => ArrayDup { val: find!(val), state }, &HashDup { val, state } => HashDup { val: find!(val), state }, &HashAref { hash, key, state } => HashAref { hash: find!(hash), key: find!(key), state }, @@ -1794,7 +1804,7 @@ impl Function { Insn::HashDup { .. } => types::HashExact, Insn::NewRange { .. } => types::RangeExact, Insn::NewRangeFixnum { .. } => types::RangeExact, - Insn::ObjectAlloc { .. } => types::HeapObject, + Insn::ObjectAlloc { .. } => types::HeapBasicObject, Insn::ObjectAllocClass { class, .. } => Type::from_class(*class), &Insn::CCallWithFrame { return_type, .. } => return_type, Insn::CCall { return_type, .. } => *return_type, @@ -1828,6 +1838,7 @@ impl Function { Insn::Defined { pushval, .. } => Type::from_value(*pushval).union(types::NilClass), Insn::DefinedIvar { pushval, .. } => Type::from_value(*pushval).union(types::NilClass), Insn::GetConstantPath { .. } => types::BasicObject, + Insn::IsBlockGiven { .. } => types::BoolExact, Insn::ArrayMax { .. } => types::BasicObject, Insn::GetGlobal { .. } => types::BasicObject, Insn::GetIvar { .. 
} => types::BasicObject, @@ -2474,7 +2485,7 @@ impl Function { // too-complex shapes can't use index access self.push_insn_id(block, insn_id); continue; } - let self_val = self.push_insn(block, Insn::GuardType { val: self_val, guard_type: types::HeapObject, state }); + let self_val = self.push_insn(block, Insn::GuardType { val: self_val, guard_type: types::HeapBasicObject, state }); let self_val = self.push_insn(block, Insn::GuardShape { val: self_val, shape: recv_type.shape(), state }); let mut ivar_index: u16 = 0; let replacement = if ! unsafe { rb_shape_get_iv_index(recv_type.shape().0, id, &mut ivar_index) } { @@ -3009,6 +3020,7 @@ impl Function { | &Insn::LoadSelf | &Insn::GetLocal { .. } | &Insn::PutSpecialObject { .. } + | &Insn::IsBlockGiven | &Insn::IncrCounter(_) | &Insn::IncrCounterPtr { .. } => {} @@ -4662,10 +4674,14 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { .get_builtin_properties(&bf) .map(|props| props.return_type); + let builtin_attrs = unsafe { rb_jit_iseq_builtin_attrs(iseq) }; + let leaf = builtin_attrs & BUILTIN_ATTR_LEAF != 0; + let insn_id = fun.push_insn(block, Insn::InvokeBuiltin { bf, args, state: exit_id, + leaf, return_type, }); state.stack_push(insn_id); @@ -4688,10 +4704,14 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result { .get_builtin_properties(&bf) .map(|props| props.return_type); + let builtin_attrs = unsafe { rb_jit_iseq_builtin_attrs(iseq) }; + let leaf = builtin_attrs & BUILTIN_ATTR_LEAF != 0; + let insn_id = fun.push_insn(block, Insn::InvokeBuiltin { bf, args, state: exit_id, + leaf, return_type, }); state.stack_push(insn_id); @@ -7089,7 +7109,7 @@ mod tests { v12:NilClass = Const Value(nil) v14:CBool = IsMethodCFunc v11, :new IfFalse v14, bb3(v6, v12, v11) - v16:HeapObject = ObjectAlloc v11 + v16:HeapBasicObject = ObjectAlloc v11 v18:BasicObject = SendWithoutBlock v16, :initialize CheckInterrupts Jump bb4(v6, v16, v18) @@ -7919,7 +7939,7 @@ mod tests { EntryPoint JIT(0) Jump bb2(v4) bb2(v6:BasicObject): 
- v11:Class = InvokeBuiltin _bi20, v6 + v11:Class = InvokeBuiltin leaf _bi20, v6 Jump bb3(v6, v11) bb3(v13:BasicObject, v14:Class): CheckInterrupts @@ -8017,6 +8037,50 @@ mod tests { "); } + #[test] + fn test_invoke_leaf_builtin_symbol_name() { + let iseq = crate::cruby::with_rubyvm(|| get_instance_method_iseq("Symbol", "name")); + let function = iseq_to_hir(iseq).unwrap(); + assert_snapshot!(hir_string_function(&function), @r" + fn name@: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v11:BasicObject = InvokeBuiltin leaf _bi28, v6 + Jump bb3(v6, v11) + bb3(v13:BasicObject, v14:BasicObject): + CheckInterrupts + Return v14 + "); + } + + #[test] + fn test_invoke_leaf_builtin_symbol_to_s() { + let iseq = crate::cruby::with_rubyvm(|| get_instance_method_iseq("Symbol", "to_s")); + let function = iseq_to_hir(iseq).unwrap(); + assert_snapshot!(hir_string_function(&function), @r" + fn to_s@: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + v11:BasicObject = InvokeBuiltin leaf _bi12, v6 + Jump bb3(v6, v11) + bb3(v13:BasicObject, v14:BasicObject): + CheckInterrupts + Return v14 + "); + } + #[test] fn dupn() { eval(" @@ -8492,13 +8556,23 @@ mod opt_tests { use super::tests::assert_contains_opcode; #[track_caller] - fn hir_string(method: &str) -> String { - let iseq = crate::cruby::with_rubyvm(|| get_method_iseq("self", method)); + fn hir_string_function(function: &Function) -> String { + format!("{}", FunctionPrinter::without_snapshot(function)) + } + + #[track_caller] + fn hir_string_proc(proc: &str) -> String { + let iseq = crate::cruby::with_rubyvm(|| get_proc_iseq(proc)); unsafe { crate::cruby::rb_zjit_profile_disable(iseq) }; let mut function = iseq_to_hir(iseq).unwrap(); function.optimize(); function.validate().unwrap(); - format!("{}", 
FunctionPrinter::without_snapshot(&function)) + hir_string_function(&function) + } + + #[track_caller] + fn hir_string(method: &str) -> String { + hir_string_proc(&format!("{}.method(:{})", "self", method)) } #[test] @@ -10608,7 +10682,7 @@ mod opt_tests { bb2(v6:BasicObject): PatchPoint SingleRactorMode PatchPoint StableConstantNames(0x1000, MY_MODULE) - v19:HeapObject[VALUE(0x1008)] = Const Value(VALUE(0x1008)) + v19:ModuleSubclass[VALUE(0x1008)] = Const Value(VALUE(0x1008)) CheckInterrupts Return v19 "); @@ -10671,6 +10745,87 @@ mod opt_tests { "); } + #[test] + fn test_inline_kernel_block_given_p() { + eval(" + def test = block_given? + test + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + PatchPoint MethodRedefined(Object@0x1000, block_given?@0x1008, cme:0x1010) + PatchPoint NoSingletonClass(Object@0x1000) + v20:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] + v21:BoolExact = IsBlockGiven + IncrCounter inline_cfunc_optimized_send_count + CheckInterrupts + Return v21 + "); + } + + #[test] + fn test_inline_kernel_block_given_p_in_block() { + eval(" + TEST = proc { block_given? 
} + TEST.call + "); + assert_snapshot!(hir_string_proc("TEST"), @r" + fn block in @:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + PatchPoint MethodRedefined(Object@0x1000, block_given?@0x1008, cme:0x1010) + PatchPoint NoSingletonClass(Object@0x1000) + v20:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] + v21:BoolExact = IsBlockGiven + IncrCounter inline_cfunc_optimized_send_count + CheckInterrupts + Return v21 + "); + } + + #[test] + fn test_elide_kernel_block_given_p() { + eval(" + def test + block_given? + 5 + end + test + "); + assert_snapshot!(hir_string("test"), @r" + fn test@:3: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + Jump bb2(v1) + bb1(v4:BasicObject): + EntryPoint JIT(0) + Jump bb2(v4) + bb2(v6:BasicObject): + PatchPoint MethodRedefined(Object@0x1000, block_given?@0x1008, cme:0x1010) + PatchPoint NoSingletonClass(Object@0x1000) + v23:HeapObject[class_exact*:Object@VALUE(0x1000)] = GuardType v6, HeapObject[class_exact*:Object@VALUE(0x1000)] + IncrCounter inline_cfunc_optimized_send_count + v14:Fixnum[5] = Const Value(5) + CheckInterrupts + Return v14 + "); + } + #[test] fn const_send_direct_integer() { eval(" @@ -11283,7 +11438,7 @@ mod opt_tests { v40:Class[VALUE(0x1008)] = Const Value(VALUE(0x1008)) v12:NilClass = Const Value(nil) PatchPoint MethodRedefined(Set@0x1008, new@0x1010, cme:0x1018) - v16:HeapObject = ObjectAlloc v40 + v16:HeapBasicObject = ObjectAlloc v40 PatchPoint MethodRedefined(Set@0x1008, initialize@0x1040, cme:0x1048) PatchPoint NoSingletonClass(Set@0x1008) v46:SetExact = GuardType v16, SetExact @@ -13556,7 +13711,7 @@ mod opt_tests { bb2(v10:BasicObject, v11:BasicObject, v12:BasicObject): PatchPoint MethodRedefined(C@0x1000, []@0x1008, cme:0x1010) PatchPoint NoSingletonClass(C@0x1000) - v28:HeapObject[class_exact:C] = GuardType v11, 
HeapObject[class_exact:C] + v28:ArraySubclass[class_exact:C] = GuardType v11, ArraySubclass[class_exact:C] v29:Fixnum = GuardType v12, Fixnum v30:BasicObject = ArrayArefFixnum v28, v29 IncrCounter inline_cfunc_optimized_send_count @@ -13650,7 +13805,7 @@ mod opt_tests { bb2(v10:BasicObject, v11:BasicObject, v12:BasicObject): PatchPoint MethodRedefined(C@0x1000, []@0x1008, cme:0x1010) PatchPoint NoSingletonClass(C@0x1000) - v28:HeapObject[class_exact:C] = GuardType v11, HeapObject[class_exact:C] + v28:HashSubclass[class_exact:C] = GuardType v11, HashSubclass[class_exact:C] v29:BasicObject = HashAref v28, v12 IncrCounter inline_cfunc_optimized_send_count CheckInterrupts @@ -14090,7 +14245,6 @@ mod opt_tests { "); } - // TODO: Should be optimized, but is waiting on String#== inlining #[test] fn test_optimize_string_append_string_subclass() { eval(r#" @@ -14114,9 +14268,11 @@ mod opt_tests { PatchPoint MethodRedefined(String@0x1000, <<@0x1008, cme:0x1010) PatchPoint NoSingletonClass(String@0x1000) v28:StringExact = GuardType v11, StringExact - v29:BasicObject = CCallWithFrame <<@0x1038, v28, v12 + v29:String = GuardType v12, String + v30:StringExact = StringAppend v28, v29 + IncrCounter inline_cfunc_optimized_send_count CheckInterrupts - Return v29 + Return v28 "); } @@ -14142,7 +14298,7 @@ mod opt_tests { bb2(v10:BasicObject, v11:BasicObject, v12:BasicObject): PatchPoint MethodRedefined(MyString@0x1000, <<@0x1008, cme:0x1010) PatchPoint NoSingletonClass(MyString@0x1000) - v28:HeapObject[class_exact:MyString] = GuardType v11, HeapObject[class_exact:MyString] + v28:StringSubclass[class_exact:MyString] = GuardType v11, StringSubclass[class_exact:MyString] v29:BasicObject = CCallWithFrame <<@0x1038, v28, v12 CheckInterrupts Return v29 @@ -15015,4 +15171,186 @@ mod opt_tests { Return v21 "); } + + #[test] + fn test_optimize_stringexact_eq_stringexact() { + eval(r#" + def test(l, r) = l == r + test("a", "b") + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + 
bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@5 + v3:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2, v3) + bb1(v6:BasicObject, v7:BasicObject, v8:BasicObject): + EntryPoint JIT(0) + Jump bb2(v6, v7, v8) + bb2(v10:BasicObject, v11:BasicObject, v12:BasicObject): + PatchPoint MethodRedefined(String@0x1000, ==@0x1008, cme:0x1010) + PatchPoint NoSingletonClass(String@0x1000) + v28:StringExact = GuardType v11, StringExact + v29:String = GuardType v12, String + v30:BoolExact = CCall String#==@0x1038, v28, v29 + IncrCounter inline_cfunc_optimized_send_count + CheckInterrupts + Return v30 + "); + } + + #[test] + fn test_optimize_string_eq_string() { + eval(r#" + class C < String + end + def test(l, r) = l == r + test(C.new("a"), C.new("b")) + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:4: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@5 + v3:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2, v3) + bb1(v6:BasicObject, v7:BasicObject, v8:BasicObject): + EntryPoint JIT(0) + Jump bb2(v6, v7, v8) + bb2(v10:BasicObject, v11:BasicObject, v12:BasicObject): + PatchPoint MethodRedefined(C@0x1000, ==@0x1008, cme:0x1010) + PatchPoint NoSingletonClass(C@0x1000) + v28:StringSubclass[class_exact:C] = GuardType v11, StringSubclass[class_exact:C] + v29:String = GuardType v12, String + v30:BoolExact = CCall String#==@0x1038, v28, v29 + IncrCounter inline_cfunc_optimized_send_count + CheckInterrupts + Return v30 + "); + } + + #[test] + fn test_optimize_stringexact_eq_string() { + eval(r#" + class C < String + end + def test(l, r) = l == r + test("a", C.new("b")) + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:4: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@5 + v3:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2, v3) + bb1(v6:BasicObject, v7:BasicObject, v8:BasicObject): + EntryPoint JIT(0) + Jump bb2(v6, v7, v8) 
+ bb2(v10:BasicObject, v11:BasicObject, v12:BasicObject): + PatchPoint MethodRedefined(String@0x1000, ==@0x1008, cme:0x1010) + PatchPoint NoSingletonClass(String@0x1000) + v28:StringExact = GuardType v11, StringExact + v29:String = GuardType v12, String + v30:BoolExact = CCall String#==@0x1038, v28, v29 + IncrCounter inline_cfunc_optimized_send_count + CheckInterrupts + Return v30 + "); + } + + #[test] + fn test_optimize_stringexact_eqq_stringexact() { + eval(r#" + def test(l, r) = l === r + test("a", "b") + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:2: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@5 + v3:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2, v3) + bb1(v6:BasicObject, v7:BasicObject, v8:BasicObject): + EntryPoint JIT(0) + Jump bb2(v6, v7, v8) + bb2(v10:BasicObject, v11:BasicObject, v12:BasicObject): + PatchPoint MethodRedefined(String@0x1000, ===@0x1008, cme:0x1010) + PatchPoint NoSingletonClass(String@0x1000) + v26:StringExact = GuardType v11, StringExact + v27:String = GuardType v12, String + v28:BoolExact = CCall String#==@0x1038, v26, v27 + IncrCounter inline_cfunc_optimized_send_count + CheckInterrupts + Return v28 + "); + } + + #[test] + fn test_optimize_string_eqq_string() { + eval(r#" + class C < String + end + def test(l, r) = l === r + test(C.new("a"), C.new("b")) + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:4: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@5 + v3:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2, v3) + bb1(v6:BasicObject, v7:BasicObject, v8:BasicObject): + EntryPoint JIT(0) + Jump bb2(v6, v7, v8) + bb2(v10:BasicObject, v11:BasicObject, v12:BasicObject): + PatchPoint MethodRedefined(C@0x1000, ===@0x1008, cme:0x1010) + PatchPoint NoSingletonClass(C@0x1000) + v26:StringSubclass[class_exact:C] = GuardType v11, StringSubclass[class_exact:C] + v27:String = GuardType v12, String + v28:BoolExact = 
CCall String#==@0x1038, v26, v27 + IncrCounter inline_cfunc_optimized_send_count + CheckInterrupts + Return v28 + "); + } + + #[test] + fn test_optimize_stringexact_eqq_string() { + eval(r#" + class C < String + end + def test(l, r) = l === r + test("a", C.new("b")) + "#); + assert_snapshot!(hir_string("test"), @r" + fn test@:4: + bb0(): + EntryPoint interpreter + v1:BasicObject = LoadSelf + v2:BasicObject = GetLocal l0, SP@5 + v3:BasicObject = GetLocal l0, SP@4 + Jump bb2(v1, v2, v3) + bb1(v6:BasicObject, v7:BasicObject, v8:BasicObject): + EntryPoint JIT(0) + Jump bb2(v6, v7, v8) + bb2(v10:BasicObject, v11:BasicObject, v12:BasicObject): + PatchPoint MethodRedefined(String@0x1000, ===@0x1008, cme:0x1010) + PatchPoint NoSingletonClass(String@0x1000) + v26:StringExact = GuardType v11, StringExact + v27:String = GuardType v12, String + v28:BoolExact = CCall String#==@0x1038, v26, v27 + IncrCounter inline_cfunc_optimized_send_count + CheckInterrupts + Return v28 + "); + } } diff --git a/zjit/src/hir_type/gen_hir_type.rb b/zjit/src/hir_type/gen_hir_type.rb index 1ab6adf2eb16f1..653efa6f8f2544 100644 --- a/zjit/src/hir_type/gen_hir_type.rb +++ b/zjit/src/hir_type/gen_hir_type.rb @@ -58,20 +58,26 @@ def to_graphviz type $subclass = [basic_object_subclass.name, object_subclass.name] $builtin_exact = [basic_object_exact.name, object_exact.name] -$c_names = { +$exact_c_names = { "ObjectExact" => "rb_cObject", "BasicObjectExact" => "rb_cBasicObject", } +$inexact_c_names = { + "Object" => "rb_cObject", + "BasicObject" => "rb_cBasicObject", +} + # Define a new type that can be subclassed (most of them). # If c_name is given, mark the rb_cXYZ object as equivalent to this exact type. 
def base_type name, c_name: nil type = $object.subtype name exact = type.subtype(name+"Exact") + subclass = type.subtype(name+"Subclass") if c_name - $c_names[exact.name] = c_name + $exact_c_names[exact.name] = c_name + $inexact_c_names[subclass.name] = c_name end - subclass = type.subtype(name+"Subclass") $builtin_exact << exact.name $subclass << subclass.name [type, exact] @@ -81,7 +87,7 @@ def base_type name, c_name: nil # If c_name is given, mark the rb_cXYZ object as equivalent to this type. def final_type name, base: $object, c_name: nil if c_name - $c_names[name] = c_name + $exact_c_names[name] = c_name end type = base.subtype name $builtin_exact << type.name @@ -171,8 +177,10 @@ def add_union name, type_names add_union "Subclass", $subclass add_union "BoolExact", [true_exact.name, false_exact.name] add_union "Immediate", [fixnum.name, flonum.name, static_sym.name, nil_exact.name, true_exact.name, false_exact.name, undef_.name] -$bits["HeapObject"] = ["BasicObject & !Immediate"] -$numeric_bits["HeapObject"] = $numeric_bits["BasicObject"] & ~$numeric_bits["Immediate"] +$bits["HeapBasicObject"] = ["BasicObject & !Immediate"] +$numeric_bits["HeapBasicObject"] = $numeric_bits["BasicObject"] & ~$numeric_bits["Immediate"] +$bits["HeapObject"] = ["Object & !Immediate"] +$numeric_bits["HeapObject"] = $numeric_bits["Object"] & ~$numeric_bits["Immediate"] # ===== Finished generating the DAG; write Rust code ===== @@ -198,8 +206,14 @@ def add_union name, type_names $bits.keys.sort.map {|type_name| puts " pub const #{type_name}: Type = Type::from_bits(bits::#{type_name});" } -puts " pub const ExactBitsAndClass: [(u64, *const VALUE); #{$c_names.size}] = [" -$c_names.each {|type_name, c_name| +puts " pub const ExactBitsAndClass: [(u64, *const VALUE); #{$exact_c_names.size}] = [" +$exact_c_names.each {|type_name, c_name| + puts " (bits::#{type_name}, &raw const crate::cruby::#{c_name})," +} +puts " ];" +$inexact_c_names = $inexact_c_names.to_a.sort_by {|name, _| 
$bits[name]}.to_h +puts " pub const InexactBitsAndClass: [(u64, *const VALUE); #{$inexact_c_names.size}] = [" +$inexact_c_names.each {|type_name, c_name| puts " (bits::#{type_name}, &raw const crate::cruby::#{c_name})," } puts " ];" diff --git a/zjit/src/hir_type/hir_type.inc.rs b/zjit/src/hir_type/hir_type.inc.rs index c392735742d386..bad45a737644fb 100644 --- a/zjit/src/hir_type/hir_type.inc.rs +++ b/zjit/src/hir_type/hir_type.inc.rs @@ -37,8 +37,9 @@ mod bits { pub const Hash: u64 = HashExact | HashSubclass; pub const HashExact: u64 = 1u64 << 23; pub const HashSubclass: u64 = 1u64 << 24; + pub const HeapBasicObject: u64 = BasicObject & !Immediate; pub const HeapFloat: u64 = 1u64 << 25; - pub const HeapObject: u64 = BasicObject & !Immediate; + pub const HeapObject: u64 = Object & !Immediate; pub const Immediate: u64 = FalseClass | Fixnum | Flonum | NilClass | StaticSymbol | TrueClass | Undef; pub const Integer: u64 = Bignum | Fixnum; pub const Module: u64 = Class | ModuleExact | ModuleSubclass; @@ -69,7 +70,7 @@ mod bits { pub const Symbol: u64 = DynamicSymbol | StaticSymbol; pub const TrueClass: u64 = 1u64 << 42; pub const Undef: u64 = 1u64 << 43; - pub const AllBitPatterns: [(&'static str, u64); 69] = [ + pub const AllBitPatterns: [(&'static str, u64); 70] = [ ("Any", Any), ("RubyValue", RubyValue), ("Immediate", Immediate), @@ -79,6 +80,7 @@ mod bits { ("BuiltinExact", BuiltinExact), ("BoolExact", BoolExact), ("TrueClass", TrueClass), + ("HeapBasicObject", HeapBasicObject), ("HeapObject", HeapObject), ("String", String), ("Subclass", Subclass), @@ -181,6 +183,7 @@ pub mod types { pub const Hash: Type = Type::from_bits(bits::Hash); pub const HashExact: Type = Type::from_bits(bits::HashExact); pub const HashSubclass: Type = Type::from_bits(bits::HashSubclass); + pub const HeapBasicObject: Type = Type::from_bits(bits::HeapBasicObject); pub const HeapFloat: Type = Type::from_bits(bits::HeapFloat); pub const HeapObject: Type = Type::from_bits(bits::HeapObject); pub 
const Immediate: Type = Type::from_bits(bits::Immediate); @@ -232,4 +235,16 @@ pub mod types { (bits::TrueClass, &raw const crate::cruby::rb_cTrueClass), (bits::FalseClass, &raw const crate::cruby::rb_cFalseClass), ]; + pub const InexactBitsAndClass: [(u64, *const VALUE); 10] = [ + (bits::ArraySubclass, &raw const crate::cruby::rb_cArray), + (bits::HashSubclass, &raw const crate::cruby::rb_cHash), + (bits::ModuleSubclass, &raw const crate::cruby::rb_cModule), + (bits::NumericSubclass, &raw const crate::cruby::rb_cNumeric), + (bits::RangeSubclass, &raw const crate::cruby::rb_cRange), + (bits::RegexpSubclass, &raw const crate::cruby::rb_cRegexp), + (bits::SetSubclass, &raw const crate::cruby::rb_cSet), + (bits::StringSubclass, &raw const crate::cruby::rb_cString), + (bits::Object, &raw const crate::cruby::rb_cObject), + (bits::BasicObject, &raw const crate::cruby::rb_cBasicObject), + ]; } diff --git a/zjit/src/hir_type/mod.rs b/zjit/src/hir_type/mod.rs index f24161657e8ec7..7e6da62fd0ff44 100644 --- a/zjit/src/hir_type/mod.rs +++ b/zjit/src/hir_type/mod.rs @@ -185,6 +185,14 @@ impl Type { .map(|&(bits, _)| bits) } + fn bits_from_subclass(class: VALUE) -> Option { + types::InexactBitsAndClass + .iter() + .find(|&(_, class_object)| class.is_subclass_of(unsafe { **class_object }) == ClassRelationship::Subclass) + // Can't be an immediate if it's a subclass. 
+ .map(|&(bits, _)| bits & !bits::Immediate) + } + fn from_heap_object(val: VALUE) -> Type { assert!(!val.special_const_p(), "val should be a heap object"); let bits = @@ -199,12 +207,11 @@ impl Type { else if val.class_of() == unsafe { rb_cInteger } { bits::Bignum } else if val.class_of() == unsafe { rb_cFloat } { bits::HeapFloat } else if val.class_of() == unsafe { rb_cSymbol } { bits::DynamicSymbol } + else if let Some(bits) = Self::bits_from_exact_class(val.class_of()) { bits } + else if let Some(bits) = Self::bits_from_subclass(val.class_of()) { bits } else { - Self::bits_from_exact_class(val.class_of()).unwrap_or({ - // We don't have a specific built-in bit pattern for this class, so generalize - // as HeapObject with object specialization. - bits::HeapObject - }) + unreachable!("Class {} is not a subclass of BasicObject! Don't know what to do.", + get_class_name(val.class_of())) }; let spec = Specialization::Object(val); Type { bits, spec } @@ -266,14 +273,14 @@ impl Type { } pub fn from_class(class: VALUE) -> Type { - match Self::bits_from_exact_class(class) { - Some(bits) => Type::from_bits(bits), - None => { - // We don't have a specific built-in bit pattern for this class, so generalize - // as HeapObject with object specialization. - Type { bits: bits::HeapObject, spec: Specialization::TypeExact(class) } - } + if let Some(bits) = Self::bits_from_exact_class(class) { + return Type::from_bits(bits); + } + if let Some(bits) = Self::bits_from_subclass(class) { + return Type { bits, spec: Specialization::TypeExact(class) } } + unreachable!("Class {} is not a subclass of BasicObject! Don't know what to do.", + get_class_name(class)) } /// Private. Only for creating type globals. 
@@ -613,6 +620,32 @@ mod tests { assert_not_subtype(types::HeapFloat, types::Immediate); } + #[test] + fn heap_basic_object() { + assert_not_subtype(Type::fixnum(123), types::HeapBasicObject); + assert_not_subtype(types::Fixnum, types::HeapBasicObject); + assert_subtype(types::Bignum, types::HeapBasicObject); + assert_not_subtype(types::Integer, types::HeapBasicObject); + assert_not_subtype(types::NilClass, types::HeapBasicObject); + assert_not_subtype(types::TrueClass, types::HeapBasicObject); + assert_not_subtype(types::FalseClass, types::HeapBasicObject); + assert_not_subtype(types::StaticSymbol, types::HeapBasicObject); + assert_subtype(types::DynamicSymbol, types::HeapBasicObject); + assert_not_subtype(types::Flonum, types::HeapBasicObject); + assert_subtype(types::HeapFloat, types::HeapBasicObject); + assert_not_subtype(types::BasicObject, types::HeapBasicObject); + assert_not_subtype(types::Object, types::HeapBasicObject); + assert_not_subtype(types::Immediate, types::HeapBasicObject); + assert_not_subtype(types::HeapBasicObject, types::Immediate); + crate::cruby::with_rubyvm(|| { + let left = Type::from_value(rust_str_to_ruby("hello")); + let right = Type::from_value(rust_str_to_ruby("world")); + assert_subtype(left, types::HeapBasicObject); + assert_subtype(right, types::HeapBasicObject); + assert_subtype(left.union(right), types::HeapBasicObject); + }); + } + #[test] fn heap_object() { assert_not_subtype(Type::fixnum(123), types::HeapObject); @@ -845,6 +878,17 @@ mod tests { }); } + #[test] + fn string_subclass_is_string_subtype() { + crate::cruby::with_rubyvm(|| { + assert_subtype(types::StringExact, types::String); + assert_subtype(Type::from_class(unsafe { rb_cString }), types::String); + assert_subtype(Type::from_class(unsafe { rb_cString }), types::StringExact); + let c_class = define_class("C", unsafe { rb_cString }); + assert_subtype(Type::from_class(c_class), types::String); + }); + } + #[test] fn 
union_specialized_with_no_relation_returns_unspecialized() { crate::cruby::with_rubyvm(|| { diff --git a/zjit/src/options.rs b/zjit/src/options.rs index b7b20e63c4484d..f4a52e1ccdc6bd 100644 --- a/zjit/src/options.rs +++ b/zjit/src/options.rs @@ -261,7 +261,7 @@ fn parse_option(str_ptr: *const std::os::raw::c_char) -> Option<()> { ("trace-exits", exits) => { options.trace_side_exits = match exits { "" => Some(TraceExits::All), - name => Counter::get(name).map(TraceExits::Counter), + name => Some(Counter::get(name).map(TraceExits::Counter)?), } }