Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions depend
Original file line number Diff line number Diff line change
Expand Up @@ -7393,6 +7393,7 @@ jit.$(OBJEXT): $(top_srcdir)/internal/sanitizers.h
jit.$(OBJEXT): $(top_srcdir)/internal/serial.h
jit.$(OBJEXT): $(top_srcdir)/internal/set_table.h
jit.$(OBJEXT): $(top_srcdir)/internal/static_assert.h
jit.$(OBJEXT): $(top_srcdir)/internal/string.h
jit.$(OBJEXT): $(top_srcdir)/internal/variable.h
jit.$(OBJEXT): $(top_srcdir)/internal/vm.h
jit.$(OBJEXT): $(top_srcdir)/internal/warnings.h
Expand Down
134 changes: 69 additions & 65 deletions doc/string/split.rdoc
Original file line number Diff line number Diff line change
@@ -1,99 +1,103 @@
Returns an array of substrings of +self+
that are the result of splitting +self+
Creates an array of substrings by splitting +self+
at each occurrence of the given field separator +field_sep+.

When +field_sep+ is <tt>$;</tt>:
With no arguments given,
splits using the field separator <tt>$;</tt>,
whose default value is +nil+.

- If <tt>$;</tt> is +nil+ (its default value),
the split occurs just as if +field_sep+ were given as a space character
(see below).
With no block given, returns the array of substrings:

- If <tt>$;</tt> is a string,
the split occurs just as if +field_sep+ were given as that string
(see below).
'abracadabra'.split('a') # => ["", "br", "c", "d", "br"]

When +field_sep+ is <tt>' '</tt> and +limit+ is +0+ (its default value),
the split occurs at each sequence of whitespace:
When +field_sep+ is +nil+ or <tt>' '</tt> (a single space),
splits at each sequence of whitespace:

'abc def ghi'.split(' ') # => ["abc", "def", "ghi"]
"abc \n\tdef\t\n ghi".split(' ') # => ["abc", "def", "ghi"]
'abc def ghi'.split(' ') # => ["abc", "def", "ghi"]
'foo bar baz'.split(nil) # => ["foo", "bar", "baz"]
'foo bar baz'.split(' ') # => ["foo", "bar", "baz"]
"foo \n\tbar\t\n baz".split(' ') # => ["foo", "bar", "baz"]
'foo  bar   baz'.split(' ') # => ["foo", "bar", "baz"]
''.split(' ') # => []

When +field_sep+ is a string different from <tt>' '</tt>
and +limit+ is +0+,
the split occurs at each occurrence of +field_sep+;
trailing empty substrings are not returned:
When +field_sep+ is an empty string,
splits at every character:

'abracadabra'.split('ab') # => ["", "racad", "ra"]
'aaabcdaaa'.split('a') # => ["", "", "", "bcd"]
''.split('a') # => []
'3.14159'.split('1') # => ["3.", "4", "59"]
'!@#$%^$&*($)_+'.split('$') # => ["!@#", "%^", "&*(", ")_+"]
'тест'.split('т') # => ["", "ес"]
'こんにちは'.split('に') # => ["こん", "ちは"]
'abracadabra'.split('') # => ["a", "b", "r", "a", "c", "a", "d", "a", "b", "r", "a"]
''.split('') # => []
'тест'.split('') # => ["т", "е", "с", "т"]
'こんにちは'.split('') # => ["こ", "ん", "に", "ち", "は"]

When +field_sep+ is a Regexp and +limit+ is +0+,
the split occurs at each occurrence of a match;
trailing empty substrings are not returned:
When +field_sep+ is a non-empty string and different from <tt>' '</tt> (a single space),
uses that string as the separator:

'abracadabra'.split('a') # => ["", "br", "c", "d", "br"]
'abracadabra'.split('ab') # => ["", "racad", "ra"]
''.split('a') # => []
'тест'.split('т') # => ["", "ес"]
'こんにちは'.split('に') # => ["こん", "ちは"]

When +field_sep+ is a Regexp,
splits at each occurrence of a matching substring:

'abracadabra'.split(/ab/) # => ["", "racad", "ra"]
'aaabcdaaa'.split(/a/) # => ["", "", "", "bcd"]
'aaabcdaaa'.split(//) # => ["a", "a", "a", "b", "c", "d", "a", "a", "a"]
'1 + 1 == 2'.split(/\W+/) # => ["1", "1", "2"]
'abracadabra'.split(//) # => ["a", "b", "r", "a", "c", "a", "d", "a", "b", "r", "a"]

If the \Regexp contains groups, their matches are also included
If the \Regexp contains groups, their matches are included
in the returned array:

'1:2:3'.split(/(:)()()/, 2) # => ["1", ":", "", "", "2:3"]

As seen above, if +limit+ is +0+,
trailing empty substrings are not returned:
Argument +limit+ sets a limit on the size of the returned array;
it also determines whether trailing empty strings are included in the returned array.

'aaabcdaaa'.split('a') # => ["", "", "", "bcd"]
When +limit+ is zero,
there is no limit on the size of the array,
but trailing empty strings are omitted:

If +limit+ is positive integer +n+, no more than <tt>n - 1</tt>
splits occur, so that at most +n+ substrings are returned,
and trailing empty substrings are included:
'abracadabra'.split('', 0) # => ["a", "b", "r", "a", "c", "a", "d", "a", "b", "r", "a"]
'abracadabra'.split('a', 0) # => ["", "br", "c", "d", "br"] # Empty string after last 'a' omitted.

'aaabcdaaa'.split('a', 1) # => ["aaabcdaaa"]
'aaabcdaaa'.split('a', 2) # => ["", "aabcdaaa"]
'aaabcdaaa'.split('a', 5) # => ["", "", "", "bcd", "aa"]
'aaabcdaaa'.split('a', 7) # => ["", "", "", "bcd", "", "", ""]
'aaabcdaaa'.split('a', 8) # => ["", "", "", "bcd", "", "", ""]
When +limit+ is a positive integer +n+,
there is a limit on the size of the array (no more than <tt>n - 1</tt> splits occur),
and trailing empty strings are included:

Note that if +field_sep+ is a \Regexp containing groups,
their matches are in the returned array, but do not count toward the limit.
'abracadabra'.split('', 3) # => ["a", "b", "racadabra"]
'abracadabra'.split('a', 3) # => ["", "br", "cadabra"]
'abracadabra'.split('', 30) # => ["a", "b", "r", "a", "c", "a", "d", "a", "b", "r", "a", ""]
'abracadabra'.split('a', 30) # => ["", "br", "c", "d", "br", ""]
'abracadabra'.split('', 1) # => ["abracadabra"]
'abracadabra'.split('a', 1) # => ["abracadabra"]

If +limit+ is negative, there is no limit on the number of splits
(as when +limit+ is zero), but, unlike when +limit+ is zero,
trailing empty substrings are included:
When +limit+ is negative,
there is no limit on the size of the array,
and trailing empty strings are included:

'aaabcdaaa'.split('a', -1) # => ["", "", "", "bcd", "", "", ""]
'abracadabra'.split('', -1) # => ["a", "b", "r", "a", "c", "a", "d", "a", "b", "r", "a", ""]
'abracadabra'.split('a', -1) # => ["", "br", "c", "d", "br", ""]

If a block is given, calls the block with each substring and returns +self+:

'abc def ghi'.split(' ') {|substring| p substring }
'foo bar baz'.split(' ') {|substring| p substring }

Output:

"foo"
"bar"
"baz"

Output:
Note that the above example is functionally equivalent to:

"abc"
"def"
"ghi"
=> "abc def ghi"
'foo bar baz'.split(' ').each {|substring| p substring }

Note that the above example is functionally the same as calling +#each+ after
+#split+ and giving the same block. However, the above example has better
performance because it avoids the creation of an intermediate array. Also,
note the different return values.
Output:

'abc def ghi'.split(' ').each {|substring| p substring }
"foo"
"bar"
"baz"

Output:
But the latter:

"abc"
"def"
"ghi"
=> ["abc", "def", "ghi"]
- Has poorer performance because it creates an intermediate array.
- Returns an array (instead of +self+).

Related: String#partition, String#rpartition.
Related: see {Converting to Non-String}[rdoc-ref:String@Converting+to+Non--5CString].
33 changes: 33 additions & 0 deletions doc/string/squeeze.rdoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
Returns a copy of +self+ with each tuple (doubling, tripling, etc.) of specified characters
"squeezed" down to a single character.

The tuples to be squeezed are specified by arguments +selectors+,
each of which is a string;
see {Character Selectors}[rdoc-ref:character_selectors.rdoc@Character+Selectors].

A single argument may be a single character:

'Noooooo!'.squeeze('o') # => "No!"
'foo  bar  baz'.squeeze(' ') # => "foo bar baz"
'Mississippi'.squeeze('s') # => "Misisippi"
'Mississippi'.squeeze('p') # => "Mississipi"
'Mississippi'.squeeze('x') # => "Mississippi" # Unused selector character is ignored.
'бессонница'.squeeze('с') # => "бесонница"
'бессонница'.squeeze('н') # => "бессоница"

A single argument may be a string of characters:

'Mississippi'.squeeze('sp') # => "Misisipi"
'Mississippi'.squeeze('ps') # => "Misisipi" # Order doesn't matter.
'Mississippi'.squeeze('nonsense') # => "Misisippi" # Unused selector characters are ignored.

A single argument may be a range of characters:

'Mississippi'.squeeze('a-p') # => "Mississipi"
'Mississippi'.squeeze('q-z') # => "Misisippi"
'Mississippi'.squeeze('a-z') # => "Misisipi"

Multiple arguments are allowed;
see {Multiple Character Selectors}[rdoc-ref:character_selectors.rdoc@Multiple+Character+Selectors].

Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String].
10 changes: 10 additions & 0 deletions doc/zjit.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,16 @@ A file called `zjit_exits_{pid}.dump` will be created in the same directory as `
stackprof path/to/zjit_exits_{pid}.dump
```

### Printing ZJIT Errors

`--zjit-debug` prints ZJIT compilation errors and other diagnostics:

```bash
./miniruby --zjit-debug script.rb
```

As you might guess from the name, this option is intended mostly for ZJIT developers.

## Useful dev commands

To view YARV output for code snippets:
Expand Down
2 changes: 1 addition & 1 deletion ext/socket/tcpsocket.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

/*
* call-seq:
* TCPSocket.new(remote_host, remote_port, local_host=nil, local_port=nil, resolv_timeout: nil, connect_timeout: nil, fast_fallback: true)
* TCPSocket.new(remote_host, remote_port, local_host=nil, local_port=nil, resolv_timeout: nil, connect_timeout: nil, open_timeout: nil, fast_fallback: true)
*
* Opens a TCP connection to +remote_host+ on +remote_port+. If +local_host+
* and +local_port+ are specified, then those parameters are used on the local
Expand Down
15 changes: 15 additions & 0 deletions jit.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "internal/gc.h"
#include "vm_sync.h"
#include "internal/fixnum.h"
#include "internal/string.h"

enum jit_bindgen_constants {
// Field offsets for the RObject struct
Expand Down Expand Up @@ -180,6 +181,12 @@ rb_jit_get_proc_ptr(VALUE procv)
return proc;
}

// Return the `builtin_attrs` value stored in the body of `iseq`.
unsigned int
rb_jit_iseq_builtin_attrs(const rb_iseq_t *iseq)
{
return iseq->body->builtin_attrs;
}

int
rb_get_mct_argc(const rb_method_cfunc_t *mct)
{
Expand Down Expand Up @@ -750,3 +757,11 @@ rb_jit_fix_mod_fix(VALUE recv, VALUE obj)
{
return rb_fix_mod_fix(recv, obj);
}

// Forward to rb_str_eql_internal() and return its result.
// YJIT/ZJIT need this function to never allocate and never raise.
VALUE
rb_yarv_str_eql_internal(VALUE str1, VALUE str2)
{
// rb_str_eql_internal() is static inline, so it is wrapped in this
// non-inline function.
return rb_str_eql_internal(str1, str2);
}
7 changes: 5 additions & 2 deletions ruby.c
Original file line number Diff line number Diff line change
Expand Up @@ -916,7 +916,9 @@ moreswitches(const char *s, ruby_cmdline_options_t *opt, int envopt)
argc = RSTRING_LEN(argary) / sizeof(ap);
ap = 0;
rb_str_cat(argary, (char *)&ap, sizeof(ap));
argv = ptr = ALLOC_N(char *, argc);

VALUE ptr_obj;
argv = ptr = RB_ALLOCV_N(char *, ptr_obj, argc);
MEMMOVE(argv, RSTRING_PTR(argary), char *, argc);

while ((i = proc_options(argc, argv, opt, envopt)) > 1 && envopt && (argc -= i) > 0) {
Expand Down Expand Up @@ -948,7 +950,8 @@ moreswitches(const char *s, ruby_cmdline_options_t *opt, int envopt)
opt->crash_report = crash_report;
}

ruby_xfree(ptr);
RB_ALLOCV_END(ptr_obj);

/* get rid of GC */
rb_str_resize(argary, 0);
rb_str_resize(argstr, 0);
Expand Down
13 changes: 2 additions & 11 deletions string.c
Original file line number Diff line number Diff line change
Expand Up @@ -8971,16 +8971,7 @@ rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
* call-seq:
* squeeze(*selectors) -> new_string
*
* Returns a copy of +self+ with characters specified by +selectors+ "squeezed"
* (see {Multiple Character Selectors}[rdoc-ref:character_selectors.rdoc@Multiple+Character+Selectors]):
*
* "Squeezed" means that each multiple-character run of a selected character
* is squeezed down to a single character;
* with no arguments given, squeezes all characters:
*
* "yellow moon".squeeze #=> "yelow mon"
* " now is the".squeeze(" ") #=> " now is the"
* "putters shoot balls".squeeze("m-z") #=> "puters shot balls"
* :include: doc/string/squeeze.rdoc
*
*/

Expand Down Expand Up @@ -9201,7 +9192,7 @@ literal_split_pattern(VALUE spat, split_type_t default_type)

/*
* call-seq:
* split(field_sep = $;, limit = 0) -> array
* split(field_sep = $;, limit = 0) -> array_of_substrings
* split(field_sep = $;, limit = 0) {|substring| ... } -> self
*
* :include: doc/string/split.rdoc
Expand Down
21 changes: 21 additions & 0 deletions test/ruby/test_zjit.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1962,6 +1962,27 @@ def test
}, call_threshold: 2
end

# Checks `block_given?` as a bare expression, and inside a method that is
# called twice without a block and once with one.
def test_block_given_p
assert_compiles "false", "block_given?"
assert_compiles '[false, false, true]', %q{
def test = block_given?
[test, test, test{}]
}, call_threshold: 2, insns: [:opt_send_without_block]
end

# Checks `block_given?` when it is evaluated inside nested blocks within a method.
def test_block_given_p_from_block
# This will do some EP hopping to find the local EP,
# so it's slightly different from doing it outside of a block.

assert_compiles '[false, false, true]', %q{
def test
yield_self { yield_self { block_given? } }
end

[test, test, test{}]
}, call_threshold: 2
end

def test_invokeblock_without_block_after_jit_call
assert_compiles '"no block given (yield)"', %q{
def test(*arr, &b)
Expand Down
14 changes: 0 additions & 14 deletions yjit.c
Original file line number Diff line number Diff line change
Expand Up @@ -244,12 +244,6 @@ rb_optimized_call(VALUE *recv, rb_execution_context_t *ec, int argc, VALUE *argv
return rb_vm_invoke_proc(ec, proc, argc, argv, kw_splat, block_handler);
}

// Return the `builtin_attrs` value stored in the body of `iseq`.
unsigned int
rb_yjit_iseq_builtin_attrs(const rb_iseq_t *iseq)
{
return iseq->body->builtin_attrs;
}

// If true, the iseq has only opt_invokebuiltin_delegate(_leave) and leave insns.
static bool
invokebuiltin_delegate_leave_p(const rb_iseq_t *iseq)
Expand Down Expand Up @@ -283,14 +277,6 @@ rb_yjit_str_simple_append(VALUE str1, VALUE str2)

extern VALUE *rb_vm_base_ptr(struct rb_control_frame_struct *cfp);

// Forward to rb_str_eql_internal() and return its result.
// YJIT needs this function to never allocate and never raise.
VALUE
rb_yarv_str_eql_internal(VALUE str1, VALUE str2)
{
// rb_str_eql_internal() is static inline, so it is wrapped in this
// non-inline function.
return rb_str_eql_internal(str1, str2);
}

VALUE
rb_str_neq_internal(VALUE str1, VALUE str2)
{
Expand Down
2 changes: 1 addition & 1 deletion yjit/bindgen/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ fn main() {
.allowlist_function("rb_jit_mark_executable")
.allowlist_function("rb_jit_mark_unused")
.allowlist_function("rb_jit_get_page_size")
.allowlist_function("rb_yjit_iseq_builtin_attrs")
.allowlist_function("rb_jit_iseq_builtin_attrs")
.allowlist_function("rb_yjit_iseq_inspect")
.allowlist_function("rb_yjit_builtin_function")
.allowlist_function("rb_set_cfp_(pc|sp)")
Expand Down
Loading