Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/actions/setup/directories/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ runs:
git config --global init.defaultBranch garbage

- if: inputs.checkout
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1
with:
path: ${{ inputs.srcdir }}
fetch-depth: ${{ inputs.fetch-depth }}
Expand Down
1 change: 1 addition & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,7 @@ AS_CASE(["$target_os"],
RT_VER=`echo "$rb_cv_msvcrt" | tr -cd [0-9]`
test "$RT_VER" = "" && RT_VER=60
test "$rb_cv_msvcrt" = "ucrt" && RT_VER=140
AS_IF([test $RT_VER -lt 120], AC_MSG_ERROR(Runtime library $RT_VER is not supported))
AC_DEFINE_UNQUOTED(RUBY_MSVCRT_VERSION, $RT_VER)
sysconfdir=
])
Expand Down
4 changes: 0 additions & 4 deletions debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -168,9 +168,7 @@ ruby_debug_breakpoint(void)
}

#if defined _WIN32
# if RUBY_MSVCRT_VERSION >= 80
extern int ruby_w32_rtc_error;
# endif
#endif
#if defined _WIN32 || defined __CYGWIN__
#include <windows.h>
Expand Down Expand Up @@ -233,9 +231,7 @@ ruby_env_debug_option(const char *str, int len, void *arg)
SET_WHEN("ci", ruby_on_ci, 1);
SET_WHEN_UINT("rgengc", &ruby_rgengc_debug, 1, ruby_rgengc_debug = 1);
#if defined _WIN32
# if RUBY_MSVCRT_VERSION >= 80
SET_WHEN("rtc_error", ruby_w32_rtc_error, 1);
# endif
#endif
#if defined _WIN32 || defined __CYGWIN__
SET_WHEN_UINT("codepage", ruby_w32_codepage, numberof(ruby_w32_codepage),
Expand Down
3 changes: 1 addition & 2 deletions doc/string.rb
Original file line number Diff line number Diff line change
Expand Up @@ -322,8 +322,7 @@
# _Substitution_
#
# - #dump: Returns a printable version of +self+, enclosed in double-quotes.
# - #undump: Returns a copy of +self+ with all <tt>\xNN</tt> notations replaced by <tt>\uNNNN</tt> notations
# and all escaped characters unescaped.
# - #undump: Inverse of #dump; returns a copy of +self+ with changes of the kinds made by #dump "undone."
# - #sub: Returns a copy of +self+ with the first substring matching a given pattern
# replaced with a given replacement string.
# - #gsub: Returns a copy of +self+ with each substring that matches a given pattern
Expand Down
137 changes: 91 additions & 46 deletions doc/string/dump.rdoc
Original file line number Diff line number Diff line change
@@ -1,52 +1,97 @@
Returns a printable version of +self+, enclosed in double-quotes:
For an ordinary string, this method, +String#dump+,
returns a printable ASCII-only version of +self+, enclosed in double-quotes.

'hello'.dump # => "\"hello\""
For a dumped string, method String#undump is the inverse of +String#dump+;
it returns a "restored" version of +self+,
where all the dumping changes have been undone.

Certain special characters are rendered with escapes:
In the simplest case, the dumped string contains the original string,
enclosed in double-quotes;
this example is done in +irb+ (interactive Ruby), which uses method +inspect+ to render the results:

'"'.dump # => "\"\\\"\""
'\\'.dump # => "\"\\\\\""
s = 'hello' # => "hello"
s.dump # => "\"hello\""
s.dump.undump # => "hello"

Non-printing characters are rendered with escapes:
Keep in mind that in the second line above:

- The outer double-quotes are put on by +inspect+,
and _are_ _not_ part of the output of #dump.
- The inner double-quotes _are_ part of the output of +dump+,
and are escaped by +inspect+ because they are within the outer double-quotes.

To avoid confusion, we'll use this helper method to omit the outer double-quotes:

def dump(s)
print "String: ", s, "\n"
print "Dumped: ", s.dump, "\n"
print "Undumped: ", s.dump.undump, "\n"
end

So that for string <tt>'hello'</tt>, we'll see:

String: hello
Dumped: "hello"
Undumped: hello

In a dump, certain special characters are escaped:

String: "
Dumped: "\""
Undumped: "

String: \
Dumped: "\\"
Undumped: \

In a dump, unprintable characters are replaced by printable ones;
the unprintable characters shown here are control characters, including the whitespace characters (other than space itself);
here we see the ordinals for those characters, together with explanatory text:

h = {
7 => 'Alert (BEL)',
8 => 'Backspace (BS)',
9 => 'Horizontal tab (HT)',
10 => 'Linefeed (LF)',
11 => 'Vertical tab (VT)',
12 => 'Formfeed (FF)',
13 => 'Carriage return (CR)'
}

In this example, the dumped output is printed by method #inspect,
and so contains both outer double-quotes and escaped inner double-quotes:

s = ''
s << 7 # Alarm (bell).
s << 8 # Back space.
s << 9 # Horizontal tab.
s << 10 # Line feed.
s << 11 # Vertical tab.
s << 12 # Form feed.
s << 13 # Carriage return.
s # => "\a\b\t\n\v\f\r"
s.dump # => "\"\\a\\b\\t\\n\\v\\f\\r\""

If +self+ is encoded in UTF-8 and contains Unicode characters, renders Unicode
characters in Unicode escape sequence:

'тест'.dump # => "\"\\u0442\\u0435\\u0441\\u0442\""
'こんにちは'.dump # => "\"\\u3053\\u3093\\u306B\\u3061\\u306F\""

If the encoding of +self+ is not ASCII-compatible (i.e., +self.encoding.ascii_compatible?+
returns +false+), renders all ASCII-compatible bytes as ASCII characters and all
other bytes as hexadecimal. Appends <tt>.dup.force_encoding(\"encoding\")</tt>, where
<tt><encoding></tt> is +self.encoding.name+:

s = 'hello'
s.encoding # => #<Encoding:UTF-8>
s.dump # => "\"hello\""
s.encode('utf-16').dump # => "\"\\xFE\\xFF\\x00h\\x00e\\x00l\\x00l\\x00o\".dup.force_encoding(\"UTF-16\")"
s.encode('utf-16le').dump # => "\"h\\x00e\\x00l\\x00l\\x00o\\x00\".dup.force_encoding(\"UTF-16LE\")"

s = 'тест'
s.encoding # => #<Encoding:UTF-8>
s.dump # => "\"\\u0442\\u0435\\u0441\\u0442\""
s.encode('utf-16').dump # => "\"\\xFE\\xFF\\x04B\\x045\\x04A\\x04B\".dup.force_encoding(\"UTF-16\")"
s.encode('utf-16le').dump # => "\"B\\x045\\x04A\\x04B\\x04\".dup.force_encoding(\"UTF-16LE\")"

s = 'こんにちは'
s.encoding # => #<Encoding:UTF-8>
s.dump # => "\"\\u3053\\u3093\\u306B\\u3061\\u306F\""
s.encode('utf-16').dump # => "\"\\xFE\\xFF0S0\\x930k0a0o\".dup.force_encoding(\"UTF-16\")"
s.encode('utf-16le').dump # => "\"S0\\x930k0a0o0\".dup.force_encoding(\"UTF-16LE\")"

Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String].
h.keys.each {|i| s << i } # => [7, 8, 9, 10, 11, 12, 13]
s # => "\a\b\t\n\v\f\r"
s.dump # => "\"\\a\\b\\t\\n\\v\\f\\r\""

If +self+ is encoded in UTF-8 and contains Unicode characters,
each Unicode character is dumped as a Unicode escape sequence:

String: тест
Dumped: "\u0442\u0435\u0441\u0442"
Undumped: тест

String: こんにちは
Dumped: "\u3053\u3093\u306B\u3061\u306F"
Undumped: こんにちは

If the encoding of +self+ is not ASCII-compatible
(i.e., if <tt>self.encoding.ascii_compatible?</tt> returns +false+),
each ASCII-compatible byte is dumped as an ASCII character,
and all other bytes are dumped as hexadecimal;
also appends <tt>.dup.force_encoding(\"encoding\")</tt>,
where <tt><encoding></tt> is <tt>self.encoding.name</tt>:

String: hello
Dumped: "\xFE\xFF\x00h\x00e\x00l\x00l\x00o".dup.force_encoding("UTF-16")
Undumped: hello

String: тест
Dumped: "\xFE\xFF\x04B\x045\x04A\x04B".dup.force_encoding("UTF-16")
Undumped: тест

String: こんにちは
Dumped: "\xFE\xFF0S0\x930k0a0o".dup.force_encoding("UTF-16")
Undumped: こんにちは
28 changes: 28 additions & 0 deletions doc/string/unicode_normalize.rdoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
Returns a copy of +self+ with
{Unicode normalization}[https://unicode.org/reports/tr15] applied.

Argument +form+ must be one of the following symbols
(see {Unicode normalization forms}[https://unicode.org/reports/tr15/#Norm_Forms]):

- +:nfc+: Canonical decomposition, followed by canonical composition.
- +:nfd+: Canonical decomposition.
- +:nfkc+: Compatibility decomposition, followed by canonical composition.
- +:nfkd+: Compatibility decomposition.

The encoding of +self+ must be one of:

- <tt>Encoding::UTF_8</tt>.
- <tt>Encoding::UTF_16BE</tt>.
- <tt>Encoding::UTF_16LE</tt>.
- <tt>Encoding::UTF_32BE</tt>.
- <tt>Encoding::UTF_32LE</tt>.
- <tt>Encoding::GB18030</tt>.
- <tt>Encoding::UCS_2BE</tt>.
- <tt>Encoding::UCS_4BE</tt>.

Examples:

"a\u0300".unicode_normalize # => "à" # Lowercase 'a' with grave accent.
"a\u0300".unicode_normalize(:nfd) # => "à" # Same.

Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String].
2 changes: 1 addition & 1 deletion error.c
Original file line number Diff line number Diff line change
Expand Up @@ -1076,7 +1076,7 @@ NORETURN(static void die(void));
static void
die(void)
{
#if defined(_WIN32) && defined(RUBY_MSVCRT_VERSION) && RUBY_MSVCRT_VERSION >= 80
#if defined(_WIN32)
_set_abort_behavior( 0, _CALL_REPORTFAULT);
#endif

Expand Down
4 changes: 0 additions & 4 deletions ext/etc/etc.c
Original file line number Diff line number Diff line change
Expand Up @@ -832,11 +832,7 @@ etc_uname(VALUE obj)
rb_w32_conv_from_wchar(v.szCSDVersion, rb_utf8_encoding()));
rb_hash_aset(result, SYMBOL_LIT("version"), version);

# if defined _MSC_VER && _MSC_VER < 1300
# define GET_COMPUTER_NAME(ptr, plen) GetComputerNameW(ptr, plen)
# else
# define GET_COMPUTER_NAME(ptr, plen) GetComputerNameExW(ComputerNameDnsFullyQualified, ptr, plen)
# endif
GET_COMPUTER_NAME(NULL, &len);
buf = ALLOCV_N(WCHAR, vbuf, len);
if (GET_COMPUTER_NAME(buf, &len)) {
Expand Down
2 changes: 1 addition & 1 deletion ext/etc/etc.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,5 @@ Gem::Specification.new do |spec|
spec.require_paths = ["lib"]
spec.extensions = %w{ext/etc/extconf.rb}

spec.required_ruby_version = ">= 2.6.0"
spec.required_ruby_version = ">= 2.7.0"
end
3 changes: 0 additions & 3 deletions ext/socket/getaddrinfo.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,6 @@
#endif
#include <unistd.h>
#else
#if defined(_MSC_VER) && _MSC_VER <= 1200
#include <windows.h>
#endif
#include <winsock2.h>
#include <ws2tcpip.h>
#include <io.h>
Expand Down
3 changes: 0 additions & 3 deletions ext/socket/getnameinfo.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,6 @@
#endif
#endif
#ifdef _WIN32
#if defined(_MSC_VER) && _MSC_VER <= 1200
#include <windows.h>
#endif
#include <winsock2.h>
#include <ws2tcpip.h>
#define snprintf _snprintf
Expand Down
2 changes: 0 additions & 2 deletions ext/socket/rubysocket.h
Original file line number Diff line number Diff line change
Expand Up @@ -510,8 +510,6 @@ extern ID tcp_fast_fallback;
const char *inet_ntop(int, const void *, char *, size_t);
#elif defined __MINGW32__
# define inet_ntop(f,a,n,l) rb_w32_inet_ntop(f,a,n,l)
#elif defined _MSC_VER && RUBY_MSVCRT_VERSION < 90
const char *WSAAPI inet_ntop(int, const void *, char *, size_t);
#endif

#endif
8 changes: 7 additions & 1 deletion gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -3332,7 +3332,13 @@ rb_gc_obj_optimal_size(VALUE obj)
return sizeof(struct RObject);
}
else {
return rb_obj_embedded_size(ROBJECT_FIELDS_CAPACITY(obj));
size_t size = rb_obj_embedded_size(ROBJECT_FIELDS_CAPACITY(obj));
if (rb_gc_size_allocatable_p(size)) {
return size;
}
else {
return sizeof(struct RObject);
}
}

case T_STRING:
Expand Down
21 changes: 21 additions & 0 deletions gc/mmtk/mmtk.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ struct objspace {
unsigned long live_ractor_cache_count;

pthread_mutex_t mutex;
rb_atomic_t mutator_blocking_count;
bool world_stopped;
pthread_cond_t cond_world_stopped;
pthread_cond_t cond_world_started;
Expand Down Expand Up @@ -131,7 +132,9 @@ rb_mmtk_block_for_gc(MMTk_VMMutatorThread mutator)
struct objspace *objspace = rb_gc_get_objspace();

size_t starting_gc_count = objspace->gc_count;
RUBY_ATOMIC_INC(objspace->mutator_blocking_count);
int lock_lev = RB_GC_VM_LOCK();
RUBY_ATOMIC_DEC(objspace->mutator_blocking_count);
int err;
if ((err = pthread_mutex_lock(&objspace->mutex)) != 0) {
rb_bug("ERROR: cannot lock objspace->mutex: %s", strerror(err));
Expand Down Expand Up @@ -1049,7 +1052,25 @@ rb_gc_impl_before_fork(void *objspace_ptr)
{
struct objspace *objspace = objspace_ptr;

retry:
objspace->fork_hook_vm_lock_lev = RB_GC_VM_LOCK();
rb_gc_vm_barrier();

/* At this point, we know that all the Ractors are paused because of the
* rb_gc_vm_barrier above. Since rb_mmtk_block_for_gc is a barrier point,
* one or more Ractors could be paused there. However, mmtk_before_fork is
* not compatible with that because it assumes that the MMTk workers are idle,
* but the workers are not idle because they are busy working on a GC.
*
* This essentially implements a trylock. It will optimistically lock but will
* release the lock if it detects that any other Ractors are waiting in
* rb_mmtk_block_for_gc.
*/
rb_atomic_t mutator_blocking_count = RUBY_ATOMIC_LOAD(objspace->mutator_blocking_count);
if (mutator_blocking_count != 0) {
RB_GC_VM_UNLOCK(objspace->fork_hook_vm_lock_lev);
goto retry;
}

mmtk_before_fork();
}
Expand Down
13 changes: 1 addition & 12 deletions include/ruby/internal/compiler_is/msvc.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,19 +38,8 @@
# define RBIMPL_COMPILER_VERSION_MINOR (_MSC_FULL_VER % 10000000 / 100000)
# define RBIMPL_COMPILER_VERSION_PATCH (_MSC_FULL_VER % 100000)

#elif defined(_MSC_FULL_VER)
# define RBIMPL_COMPILER_IS_MSVC 1
# /* _MSC_FULL_VER = XXYYZZZZ */
# define RBIMPL_COMPILER_VERSION_MAJOR (_MSC_FULL_VER / 1000000)
# define RBIMPL_COMPILER_VERSION_MINOR (_MSC_FULL_VER % 1000000 / 10000)
# define RBIMPL_COMPILER_VERSION_PATCH (_MSC_FULL_VER % 10000)

#else
# define RBIMPL_COMPILER_IS_MSVC 1
# /* _MSC_VER = XXYY */
# define RBIMPL_COMPILER_VERSION_MAJOR (_MSC_VER / 100)
# define RBIMPL_COMPILER_VERSION_MINOR (_MSC_VER % 100)
# define RBIMPL_COMPILER_VERSION_PATCH 0
# error Unsupported MSVC version
#endif

#endif /* RBIMPL_COMPILER_IS_MSVC_H */
Loading