From 21c78cb0f72f81052323292a1b9fc7a20dee44f6 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 21 Jul 2025 16:09:28 -0400 Subject: [PATCH 1/3] [DOC] Docs for String#dump --- doc/string/dump.rdoc | 52 ++++++++++++++++++++++++++++++++++++++++++++ string.c | 11 ++-------- 2 files changed, 54 insertions(+), 9 deletions(-) create mode 100644 doc/string/dump.rdoc diff --git a/doc/string/dump.rdoc b/doc/string/dump.rdoc new file mode 100644 index 00000000000000..a5ab0bb42f2464 --- /dev/null +++ b/doc/string/dump.rdoc @@ -0,0 +1,52 @@ +Returns a printable version of +self+, enclosed in double-quotes: + + 'hello'.dump # => "\"hello\"" + +Certain special characters are rendered with escapes: + + '"'.dump # => "\"\\\"\"" + '\\'.dump # => "\"\\\\\"" + +Non-printing characters are rendered with escapes: + + s = '' + s << 7 # Alarm (bell). + s << 8 # Backspace. + s << 9 # Horizontal tab. + s << 10 # Line feed. + s << 11 # Vertical tab. + s << 12 # Form feed. + s << 13 # Carriage return. + s # => "\a\b\t\n\v\f\r" + s.dump # => "\"\\a\\b\\t\\n\\v\\f\\r\"" + +If +self+ is encoded in UTF-8 and contains Unicode characters, renders Unicode +characters as Unicode escape sequences: + + 'тест'.dump # => "\"\\u0442\\u0435\\u0441\\u0442\"" + 'こんにちは'.dump # => "\"\\u3053\\u3093\\u306B\\u3061\\u306F\"" + +If the encoding of +self+ is not ASCII-compatible (i.e., +self.encoding.ascii_compatible?+ +returns +false+), renders all ASCII-compatible bytes as ASCII characters and all +other bytes as hexadecimal. 
Appends <tt>.dup.force_encoding(\"encoding\")</tt>, where +<tt>encoding</tt> is +self.encoding.name+: + + s = 'hello' + s.encoding # => #<Encoding:UTF-8> + s.dump # => "\"hello\"" + s.encode('utf-16').dump # => "\"\\xFE\\xFF\\x00h\\x00e\\x00l\\x00l\\x00o\".dup.force_encoding(\"UTF-16\")" + s.encode('utf-16le').dump # => "\"h\\x00e\\x00l\\x00l\\x00o\\x00\".dup.force_encoding(\"UTF-16LE\")" + + s = 'тест' + s.encoding # => #<Encoding:UTF-8> + s.dump # => "\"\\u0442\\u0435\\u0441\\u0442\"" + s.encode('utf-16').dump # => "\"\\xFE\\xFF\\x04B\\x045\\x04A\\x04B\".dup.force_encoding(\"UTF-16\")" + s.encode('utf-16le').dump # => "\"B\\x045\\x04A\\x04B\\x04\".dup.force_encoding(\"UTF-16LE\")" + + s = 'こんにちは' + s.encoding # => #<Encoding:UTF-8> + s.dump # => "\"\\u3053\\u3093\\u306B\\u3061\\u306F\"" + s.encode('utf-16').dump # => "\"\\xFE\\xFF0S0\\x930k0a0o\".dup.force_encoding(\"UTF-16\")" + s.encode('utf-16le').dump # => "\"S0\\x930k0a0o0\".dup.force_encoding(\"UTF-16LE\")" + +Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. diff --git a/string.c b/string.c index 1668f06e463619..ba04d42841bf27 100644 --- a/string.c +++ b/string.c @@ -7413,16 +7413,9 @@ rb_str_inspect(VALUE str) /* * call-seq: - * dump -> string + * dump -> new_string * - * Returns a printable version of +self+, enclosed in double-quotes, - * with special characters escaped, and with non-printing characters - * replaced by hexadecimal notation: - * - * "hello \n ''".dump # => "\"hello \\n ''\"" - * "\f\x00\xff\\\"".dump # => "\"\\f\\x00\\xFF\\\\\\\"\"" - * - * Related: String#undump (inverse of String#dump). + * :include: doc/string/dump.rdoc * */ From a495e6a44ce8cff17461b250e32ab63e409a642d Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Mon, 21 Jul 2025 08:20:45 -0700 Subject: [PATCH 2/3] [ruby/prism] Clear flags on interpolated strings When inner strings aren't frozen, we need to clear the flags on interpolated string nodes so that we don't emit wrong instructions. 
The compiler is currently incorrectly emitting frozen strings because the parser is erroneously declaring interpolated strings as "frozen". We need to fix this behavior in the parser so we can fix the compiler in CRuby. This patch is a partial fix for [this bug](https://bugs.ruby-lang.org/issues/21187) https://github.com/ruby/prism/commit/eda693f056 --- prism/prism.c | 4 ++++ test/prism/result/static_literals_test.rb | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/prism/prism.c b/prism/prism.c index 85647020d832cb..a40e0ebeb0c821 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -5279,6 +5279,10 @@ pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_ switch (PM_NODE_TYPE(part)) { case PM_STRING_NODE: + // If inner string is not frozen, clear flags for this string + if (!PM_NODE_FLAG_P(part, PM_STRING_FLAGS_FROZEN)) { + CLEAR_FLAGS(node); + } part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE); break; case PM_INTERPOLATED_STRING_NODE: diff --git a/test/prism/result/static_literals_test.rb b/test/prism/result/static_literals_test.rb index dcfc692897cd66..cc070279169aba 100644 --- a/test/prism/result/static_literals_test.rb +++ b/test/prism/result/static_literals_test.rb @@ -4,6 +4,11 @@ module Prism class StaticLiteralsTest < TestCase + def test_concatenanted_string_literal_is_not_static + node = Prism.parse_statement("'a' 'b'") + refute_predicate node, :static_literal? + end + def test_static_literals assert_warning("1") assert_warning("0xA", "10", "10") From 7dbd9c26361719a45fa39838f46a76d67dc3c2e9 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Tue, 22 Jul 2025 09:06:02 +0900 Subject: [PATCH 3/3] Revert "[ruby/prism] Clear flags on interpolated strings" This reverts commit a495e6a44ce8cff17461b250e32ab63e409a642d. 
This breaks extension builds: ``` /Users/hsbt/Documents/github.com/ruby/ruby/rbconfig.rb:321:in 'String#replace': can't modify frozen String: "$(SDKROOT)$(prefix)/include" (FrozenError) from /Users/hsbt/Documents/github.com/ruby/ruby/rbconfig.rb:321:in 'RbConfig.expand' from /Users/hsbt/Documents/github.com/ruby/ruby/rbconfig.rb:314:in 'block in RbConfig.expand' from /Users/hsbt/Documents/github.com/ruby/ruby/rbconfig.rb:307:in 'String#gsub' from /Users/hsbt/Documents/github.com/ruby/ruby/rbconfig.rb:307:in 'RbConfig.expand' from /Users/hsbt/Documents/github.com/ruby/ruby/rbconfig.rb:314:in 'block in RbConfig.expand' from /Users/hsbt/Documents/github.com/ruby/ruby/rbconfig.rb:307:in 'String#gsub' from /Users/hsbt/Documents/github.com/ruby/ruby/rbconfig.rb:307:in 'RbConfig.expand' from /Users/hsbt/Documents/github.com/ruby/ruby/rbconfig.rb:314:in 'block in RbConfig.expand' from /Users/hsbt/Documents/github.com/ruby/ruby/rbconfig.rb:307:in 'String#gsub' from /Users/hsbt/Documents/github.com/ruby/ruby/rbconfig.rb:307:in 'RbConfig.expand' from /Users/hsbt/Documents/github.com/ruby/ruby/rbconfig.rb:325:in 'block in <module:RbConfig>' from /Users/hsbt/Documents/github.com/ruby/ruby/rbconfig.rb:324:in 'Hash#each_value' from /Users/hsbt/Documents/github.com/ruby/ruby/rbconfig.rb:324:in '<module:RbConfig>' from /Users/hsbt/Documents/github.com/ruby/ruby/rbconfig.rb:11:in '<top (required)>' from ./ext/extmk.rb:42:in 'Kernel#require' from ./ext/extmk.rb:42:in '<main>'
make[1]: *** [ext/configure-ext.mk:70: ext/json/exts.mk] Error 1 ``` --- prism/prism.c | 4 ---- test/prism/result/static_literals_test.rb | 5 ----- 2 files changed, 9 deletions(-) diff --git a/prism/prism.c b/prism/prism.c index a40e0ebeb0c821..85647020d832cb 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -5279,10 +5279,6 @@ pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_ switch (PM_NODE_TYPE(part)) { case PM_STRING_NODE: - // If inner string is not frozen, clear flags for this string - if (!PM_NODE_FLAG_P(part, PM_STRING_FLAGS_FROZEN)) { - CLEAR_FLAGS(node); - } part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE); break; case PM_INTERPOLATED_STRING_NODE: diff --git a/test/prism/result/static_literals_test.rb b/test/prism/result/static_literals_test.rb index cc070279169aba..dcfc692897cd66 100644 --- a/test/prism/result/static_literals_test.rb +++ b/test/prism/result/static_literals_test.rb @@ -4,11 +4,6 @@ module Prism class StaticLiteralsTest < TestCase - def test_concatenanted_string_literal_is_not_static - node = Prism.parse_statement("'a' 'b'") - refute_predicate node, :static_literal? - end - def test_static_literals assert_warning("1") assert_warning("0xA", "10", "10")