From 1b8c6c1e33a46ce0cec0f0e3c56b94fd1c4fc357 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 9 Mar 2026 15:04:07 +0900 Subject: [PATCH 01/27] Suppress a sign-compare warning This cast is safe because `rb_absint_size` returns `nlz` within the range `0...CHAR_BIT`. --- numeric.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numeric.c b/numeric.c index 226a47f7b8f38b..287294f9b5a899 100644 --- a/numeric.c +++ b/numeric.c @@ -4332,7 +4332,7 @@ int_accurate_in_double(VALUE n) const size_t mant_size = roomof(DBL_MANT_DIG, CHAR_BIT); if (size < mant_size) return true; if (size > mant_size) return false; - if (nlz >= (CHAR_BIT * mant_size - DBL_MANT_DIG)) return true; + if ((size_t)nlz >= (CHAR_BIT * mant_size - DBL_MANT_DIG)) return true; #endif return false; } From 59f744c5b028449e97254928ead35f9602491b42 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 9 Mar 2026 15:22:14 +0900 Subject: [PATCH 02/27] Suppress format warnings Use the appropriate modifier. `size_t` is not always `unsigned long`, even if the size is the same. 
--- gc/default/default.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index 1099d6e0dc11e5..1c91a5e0d5b0fc 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -8266,7 +8266,7 @@ rb_gc_impl_free(void *objspace_ptr, void *ptr, size_t old_size) } if (old_size && (old_size + sizeof(struct malloc_obj_info)) != info->size) { - rb_bug("buffer %p freed with old_size=%lu, but was allocated with size=%lu", ptr, old_size, info->size - sizeof(struct malloc_obj_info)); + rb_bug("buffer %p freed with old_size=%zu, but was allocated with size=%zu", ptr, old_size, info->size - sizeof(struct malloc_obj_info)); } #endif ptr = info; @@ -8377,7 +8377,7 @@ rb_gc_impl_realloc(void *objspace_ptr, void *ptr, size_t new_size, size_t old_si ptr = info; #if VERIFY_FREE_SIZE if (old_size && (old_size + sizeof(struct malloc_obj_info)) != info->size) { - rb_bug("buffer %p realloced with old_size=%lu, but was allocated with size=%lu", ptr, old_size, info->size - sizeof(struct malloc_obj_info)); + rb_bug("buffer %p realloced with old_size=%zu, but was allocated with size=%zu", ptr, old_size, info->size - sizeof(struct malloc_obj_info)); } #endif old_size = info->size; From 910682638b8871ee913bf0e97f1ad857d682b72b Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 6 Mar 2026 13:29:53 +0900 Subject: [PATCH 03/27] [ruby/rubygems] Add YAML serializer (dump) for Gem objects Replace the simple dump_hash method with dump_obj that can serialize Gem::Specification, Version, Platform, Requirement, Dependency, Hash, Array, Time, and String objects into YAML format. This enables pure-Ruby YAML serialization without depending on Psych. The serializer handles multiline strings (block scalars), special character quoting ($, !, &, *, :, @, %), and proper indentation for nested structures. 
https://github.com/ruby/rubygems/commit/bffb238b35 --- lib/rubygems/yaml_serializer.rb | 100 +++++++++++++++++++++++++------- 1 file changed, 79 insertions(+), 21 deletions(-) diff --git a/lib/rubygems/yaml_serializer.rb b/lib/rubygems/yaml_serializer.rb index f89004f32ad8e4..278ebd624e4ce3 100644 --- a/lib/rubygems/yaml_serializer.rb +++ b/lib/rubygems/yaml_serializer.rb @@ -1,32 +1,94 @@ # frozen_string_literal: true module Gem - # A stub yaml serializer that can handle only hashes and strings (as of now). module YAMLSerializer module_function - def dump(hash) - yaml = String.new("---") - yaml << dump_hash(hash) + def dump(obj) + "---#{dump_obj(obj, 0)}" end - def dump_hash(hash) - yaml = String.new("\n") - hash.each do |k, v| - yaml << k << ":" - if v.is_a?(Hash) - yaml << dump_hash(v).gsub(/^(?!$)/, " ") # indent all non-empty lines - elsif v.is_a?(Array) # Expected to be array of strings - if v.empty? - yaml << " []\n" - else - yaml << "\n- " << v.map {|s| s.to_s.gsub(/\s+/, " ").inspect }.join("\n- ") << "\n" + def dump_obj(obj, indent, quote: false) + case obj + when Gem::Specification + parts = [" !ruby/object:Gem::Specification\n"] + parts << "#{" " * indent}name:#{dump_obj(obj.name, indent + 2)}" + parts << "#{" " * indent}version:#{dump_obj(obj.version, indent + 2)}" + parts << "#{" " * indent}platform: #{obj.platform}\n" + if obj.platform.to_s != obj.original_platform.to_s + parts << "#{" " * indent}original_platform: #{obj.original_platform}\n" + end + + attributes = Gem::Specification.attribute_names.map(&:to_s).sort - %w[name version platform] + attributes.each do |name| + val = obj.instance_variable_get("@#{name}") + next if val.nil? 
+ parts << "#{" " * indent}#{name}:#{dump_obj(val, indent + 2)}" + end + res = parts.join + res << "\n" unless res.end_with?("\n") + res + when Gem::Version + " !ruby/object:Gem::Version\n#{" " * indent}version: #{dump_obj(obj.version.to_s, indent + 2).lstrip}" + when Gem::Platform + " !ruby/object:Gem::Platform\n#{" " * indent}cpu: #{obj.cpu.inspect}\n#{" " * indent}os: #{obj.os.inspect}\n#{" " * indent}version: #{obj.version.inspect}\n" + when Gem::Requirement + " !ruby/object:Gem::Requirement\n#{" " * indent}requirements:#{dump_obj(obj.requirements, indent + 2)}" + when Gem::Dependency + [ + " !ruby/object:Gem::Dependency\n", + "#{" " * indent}name: #{dump_obj(obj.name, indent + 2).lstrip}", + "#{" " * indent}requirement:#{dump_obj(obj.requirement, indent + 2)}", + "#{" " * indent}type: #{dump_obj(obj.type, indent + 2).lstrip}", + "#{" " * indent}prerelease: #{dump_obj(obj.prerelease?, indent + 2).lstrip}", + "#{" " * indent}version_requirements:#{dump_obj(obj.requirement, indent + 2)}", + ].join + when Hash + if obj.empty? + " {}\n" + else + parts = ["\n"] + obj.each do |k, v| + is_symbol = k.is_a?(Symbol) || (k.is_a?(String) && k.start_with?(":")) + key_str = k.is_a?(Symbol) ? k.inspect : k.to_s + parts << "#{" " * indent}#{key_str}:#{dump_obj(v, indent + 2, quote: is_symbol)}" end + parts.join + end + when Array + if obj.empty? + " []\n" else - yaml << " " << v.to_s.gsub(/\s+/, " ").inspect << "\n" + parts = ["\n"] + obj.each do |v| + parts << "#{" " * indent}-#{dump_obj(v, indent + 2)}" + end + parts.join end + when Time + " #{obj.utc.strftime("%Y-%m-%d %H:%M:%S.%N Z")}\n" + when String + if obj.include?("\n") + parts = [obj.end_with?("\n") ? " |\n" : " |-\n"] + obj.each_line do |line| + parts << "#{" " * (indent + 2)}#{line}" + end + res = parts.join + res << "\n" unless res.end_with?("\n") + res + elsif quote || obj.empty? 
|| obj =~ /^[!*&:@%$]/ || obj =~ /^-?\d+(\.\d+)?$/ || obj =~ /^[<>=-]/ || + obj == "true" || obj == "false" || obj == "nil" || + obj.include?(":") || obj.include?("#") || obj.include?("[") || obj.include?("]") || + obj.include?("{") || obj.include?("}") || obj.include?(",") + " #{obj.to_s.inspect}\n" + else + " #{obj}\n" + end + when Numeric, Symbol, TrueClass, FalseClass, nil + " #{obj.inspect}\n" + else + " #{obj.to_s.inspect}\n" end - yaml end ARRAY_REGEX = / @@ -90,9 +152,5 @@ def strip_comment(val) val end end - - class << self - private :dump_hash - end end end From 91110cef3bdeacaeccd9e5f74eae95bcaaaf8a99 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 6 Mar 2026 13:31:17 +0900 Subject: [PATCH 04/27] [ruby/rubygems] Add full YAML parser with recursive descent Replace the simple regex-based load method with a full recursive descent parser (parse_any) that handles nested hashes, arrays, block scalars (| and |-), YAML anchors (&anchor) and aliases (*alias), !ruby/object: type tags, flow notation ({} and []), and non-specific tag stripping (! prefix). Add helper methods: parse_block_scalar for multiline strings, build_permitted_tags for security validation, unquote_simple for type coercion (booleans, integers, timestamps, quoted strings), and improve strip_comment to handle # inside quoted strings. The parser returns raw Ruby data structures (Hash, Array, String) with :tag metadata for typed objects, without yet reconstructing Gem-specific objects. 
https://github.com/ruby/rubygems/commit/053b576c20 --- lib/rubygems/yaml_serializer.rb | 358 ++++++++++++++++++++++++++++---- 1 file changed, 313 insertions(+), 45 deletions(-) diff --git a/lib/rubygems/yaml_serializer.rb b/lib/rubygems/yaml_serializer.rb index 278ebd624e4ce3..626bd264ed02b3 100644 --- a/lib/rubygems/yaml_serializer.rb +++ b/lib/rubygems/yaml_serializer.rb @@ -91,63 +91,331 @@ def dump_obj(obj, indent, quote: false) end end - ARRAY_REGEX = / - ^ - (?:[ ]*-[ ]) # '- ' before array items - (['"]?) # optional opening quote - (.*) # value - \1 # matching closing quote - $ - /xo - - HASH_REGEX = / - ^ - ([ ]*) # indentations - ([^#]+) # key excludes comment char '#' - (?::(?=(?:\s|$))) # : (without the lookahead the #key includes this when : is present in value) - [ ]? - (['"]?) # optional opening quote - (.*) # value - \3 # matching closing quote - $ - /xo - - def load(str) - res = {} - stack = [res] - last_hash = nil - last_empty_key = nil - str.split(/\r?\n/) do |line| - if match = HASH_REGEX.match(line) - indent, key, quote, val = match.captures - val = strip_comment(val) + def load(str, permitted_classes: [], permitted_symbols: [], aliases: true) + return {} if str.nil? || str.empty? + lines = str.split(/\r?\n/) + if lines[0]&.start_with?("---") + if lines[0].strip == "---" + lines.shift + else + lines[0] = lines[0].sub(/^---\s*/, "") + end + end + + permitted_tags = build_permitted_tags(permitted_classes) + anchors = {} + data = nil + while lines.any? + before_count = lines.size + parsed = parse_any(lines, -1, permitted_tags, aliases, anchors) + if lines.size == before_count && lines.any? + lines.shift + end + + if data.is_a?(Hash) && parsed.is_a?(Hash) + data.merge!(parsed) + elsif data.nil? + data = parsed + end + end + + return {} if data.nil? + data + end + + def parse_any(lines, base_indent, permitted_tags, aliases, anchors) + while lines.any? && (lines[0].strip.empty? 
|| lines[0].lstrip.start_with?("#")) + lines.shift + end + return nil if lines.empty? + + indent = lines[0][/^ */].size + return nil if indent < base_indent - depth = indent.size / 2 - if quote.empty? && val.empty? - new_hash = {} - stack[depth][key] = new_hash - stack[depth + 1] = new_hash - last_empty_key = key - last_hash = stack[depth] + line = lines[0] + + # Check for alias reference (*anchor) + if line.lstrip.start_with?("*") + unless aliases + raise ArgumentError, "YAML aliases are not allowed" + end + alias_name = lines.shift.lstrip[1..-1].strip + return anchors[alias_name] + end + + # Extract anchor if present (&anchor) + anchor_name = nil + if line.lstrip =~ /^&(\S+)\s+/ + unless aliases + raise ArgumentError, "YAML aliases are not allowed" + end + anchor_name = $1 + line = line.sub(/&#{Regexp.escape(anchor_name)}\s+/, "") + lines[0] = line + end + + if line.lstrip.start_with?("- ") || line.lstrip == "-" + res = [] + while lines.any? && lines[0][/^ */].size == indent && (lines[0].lstrip.start_with?("- ") || lines[0].lstrip == "-") + l = lines.shift + content = l.lstrip[1..-1].strip + + # Check for anchor in array item + item_anchor = nil + if content =~ /^&(\S+)/ + unless aliases + raise ArgumentError, "YAML aliases are not allowed" + end + item_anchor = $1 + content = content.sub(/^&#{Regexp.escape(item_anchor)}\s*/, "") + end + + # Check for alias in array item + if content.start_with?("*") + unless aliases + raise ArgumentError, "YAML aliases are not allowed" + end + alias_name = content[1..-1].strip + res << anchors[alias_name] + elsif content.empty? + # Empty array item - check if next line is nested content or a new item + item_value = if lines.any? 
&& lines[0][/^ */].size > indent + parse_any(lines, indent, permitted_tags, aliases, anchors) + end + anchors[item_anchor] = item_value if item_anchor + res << item_value + elsif content.start_with?("!ruby/object:") + tag = content.strip + unless permitted_tags.include?(tag) + raise ArgumentError, "Disallowed class: #{tag}" + end + nested = parse_any(lines, indent, permitted_tags, aliases, anchors) + item_value = if nested.is_a?(Hash) + nested[:tag] = tag + nested + else + { :tag => tag, "value" => nested } + end + anchors[item_anchor] = item_value if item_anchor + res << item_value + elsif content.start_with?("-") + lines.unshift(" " * (indent + 2) + content) + item_value = parse_any(lines, indent, permitted_tags, aliases, anchors) + anchors[item_anchor] = item_value if item_anchor + res << item_value + elsif content =~ /^((?:[^#:]|:[^ ])+):(?:[ ]+(.*))?$/ && !content.start_with?("!ruby/object:") + lines.unshift(" " * (indent + 2) + content) + item_value = parse_any(lines, indent, permitted_tags, aliases, anchors) + anchors[item_anchor] = item_value if item_anchor + res << item_value + elsif content.start_with?("|") + modifier = content[1..-1].to_s.strip + item_value = parse_block_scalar(lines, indent, modifier) + anchors[item_anchor] = item_value if item_anchor + res << item_value else - val = [] if val == "[]" # empty array - stack[depth][key] = val + str = unquote_simple(content) + while lines.any? && !lines[0].strip.empty? && lines[0][/^ */].size > indent + str << " " << lines.shift.strip + end + anchors[item_anchor] = str if item_anchor + res << str end - elsif match = ARRAY_REGEX.match(line) - _, val = match.captures + end + result = res + elsif line.lstrip =~ /^((?:[^#:]|:[^ ])+):(?:[ ]+(.*))?$/ && !line.lstrip.start_with?("!ruby/object:") + res = Hash.new + while lines.any? 
&& lines[0][/^ */].size == indent && lines[0].lstrip =~ /^((?:[^#:]|:[^ ])+):(?:[ ]+(.*))?$/ && !lines[0].lstrip.start_with?("!ruby/object:") + l = lines.shift + l.lstrip =~ /^((?:[^#:]|:[^ ])+):(?:[ ]+(.*))?$/ + key = $1.strip + val = $2.to_s.strip val = strip_comment(val) - last_hash[last_empty_key] = [] unless last_hash[last_empty_key].is_a?(Array) + # Check for anchor in value + val_anchor = nil + if val =~ /^&(\S+)\s+/ + unless aliases + raise ArgumentError, "YAML aliases are not allowed" + end + val_anchor = $1 + val = val.sub(/^&#{Regexp.escape(val_anchor)}\s+/, "") + end + + # Check for alias in value + if val.start_with?("*") + unless aliases + raise ArgumentError, "YAML aliases are not allowed" + end + alias_name = val[1..-1].strip + res[key] = anchors[alias_name] + elsif val.start_with?("!ruby/object:") + tag = val.strip + unless permitted_tags.include?(tag) + raise ArgumentError, "Disallowed class: #{tag}" + end + nested = parse_any(lines, indent, permitted_tags, aliases, anchors) + value = if nested.is_a?(Hash) + nested[:tag] = tag + nested + else + { :tag => tag, "value" => nested } + end + anchors[val_anchor] = value if val_anchor + res[key] = value + elsif val.empty? + value = if lines.any? && (lines[0].lstrip.start_with?("- ") || lines[0].lstrip == "-") && lines[0][/^ */].size == indent + parse_any(lines, indent, permitted_tags, aliases, anchors) + else + parse_any(lines, indent + 1, permitted_tags, aliases, anchors) + end + anchors[val_anchor] = value if val_anchor + res[key] = value + elsif val == "[]" + value = [] + anchors[val_anchor] = value if val_anchor + res[key] = value + elsif val == "{}" + value = {} + anchors[val_anchor] = value if val_anchor + res[key] = value + elsif val.start_with?("|") + modifier = val[1..-1].to_s.strip + value = parse_block_scalar(lines, indent, modifier) + anchors[val_anchor] = value if val_anchor + res[key] = value + else + str = unquote_simple(val) + while lines.any? && !lines[0].strip.empty? 
&& lines[0][/^ */].size > indent + str << " " << lines.shift.strip + end + anchors[val_anchor] = str if val_anchor + res[key] = str + end + end + result = res + elsif line.lstrip.start_with?("!ruby/object:") + tag = lines.shift.lstrip.strip + unless permitted_tags.include?(tag) + raise ArgumentError, "Disallowed class: #{tag}" + end + nested = parse_any(lines, indent, permitted_tags, aliases, anchors) + if nested.is_a?(Hash) + nested[:tag] = tag + result = nested + else + result = { :tag => tag, "value" => nested } + end + elsif line.lstrip.start_with?("|") + modifier = line.lstrip[1..-1].to_s.strip + lines.shift + result = parse_block_scalar(lines, indent, modifier) + else + str = unquote_simple(lines.shift.strip) + while lines.any? && !lines[0].strip.empty? && lines[0][/^ */].size > indent + str << " " << lines.shift.strip + end + result = str + end + + # Store anchor if present + anchors[anchor_name] = result if anchor_name + result + end - last_hash[last_empty_key].push(val) + def parse_block_scalar(lines, base_indent, modifier) + parts = [] + block_indent = nil + while lines.any? + if lines[0].strip.empty? + parts << "\n" + lines.shift + else + line_indent = lines[0][/^ */].size + break if line_indent <= base_indent + block_indent ||= line_indent + l = lines.shift + parts << l[block_indent..-1].to_s << "\n" end end + res = parts.join + res.chomp! if modifier == "-" && res.end_with?("\n") res end + def build_permitted_tags(permitted_classes) + Array(permitted_classes).map do |klass| + name = klass.is_a?(Module) ? 
klass.name : klass.to_s + "!ruby/object:#{name}" + end + end + def strip_comment(val) - if val.include?("#") && !val.start_with?("#") - val.split("#", 2).first.strip + return val unless val.include?("#") + return val if val.lstrip.start_with?("#") + + in_single = false + in_double = false + escape = false + + val.each_char.with_index do |ch, i| + if escape + escape = false + next + end + + if in_single + in_single = false if ch == "'" + elsif in_double + if ch == "\\" + escape = true + elsif ch == '"' + in_double = false + end + else + case ch + when "'" + in_single = true + when '"' + in_double = true + when "#" + return val[0...i].rstrip + end + end + end + + val + end + + def unquote_simple(val) + # Strip YAML non-specific tag (! prefix), e.g. ! '>=' -> '>=' + val = val.sub(/^! /, "") if val.start_with?("! ") + + if val =~ /^"(.*)"$/ + $1.gsub(/\\"/, '"').gsub(/\\n/, "\n").gsub(/\\r/, "\r").gsub(/\\t/, "\t").gsub(/\\\\/, "\\") + elsif val =~ /^'(.*)'$/ + $1.gsub(/''/, "'") + elsif val == "true" + true + elsif val == "false" + false + elsif val == "nil" + nil + elsif val == "{}" + {} + elsif val =~ /^\[(.*)\]$/ + inner = $1.strip + return [] if inner.empty? + inner.split(/\s*,\s*/).reject(&:empty?).map {|element| unquote_simple(element) } + elsif /^\d{4}-\d{2}-\d{2}/.match?(val) + require "time" + begin + Time.parse(val) + rescue ArgumentError + val + end + elsif /^-?\d+$/.match?(val) + val.to_i else val end From 45cb5d003064a2333c18e4ca7cc358f01a0ce2ad Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 6 Mar 2026 13:31:31 +0900 Subject: [PATCH 05/27] [ruby/rubygems] Add Gem object reconstruction from parsed YAML Add convert_to_spec and convert_any methods that transform parsed YAML data structures (with :tag metadata) back into Gem::Specification, Version, Platform, Requirement, and Dependency objects. Wire these into the load method so it returns fully reconstructed Gem objects. 
convert_to_spec normalizes specification_version to Integer, rdoc_options to Array of Strings, and other array fields (files, test_files, executables, requirements, extra_rdoc_files) to proper arrays. convert_any handles Gem::Version::Requirement (legacy) and validates requirement operators and dependency type symbols against permitted lists. https://github.com/ruby/rubygems/commit/fe1a29ef2d --- lib/rubygems/yaml_serializer.rb | 184 +++++++++++++++++++++++++++++++- 1 file changed, 182 insertions(+), 2 deletions(-) diff --git a/lib/rubygems/yaml_serializer.rb b/lib/rubygems/yaml_serializer.rb index 626bd264ed02b3..8f321e0e0573cb 100644 --- a/lib/rubygems/yaml_serializer.rb +++ b/lib/rubygems/yaml_serializer.rb @@ -80,7 +80,7 @@ def dump_obj(obj, indent, quote: false) obj == "true" || obj == "false" || obj == "nil" || obj.include?(":") || obj.include?("#") || obj.include?("[") || obj.include?("]") || obj.include?("{") || obj.include?("}") || obj.include?(",") - " #{obj.to_s.inspect}\n" + " #{obj.to_s.inspect}\n" else " #{obj}\n" end @@ -120,7 +120,12 @@ def load(str, permitted_classes: [], permitted_symbols: [], aliases: true) end return {} if data.nil? 
- data + + if data.is_a?(Hash) && (data[:tag] == "!ruby/object:Gem::Specification" || data["tag"] == "!ruby/object:Gem::Specification") + convert_to_spec(data, permitted_symbols) + else + convert_any(data, permitted_symbols) + end end def parse_any(lines, base_indent, permitted_tags, aliases, anchors) @@ -350,6 +355,181 @@ def build_permitted_tags(permitted_classes) end end + def convert_to_spec(hash, permitted_symbols) + spec = Gem::Specification.allocate + return spec unless hash.is_a?(Hash) + + converted_hash = {} + hash.each {|k, v| converted_hash[k] = convert_any(v, permitted_symbols) } + + # Ensure specification_version is an Integer if it's a valid numeric string + if converted_hash["specification_version"] && !converted_hash["specification_version"].is_a?(Integer) + val = converted_hash["specification_version"] + if val.is_a?(String) && /\A\d+\z/.match?(val) + converted_hash["specification_version"] = val.to_i + end + end + + # Debug: log rdoc_options that contain non-string elements + if converted_hash["rdoc_options"] && converted_hash["name"] + rdoc_opts = converted_hash["rdoc_options"] + has_non_string = case rdoc_opts + when Array then rdoc_opts.any? 
{|o| !o.is_a?(String) } + when Hash then true + else true + end + if has_non_string + warn "[DEBUG rdoc_options] gem=#{converted_hash["name"]} class=#{rdoc_opts.class} value=#{rdoc_opts.inspect}" + end + end + + # Ensure rdoc_options is an Array of Strings + if converted_hash["rdoc_options"].is_a?(Hash) + converted_hash["rdoc_options"] = converted_hash["rdoc_options"].values.flatten.compact.map(&:to_s) + elsif converted_hash["rdoc_options"].is_a?(Array) + converted_hash["rdoc_options"] = converted_hash["rdoc_options"].flat_map do |opt| + if opt.is_a?(Hash) + opt.flat_map {|k, v| [k.to_s, v.to_s] } + elsif opt.is_a?(String) + opt + else + opt.to_s + end + end + end + + # Ensure other array fields are properly typed + ["files", "test_files", "executables", "requirements", "extra_rdoc_files"].each do |field| + if converted_hash[field].is_a?(Hash) + converted_hash[field] = converted_hash[field].values.flatten.compact + elsif !converted_hash[field].is_a?(Array) && converted_hash[field] + converted_hash[field] = [converted_hash[field]].flatten.compact + end + end + + spec.yaml_initialize("!ruby/object:Gem::Specification", converted_hash) + spec + end + + def convert_any(obj, permitted_symbols) + if obj.is_a?(Hash) + if obj[:tag] == "!ruby/object:Gem::Version" + ver = obj["version"] || obj["value"] + Gem::Version.new(ver.to_s) + elsif obj[:tag] == "!ruby/object:Gem::Platform" + if obj["value"] + Gem::Platform.new(obj["value"]) + else + Gem::Platform.new([obj["cpu"], obj["os"], obj["version"]]) + end + elsif ["!ruby/object:Gem::Requirement", "!ruby/object:Gem::Version::Requirement"].include?(obj[:tag]) + r = Gem::Requirement.allocate + raw_reqs = obj["requirements"] || obj["value"] + reqs = convert_any(raw_reqs, permitted_symbols) + # Ensure reqs is an array (never nil or Hash) + reqs = [] unless reqs.is_a?(Array) + if reqs.is_a?(Array) && !reqs.empty? 
+ safe_reqs = [] + reqs.each do |item| + if item.is_a?(Array) && item.size == 2 + op = item[0].to_s + ver = item[1] + # Validate that op is a valid requirement operator + if ["=", "!=", ">", "<", ">=", "<=", "~>"].include?(op) + version_obj = if ver.is_a?(Gem::Version) + ver + else + Gem::Version.new(ver.to_s) + end + safe_reqs << [op, version_obj] + end + elsif item.is_a?(String) + # Try to validate the requirement string + parsed = Gem::Requirement.parse(item) + safe_reqs << parsed + end + rescue Gem::Requirement::BadRequirementError, Gem::Version::BadVersionError + # Skip malformed items silently + end + reqs = safe_reqs unless safe_reqs.empty? + end + r.instance_variable_set(:@requirements, reqs) + r + elsif obj[:tag] == "!ruby/object:Gem::Dependency" + d = Gem::Dependency.allocate + d.instance_variable_set(:@name, obj["name"]) + + # Ensure requirement is properly formed + requirement = begin + converted_req = convert_any(obj["requirement"], permitted_symbols) + # Validate that the requirement has valid requirements + if converted_req.is_a?(Gem::Requirement) + # Check if the requirement has any invalid items + reqs = converted_req.instance_variable_get(:@requirements) + if reqs&.is_a?(Array) + # Verify all requirements are valid + valid = reqs.all? do |item| + next true if item == Gem::Requirement::DefaultRequirement + if item.is_a?(Array) && item.size >= 2 + ["=", "!=", ">", "<", ">=", "<=", "~>"].include?(item[0].to_s) + else + false + end + end + valid ? converted_req : Gem::Requirement.default + else + converted_req + end + else + converted_req + end + rescue StandardError + Gem::Requirement.default + end + + d.instance_variable_set(:@requirement, requirement) + + type = obj["type"] + if type + type = type.to_s.sub(/^:/, "").to_sym + else + type = :runtime + end + if permitted_symbols.any? 
&& !permitted_symbols.include?(type.to_s) + raise ArgumentError, "Disallowed symbol: #{type.inspect}" + end + d.instance_variable_set(:@type, type) + + d.instance_variable_set(:@prerelease, ["true", true].include?(obj["prerelease"])) + d.instance_variable_set(:@version_requirements, d.instance_variable_get(:@requirement)) + d + else + res = Hash.new + obj.each do |k, v| + next if k == :tag + key_str = k.to_s + converted_val = convert_any(v, permitted_symbols) + + # Convert Hash to Array for fields that should be arrays + if ["rdoc_options", "files", "test_files", "executables", "requirements", "extra_rdoc_files"].include?(key_str) + if converted_val.is_a?(Hash) + converted_val = converted_val.values.flatten.compact + elsif !converted_val.is_a?(Array) && converted_val + converted_val = [converted_val].flatten.compact + end + end + + res[key_str] = converted_val + end + res + end + elsif obj.is_a?(Array) + obj.map {|i| convert_any(i, permitted_symbols) } + else + obj + end + end + def strip_comment(val) return val unless val.include?("#") return val if val.lstrip.start_with?("#") From a551f4fbf50a4e063abe08ba703a9f70709fdc5f Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 6 Mar 2026 14:29:54 +0900 Subject: [PATCH 06/27] [ruby/rubygems] Refactor YAMLSerializer into Parser/Builder/Emitter https://github.com/ruby/rubygems/commit/bfe17c110c --- lib/rubygems/yaml_serializer.rb | 1142 +++++++++++++++++-------------- 1 file changed, 642 insertions(+), 500 deletions(-) diff --git a/lib/rubygems/yaml_serializer.rb b/lib/rubygems/yaml_serializer.rb index 8f321e0e0573cb..720911194e1d32 100644 --- a/lib/rubygems/yaml_serializer.rb +++ b/lib/rubygems/yaml_serializer.rb @@ -2,603 +2,745 @@ module Gem module YAMLSerializer - module_function + Scalar = Struct.new(:value, :tag, :anchor, keyword_init: true) - def dump(obj) - "---#{dump_obj(obj, 0)}" + Mapping = Struct.new(:pairs, :tag, :anchor, keyword_init: true) do + def initialize(pairs: [], tag: nil, anchor: nil) + super 
+ end end - def dump_obj(obj, indent, quote: false) - case obj - when Gem::Specification - parts = [" !ruby/object:Gem::Specification\n"] - parts << "#{" " * indent}name:#{dump_obj(obj.name, indent + 2)}" - parts << "#{" " * indent}version:#{dump_obj(obj.version, indent + 2)}" - parts << "#{" " * indent}platform: #{obj.platform}\n" - if obj.platform.to_s != obj.original_platform.to_s - parts << "#{" " * indent}original_platform: #{obj.original_platform}\n" + Sequence = Struct.new(:items, :tag, :anchor, keyword_init: true) do + def initialize(items: [], tag: nil, anchor: nil) + super + end + end + + AliasRef = Struct.new(:name, keyword_init: true) + + class Parser + MAPPING_KEY_RE = /^((?:[^#:]|:[^ ])+):(?:[ ]+(.*))?$/ + + def initialize(source) + @lines = source.split(/\r?\n/) + @anchors = {} + strip_document_prefix + end + + def parse + return nil if @lines.empty? + + root = nil + while @lines.any? + before = @lines.size + node = parse_node(-1) + @lines.shift if @lines.size == before && @lines.any? + + if root.is_a?(Mapping) && node.is_a?(Mapping) + root.pairs.concat(node.pairs) + elsif root.nil? + root = node + end end + root + end - attributes = Gem::Specification.attribute_names.map(&:to_s).sort - %w[name version platform] - attributes.each do |name| - val = obj.instance_variable_get("@#{name}") - next if val.nil? - parts << "#{" " * indent}#{name}:#{dump_obj(val, indent + 2)}" + private + + def strip_document_prefix + return if @lines.empty? 
+ return unless @lines[0]&.start_with?("---") + + if @lines[0].strip == "---" + @lines.shift + else + @lines[0] = @lines[0].sub(/^---\s*/, "") end - res = parts.join - res << "\n" unless res.end_with?("\n") - res - when Gem::Version - " !ruby/object:Gem::Version\n#{" " * indent}version: #{dump_obj(obj.version.to_s, indent + 2).lstrip}" - when Gem::Platform - " !ruby/object:Gem::Platform\n#{" " * indent}cpu: #{obj.cpu.inspect}\n#{" " * indent}os: #{obj.os.inspect}\n#{" " * indent}version: #{obj.version.inspect}\n" - when Gem::Requirement - " !ruby/object:Gem::Requirement\n#{" " * indent}requirements:#{dump_obj(obj.requirements, indent + 2)}" - when Gem::Dependency - [ - " !ruby/object:Gem::Dependency\n", - "#{" " * indent}name: #{dump_obj(obj.name, indent + 2).lstrip}", - "#{" " * indent}requirement:#{dump_obj(obj.requirement, indent + 2)}", - "#{" " * indent}type: #{dump_obj(obj.type, indent + 2).lstrip}", - "#{" " * indent}prerelease: #{dump_obj(obj.prerelease?, indent + 2).lstrip}", - "#{" " * indent}version_requirements:#{dump_obj(obj.requirement, indent + 2)}", - ].join - when Hash - if obj.empty? - " {}\n" + end + + def parse_node(base_indent) + skip_blank_and_comments + return nil if @lines.empty? 
+ + indent = @lines[0][/^ */].size + return nil if indent < base_indent + + line = @lines[0] + + return parse_alias_ref if line.lstrip.start_with?("*") + + anchor = consume_anchor + + if line.lstrip.start_with?("- ") || line.lstrip == "-" + parse_sequence(indent, anchor) + elsif line.lstrip =~ MAPPING_KEY_RE && !line.lstrip.start_with?("!ruby/object:") + parse_mapping(indent, anchor) + elsif line.lstrip.start_with?("!ruby/object:") + parse_tagged_node(indent, anchor) + elsif line.lstrip.start_with?("|") + modifier = line.lstrip[1..].to_s.strip + @lines.shift + register_anchor(anchor, Scalar.new(value: parse_block_scalar(indent, modifier))) else - parts = ["\n"] - obj.each do |k, v| - is_symbol = k.is_a?(Symbol) || (k.is_a?(String) && k.start_with?(":")) - key_str = k.is_a?(Symbol) ? k.inspect : k.to_s - parts << "#{" " * indent}#{key_str}:#{dump_obj(v, indent + 2, quote: is_symbol)}" - end - parts.join + parse_plain_scalar(indent, anchor) end - when Array - if obj.empty? - " []\n" + end + + def parse_sequence(indent, anchor) + items = [] + while @lines.any? && @lines[0][/^ */].size == indent && + (@lines[0].lstrip.start_with?("- ") || @lines[0].lstrip == "-") + content = @lines.shift.lstrip[1..].strip + item_anchor, content = extract_item_anchor(content) + item = parse_sequence_item(content, indent) + items << register_anchor(item_anchor, item) + end + register_anchor(anchor, Sequence.new(items: items)) + end + + def parse_sequence_item(content, indent) + if content.start_with?("*") + parse_inline_alias(content) + elsif content.empty? + @lines.any? && @lines[0][/^ */].size > indent ? 
parse_node(indent) : nil + elsif content.start_with?("!ruby/object:") + parse_tagged_content(content.strip, indent) + elsif content.start_with?("-") + @lines.unshift("#{" " * (indent + 2)}#{content}") + parse_node(indent) + elsif content =~ MAPPING_KEY_RE && !content.start_with?("!ruby/object:") + @lines.unshift("#{" " * (indent + 2)}#{content}") + parse_node(indent) + elsif content.start_with?("|") + Scalar.new(value: parse_block_scalar(indent, content[1..].to_s.strip)) else - parts = ["\n"] - obj.each do |v| - parts << "#{" " * indent}-#{dump_obj(v, indent + 2)}" - end - parts.join + parse_inline_scalar(content, indent) + end + end + + def parse_mapping(indent, anchor) + pairs = [] + while @lines.any? && @lines[0][/^ */].size == indent && + @lines[0].lstrip =~ MAPPING_KEY_RE && !@lines[0].lstrip.start_with?("!ruby/object:") + l = @lines.shift + l.lstrip =~ MAPPING_KEY_RE + key = $1.strip + val = strip_comment($2.to_s.strip) + + val_anchor, val = consume_value_anchor(val) + value = parse_mapping_value(val, indent) + value = register_anchor(val_anchor, value) if val_anchor + + pairs << [Scalar.new(value: key), value] end - when Time - " #{obj.utc.strftime("%Y-%m-%d %H:%M:%S.%N Z")}\n" - when String - if obj.include?("\n") - parts = [obj.end_with?("\n") ? " |\n" : " |-\n"] - obj.each_line do |line| - parts << "#{" " * (indent + 2)}#{line}" + register_anchor(anchor, Mapping.new(pairs: pairs)) + end + + def parse_mapping_value(val, indent) + if val.start_with?("*") + parse_inline_alias(val) + elsif val.start_with?("!ruby/object:") + parse_tagged_content(val.strip, indent) + elsif val.empty? + if @lines.any? && + (@lines[0].lstrip.start_with?("- ") || @lines[0].lstrip == "-") && + @lines[0][/^ */].size == indent + parse_node(indent) + else + parse_node(indent + 1) end - res = parts.join - res << "\n" unless res.end_with?("\n") - res - elsif quote || obj.empty? 
|| obj =~ /^[!*&:@%$]/ || obj =~ /^-?\d+(\.\d+)?$/ || obj =~ /^[<>=-]/ || - obj == "true" || obj == "false" || obj == "nil" || - obj.include?(":") || obj.include?("#") || obj.include?("[") || obj.include?("]") || - obj.include?("{") || obj.include?("}") || obj.include?(",") - " #{obj.to_s.inspect}\n" + elsif val == "[]" + Sequence.new + elsif val == "{}" + Mapping.new + elsif val.start_with?("|") + Scalar.new(value: parse_block_scalar(indent, val[1..].to_s.strip)) else - " #{obj}\n" + parse_inline_scalar(val, indent) end - when Numeric, Symbol, TrueClass, FalseClass, nil - " #{obj.inspect}\n" - else - " #{obj.to_s.inspect}\n" end - end - def load(str, permitted_classes: [], permitted_symbols: [], aliases: true) - return {} if str.nil? || str.empty? - lines = str.split(/\r?\n/) - if lines[0]&.start_with?("---") - if lines[0].strip == "---" - lines.shift + def parse_tagged_node(indent, anchor) + tag = @lines.shift.lstrip.strip + nested = parse_node(indent) + apply_tag(nested, tag, anchor) + end + + def parse_tagged_content(tag, indent) + nested = parse_node(indent) + apply_tag(nested, tag, nil) + end + + def apply_tag(node, tag, anchor) + if node.is_a?(Mapping) + node.tag = tag + node.anchor = anchor + node else - lines[0] = lines[0].sub(/^---\s*/, "") + Mapping.new(pairs: [[Scalar.new(value: "value"), node]], tag: tag, anchor: anchor) end end - permitted_tags = build_permitted_tags(permitted_classes) - anchors = {} - data = nil - while lines.any? - before_count = lines.size - parsed = parse_any(lines, -1, permitted_tags, aliases, anchors) - if lines.size == before_count && lines.any? - lines.shift + def parse_block_scalar(base_indent, modifier) + parts = [] + block_indent = nil + + while @lines.any? + if @lines[0].strip.empty? 
+ parts << "\n" + @lines.shift + else + line_indent = @lines[0][/^ */].size + break if line_indent <= base_indent + block_indent ||= line_indent + parts << @lines.shift[block_indent..].to_s << "\n" + end end - if data.is_a?(Hash) && parsed.is_a?(Hash) - data.merge!(parsed) - elsif data.nil? - data = parsed + res = parts.join + res.chomp! if modifier == "-" && res.end_with?("\n") + res + end + + def parse_plain_scalar(indent, anchor) + result = coerce(@lines.shift.strip) + return register_anchor(anchor, result) if result.is_a?(Mapping) || result.is_a?(Sequence) + + while result.is_a?(String) && @lines.any? && + !@lines[0].strip.empty? && @lines[0][/^ */].size > indent + result << " " << @lines.shift.strip end + register_anchor(anchor, Scalar.new(value: result)) end - return {} if data.nil? + def parse_inline_scalar(val, indent) + result = coerce(val) + return result if result.is_a?(Mapping) || result.is_a?(Sequence) - if data.is_a?(Hash) && (data[:tag] == "!ruby/object:Gem::Specification" || data["tag"] == "!ruby/object:Gem::Specification") - convert_to_spec(data, permitted_symbols) - else - convert_any(data, permitted_symbols) + while result.is_a?(String) && @lines.any? && + !@lines[0].strip.empty? && @lines[0][/^ */].size > indent + result << " " << @lines.shift.strip + end + Scalar.new(value: result) end - end - def parse_any(lines, base_indent, permitted_tags, aliases, anchors) - while lines.any? && (lines[0].strip.empty? || lines[0].lstrip.start_with?("#")) - lines.shift + def coerce(val) + val = val.sub(/^! /, "") if val.start_with?("! ") + + if val =~ /^"(.*)"$/ + $1.gsub(/\\"/, '"').gsub(/\\n/, "\n").gsub(/\\r/, "\r").gsub(/\\t/, "\t").gsub(/\\\\/, "\\") + elsif val =~ /^'(.*)'$/ + $1.gsub(/''/, "'") + elsif val == "true" + true + elsif val == "false" + false + elsif val == "nil" + nil + elsif val == "{}" + Mapping.new + elsif val =~ /^\[(.*)\]$/ + inner = $1.strip + return Sequence.new if inner.empty? 
+ items = inner.split(/\s*,\s*/).reject(&:empty?).map {|e| Scalar.new(value: coerce(e)) } + Sequence.new(items: items) + elsif /^\d{4}-\d{2}-\d{2}/.match?(val) + require "time" + begin + Time.parse(val) + rescue ArgumentError + val + end + elsif /^-?\d+$/.match?(val) + val.to_i + else + val + end + end + + def parse_alias_ref + AliasRef.new(name: @lines.shift.lstrip[1..].strip) end - return nil if lines.empty? - indent = lines[0][/^ */].size - return nil if indent < base_indent + def parse_inline_alias(content) + AliasRef.new(name: content[1..].strip) + end - line = lines[0] + def consume_anchor + line = @lines[0] + return nil unless line.lstrip =~ /^&(\S+)\s+/ - # Check for alias reference (*anchor) - if line.lstrip.start_with?("*") - unless aliases - raise ArgumentError, "YAML aliases are not allowed" - end - alias_name = lines.shift.lstrip[1..-1].strip - return anchors[alias_name] + anchor = $1 + @lines[0] = line.sub(/&#{Regexp.escape(anchor)}\s+/, "") + anchor end - # Extract anchor if present (&anchor) - anchor_name = nil - if line.lstrip =~ /^&(\S+)\s+/ - unless aliases - raise ArgumentError, "YAML aliases are not allowed" - end - anchor_name = $1 - line = line.sub(/&#{Regexp.escape(anchor_name)}\s+/, "") - lines[0] = line + def extract_item_anchor(content) + return [nil, content] unless content =~ /^&(\S+)/ + + anchor = $1 + [anchor, content.sub(/^&#{Regexp.escape(anchor)}\s*/, "")] end - if line.lstrip.start_with?("- ") || line.lstrip == "-" - res = [] - while lines.any? 
&& lines[0][/^ */].size == indent && (lines[0].lstrip.start_with?("- ") || lines[0].lstrip == "-") - l = lines.shift - content = l.lstrip[1..-1].strip + def consume_value_anchor(val) + return [nil, val] unless val =~ /^&(\S+)\s+/ - # Check for anchor in array item - item_anchor = nil - if content =~ /^&(\S+)/ - unless aliases - raise ArgumentError, "YAML aliases are not allowed" - end - item_anchor = $1 - content = content.sub(/^&#{Regexp.escape(item_anchor)}\s*/, "") - end + anchor = $1 + [anchor, val.sub(/^&#{Regexp.escape(anchor)}\s+/, "")] + end - # Check for alias in array item - if content.start_with?("*") - unless aliases - raise ArgumentError, "YAML aliases are not allowed" - end - alias_name = content[1..-1].strip - res << anchors[alias_name] - elsif content.empty? - # Empty array item - check if next line is nested content or a new item - item_value = if lines.any? && lines[0][/^ */].size > indent - parse_any(lines, indent, permitted_tags, aliases, anchors) - end - anchors[item_anchor] = item_value if item_anchor - res << item_value - elsif content.start_with?("!ruby/object:") - tag = content.strip - unless permitted_tags.include?(tag) - raise ArgumentError, "Disallowed class: #{tag}" - end - nested = parse_any(lines, indent, permitted_tags, aliases, anchors) - item_value = if nested.is_a?(Hash) - nested[:tag] = tag - nested - else - { :tag => tag, "value" => nested } - end - anchors[item_anchor] = item_value if item_anchor - res << item_value - elsif content.start_with?("-") - lines.unshift(" " * (indent + 2) + content) - item_value = parse_any(lines, indent, permitted_tags, aliases, anchors) - anchors[item_anchor] = item_value if item_anchor - res << item_value - elsif content =~ /^((?:[^#:]|:[^ ])+):(?:[ ]+(.*))?$/ && !content.start_with?("!ruby/object:") - lines.unshift(" " * (indent + 2) + content) - item_value = parse_any(lines, indent, permitted_tags, aliases, anchors) - anchors[item_anchor] = item_value if item_anchor - res << item_value - elsif 
content.start_with?("|") - modifier = content[1..-1].to_s.strip - item_value = parse_block_scalar(lines, indent, modifier) - anchors[item_anchor] = item_value if item_anchor - res << item_value - else - str = unquote_simple(content) - while lines.any? && !lines[0].strip.empty? && lines[0][/^ */].size > indent - str << " " << lines.shift.strip - end - anchors[item_anchor] = str if item_anchor - res << str - end + def register_anchor(name, node) + if name + @anchors[name] = node + node.anchor = name if node.respond_to?(:anchor=) end - result = res - elsif line.lstrip =~ /^((?:[^#:]|:[^ ])+):(?:[ ]+(.*))?$/ && !line.lstrip.start_with?("!ruby/object:") - res = Hash.new - while lines.any? && lines[0][/^ */].size == indent && lines[0].lstrip =~ /^((?:[^#:]|:[^ ])+):(?:[ ]+(.*))?$/ && !lines[0].lstrip.start_with?("!ruby/object:") - l = lines.shift - l.lstrip =~ /^((?:[^#:]|:[^ ])+):(?:[ ]+(.*))?$/ - key = $1.strip - val = $2.to_s.strip - val = strip_comment(val) - - # Check for anchor in value - val_anchor = nil - if val =~ /^&(\S+)\s+/ - unless aliases - raise ArgumentError, "YAML aliases are not allowed" - end - val_anchor = $1 - val = val.sub(/^&#{Regexp.escape(val_anchor)}\s+/, "") + node + end + + def skip_blank_and_comments + @lines.shift while @lines.any? && + (@lines[0].strip.empty? 
|| @lines[0].lstrip.start_with?("#")) + end + + def strip_comment(val) + return val unless val.include?("#") + return val if val.lstrip.start_with?("#") + + in_single = false + in_double = false + escape = false + + val.each_char.with_index do |ch, i| + if escape + escape = false + next end - # Check for alias in value - if val.start_with?("*") - unless aliases - raise ArgumentError, "YAML aliases are not allowed" + if in_single + in_single = false if ch == "'" + elsif in_double + if ch == "\\" + escape = true + elsif ch == '"' + in_double = false end - alias_name = val[1..-1].strip - res[key] = anchors[alias_name] - elsif val.start_with?("!ruby/object:") - tag = val.strip - unless permitted_tags.include?(tag) - raise ArgumentError, "Disallowed class: #{tag}" - end - nested = parse_any(lines, indent, permitted_tags, aliases, anchors) - value = if nested.is_a?(Hash) - nested[:tag] = tag - nested - else - { :tag => tag, "value" => nested } - end - anchors[val_anchor] = value if val_anchor - res[key] = value - elsif val.empty? - value = if lines.any? && (lines[0].lstrip.start_with?("- ") || lines[0].lstrip == "-") && lines[0][/^ */].size == indent - parse_any(lines, indent, permitted_tags, aliases, anchors) - else - parse_any(lines, indent + 1, permitted_tags, aliases, anchors) - end - anchors[val_anchor] = value if val_anchor - res[key] = value - elsif val == "[]" - value = [] - anchors[val_anchor] = value if val_anchor - res[key] = value - elsif val == "{}" - value = {} - anchors[val_anchor] = value if val_anchor - res[key] = value - elsif val.start_with?("|") - modifier = val[1..-1].to_s.strip - value = parse_block_scalar(lines, indent, modifier) - anchors[val_anchor] = value if val_anchor - res[key] = value else - str = unquote_simple(val) - while lines.any? && !lines[0].strip.empty? 
&& lines[0][/^ */].size > indent - str << " " << lines.shift.strip + case ch + when "'" then in_single = true + when '"' then in_double = true + when "#" then return val[0...i].rstrip end - anchors[val_anchor] = str if val_anchor - res[key] = str end end - result = res - elsif line.lstrip.start_with?("!ruby/object:") - tag = lines.shift.lstrip.strip - unless permitted_tags.include?(tag) - raise ArgumentError, "Disallowed class: #{tag}" + + val + end + end + + class Builder + VALID_OPS = %w[= != > < >= <= ~>].freeze + ARRAY_FIELDS = %w[rdoc_options files test_files executables requirements extra_rdoc_files].freeze + + def initialize(permitted_classes: [], permitted_symbols: [], aliases: true) + @permitted_tags = Array(permitted_classes).map do |c| + "!ruby/object:#{c.is_a?(Module) ? c.name : c}" end - nested = parse_any(lines, indent, permitted_tags, aliases, anchors) - if nested.is_a?(Hash) - nested[:tag] = tag - result = nested + @permitted_symbols = permitted_symbols + @aliases = aliases + @anchor_values = {} + end + + def build(node) + return {} if node.nil? 
+ + result = build_node(node) + + if result.is_a?(Hash) && + (result[:tag] == "!ruby/object:Gem::Specification" || + result["tag"] == "!ruby/object:Gem::Specification") + build_specification(result) else - result = { :tag => tag, "value" => nested } + result + end + end + + private + + def build_node(node) + case node + when nil then nil + when AliasRef then resolve_alias(node) + when Scalar then store_anchor(node.anchor, node.value) + when Mapping then build_mapping(node) + when Sequence then store_anchor(node.anchor, node.items.map {|item| build_node(item) }) + else node # already a Ruby object + end + end + + def resolve_alias(node) + raise ArgumentError, "YAML aliases are not allowed" unless @aliases + @anchor_values.fetch(node.name, nil) + end + + def store_anchor(name, value) + @anchor_values[name] = value if name + value + end + + def build_mapping(node) + validate_tag!(node.tag) if node.tag + check_anchor!(node) + + result = case node.tag + when "!ruby/object:Gem::Version" + build_version(node) + when "!ruby/object:Gem::Platform" + build_platform(node) + when "!ruby/object:Gem::Requirement", "!ruby/object:Gem::Version::Requirement" + build_requirement(node) + when "!ruby/object:Gem::Dependency" + build_dependency(node) + when nil + build_hash(node) + else + hash = build_hash(node) + hash[:tag] = node.tag + hash end - elsif line.lstrip.start_with?("|") - modifier = line.lstrip[1..-1].to_s.strip - lines.shift - result = parse_block_scalar(lines, indent, modifier) - else - str = unquote_simple(lines.shift.strip) - while lines.any? && !lines[0].strip.empty? && lines[0][/^ */].size > indent - str << " " << lines.shift.strip + + store_anchor(node.anchor, result) + end + + def build_hash(node) + result = {} + node.pairs.each do |key_node, value_node| + key = key_node.is_a?(Scalar) ? 
key_node.value.to_s : build_node(key_node).to_s + value = build_node(value_node) + + if ARRAY_FIELDS.include?(key) + value = normalize_array_field(value) + end + + result[key] = value end - result = str + result end - # Store anchor if present - anchors[anchor_name] = result if anchor_name - result - end + def build_version(node) + hash = pairs_to_hash(node) + Gem::Version.new((hash["version"] || hash["value"]).to_s) + end - def parse_block_scalar(lines, base_indent, modifier) - parts = [] - block_indent = nil - while lines.any? - if lines[0].strip.empty? - parts << "\n" - lines.shift + def build_platform(node) + hash = pairs_to_hash(node) + if hash["value"] + Gem::Platform.new(hash["value"]) else - line_indent = lines[0][/^ */].size - break if line_indent <= base_indent - block_indent ||= line_indent - l = lines.shift - parts << l[block_indent..-1].to_s << "\n" + Gem::Platform.new([hash["cpu"], hash["os"], hash["version"]]) end end - res = parts.join - res.chomp! if modifier == "-" && res.end_with?("\n") - res - end - def build_permitted_tags(permitted_classes) - Array(permitted_classes).map do |klass| - name = klass.is_a?(Module) ? klass.name : klass.to_s - "!ruby/object:#{name}" + def build_requirement(node) + r = Gem::Requirement.allocate + hash = pairs_to_hash(node) + reqs = hash["requirements"] || hash["value"] + reqs = [] unless reqs.is_a?(Array) + + if reqs.is_a?(Array) && !reqs.empty? + safe_reqs = [] + reqs.each do |item| + if item.is_a?(Array) && item.size == 2 + op = item[0].to_s + ver = item[1] + if VALID_OPS.include?(op) + version_obj = ver.is_a?(Gem::Version) ? ver : Gem::Version.new(ver.to_s) + safe_reqs << [op, version_obj] + end + elsif item.is_a?(String) + parsed = Gem::Requirement.parse(item) + safe_reqs << parsed + end + rescue Gem::Requirement::BadRequirementError, Gem::Version::BadVersionError + # Skip malformed items silently + end + reqs = safe_reqs unless safe_reqs.empty? 
+ end + + r.instance_variable_set(:@requirements, reqs) + r end - end - def convert_to_spec(hash, permitted_symbols) - spec = Gem::Specification.allocate - return spec unless hash.is_a?(Hash) + def build_dependency(node) + hash = pairs_to_hash(node) + d = Gem::Dependency.allocate + d.instance_variable_set(:@name, hash["name"]) + + requirement = build_safe_requirement(hash["requirement"]) + d.instance_variable_set(:@requirement, requirement) - converted_hash = {} - hash.each {|k, v| converted_hash[k] = convert_any(v, permitted_symbols) } + type = hash["type"] + type = type ? type.to_s.sub(/^:/, "").to_sym : :runtime + validate_symbol!(type) + d.instance_variable_set(:@type, type) - # Ensure specification_version is an Integer if it's a valid numeric string - if converted_hash["specification_version"] && !converted_hash["specification_version"].is_a?(Integer) - val = converted_hash["specification_version"] - if val.is_a?(String) && /\A\d+\z/.match?(val) - converted_hash["specification_version"] = val.to_i + d.instance_variable_set(:@prerelease, ["true", true].include?(hash["prerelease"])) + d.instance_variable_set(:@version_requirements, d.instance_variable_get(:@requirement)) + d + end + + def build_specification(hash) + spec = Gem::Specification.allocate + + normalize_specification_version!(hash) + normalize_rdoc_options!(hash) + normalize_array_fields!(hash) + + spec.yaml_initialize("!ruby/object:Gem::Specification", hash) + spec + end + + def pairs_to_hash(node) + result = {} + node.pairs.each do |key_node, value_node| + key = key_node.is_a?(Scalar) ? key_node.value.to_s : build_node(key_node).to_s + result[key] = build_node(value_node) end + result end - # Debug: log rdoc_options that contain non-string elements - if converted_hash["rdoc_options"] && converted_hash["name"] - rdoc_opts = converted_hash["rdoc_options"] - has_non_string = case rdoc_opts - when Array then rdoc_opts.any? 
{|o| !o.is_a?(String) } - when Hash then true - else true + def build_safe_requirement(req_value) + return Gem::Requirement.default unless req_value + + converted = req_value + return Gem::Requirement.default unless converted.is_a?(Gem::Requirement) + + reqs = converted.instance_variable_get(:@requirements) + if reqs&.is_a?(Array) + valid = reqs.all? do |item| + next true if item == Gem::Requirement::DefaultRequirement + item.is_a?(Array) && item.size >= 2 && VALID_OPS.include?(item[0].to_s) + end + valid ? converted : Gem::Requirement.default + else + converted end - if has_non_string - warn "[DEBUG rdoc_options] gem=#{converted_hash["name"]} class=#{rdoc_opts.class} value=#{rdoc_opts.inspect}" + rescue StandardError + Gem::Requirement.default + end + + def validate_tag!(tag) + unless @permitted_tags.include?(tag) + raise ArgumentError, "Disallowed class: #{tag}" end end - # Ensure rdoc_options is an Array of Strings - if converted_hash["rdoc_options"].is_a?(Hash) - converted_hash["rdoc_options"] = converted_hash["rdoc_options"].values.flatten.compact.map(&:to_s) - elsif converted_hash["rdoc_options"].is_a?(Array) - converted_hash["rdoc_options"] = converted_hash["rdoc_options"].flat_map do |opt| - if opt.is_a?(Hash) - opt.flat_map {|k, v| [k.to_s, v.to_s] } - elsif opt.is_a?(String) - opt - else - opt.to_s - end + def validate_symbol!(sym) + if @permitted_symbols.any? 
&& !@permitted_symbols.include?(sym.to_s) + raise ArgumentError, "Disallowed symbol: #{sym.inspect}" end end - # Ensure other array fields are properly typed - ["files", "test_files", "executables", "requirements", "extra_rdoc_files"].each do |field| - if converted_hash[field].is_a?(Hash) - converted_hash[field] = converted_hash[field].values.flatten.compact - elsif !converted_hash[field].is_a?(Array) && converted_hash[field] - converted_hash[field] = [converted_hash[field]].flatten.compact + def check_anchor!(node) + if node.anchor + raise ArgumentError, "YAML aliases are not allowed" unless @aliases end end - spec.yaml_initialize("!ruby/object:Gem::Specification", converted_hash) - spec - end + def normalize_specification_version!(hash) + val = hash["specification_version"] + return unless val && !val.is_a?(Integer) + hash["specification_version"] = val.to_i if val.is_a?(String) && /\A\d+\z/.match?(val) + end - def convert_any(obj, permitted_symbols) - if obj.is_a?(Hash) - if obj[:tag] == "!ruby/object:Gem::Version" - ver = obj["version"] || obj["value"] - Gem::Version.new(ver.to_s) - elsif obj[:tag] == "!ruby/object:Gem::Platform" - if obj["value"] - Gem::Platform.new(obj["value"]) - else - Gem::Platform.new([obj["cpu"], obj["os"], obj["version"]]) - end - elsif ["!ruby/object:Gem::Requirement", "!ruby/object:Gem::Version::Requirement"].include?(obj[:tag]) - r = Gem::Requirement.allocate - raw_reqs = obj["requirements"] || obj["value"] - reqs = convert_any(raw_reqs, permitted_symbols) - # Ensure reqs is an array (never nil or Hash) - reqs = [] unless reqs.is_a?(Array) - if reqs.is_a?(Array) && !reqs.empty? 
- safe_reqs = [] - reqs.each do |item| - if item.is_a?(Array) && item.size == 2 - op = item[0].to_s - ver = item[1] - # Validate that op is a valid requirement operator - if ["=", "!=", ">", "<", ">=", "<=", "~>"].include?(op) - version_obj = if ver.is_a?(Gem::Version) - ver - else - Gem::Version.new(ver.to_s) - end - safe_reqs << [op, version_obj] - end - elsif item.is_a?(String) - # Try to validate the requirement string - parsed = Gem::Requirement.parse(item) - safe_reqs << parsed - end - rescue Gem::Requirement::BadRequirementError, Gem::Version::BadVersionError - # Skip malformed items silently - end - reqs = safe_reqs unless safe_reqs.empty? - end - r.instance_variable_set(:@requirements, reqs) - r - elsif obj[:tag] == "!ruby/object:Gem::Dependency" - d = Gem::Dependency.allocate - d.instance_variable_set(:@name, obj["name"]) - - # Ensure requirement is properly formed - requirement = begin - converted_req = convert_any(obj["requirement"], permitted_symbols) - # Validate that the requirement has valid requirements - if converted_req.is_a?(Gem::Requirement) - # Check if the requirement has any invalid items - reqs = converted_req.instance_variable_get(:@requirements) - if reqs&.is_a?(Array) - # Verify all requirements are valid - valid = reqs.all? do |item| - next true if item == Gem::Requirement::DefaultRequirement - if item.is_a?(Array) && item.size >= 2 - ["=", "!=", ">", "<", ">=", "<=", "~>"].include?(item[0].to_s) - else - false - end - end - valid ? 
converted_req : Gem::Requirement.default - else - converted_req - end + def normalize_rdoc_options!(hash) + opts = hash["rdoc_options"] + if opts.is_a?(Hash) + hash["rdoc_options"] = opts.values.flatten.compact.map(&:to_s) + elsif opts.is_a?(Array) + hash["rdoc_options"] = opts.flat_map do |opt| + if opt.is_a?(Hash) + opt.flat_map {|k, v| [k.to_s, v.to_s] } + elsif opt.is_a?(String) + opt else - converted_req + opt.to_s end - rescue StandardError - Gem::Requirement.default end + end + end - d.instance_variable_set(:@requirement, requirement) - - type = obj["type"] - if type - type = type.to_s.sub(/^:/, "").to_sym - else - type = :runtime - end - if permitted_symbols.any? && !permitted_symbols.include?(type.to_s) - raise ArgumentError, "Disallowed symbol: #{type.inspect}" - end - d.instance_variable_set(:@type, type) + def normalize_array_fields!(hash) + ARRAY_FIELDS.each do |field| + next if field == "rdoc_options" # already handled + hash[field] = normalize_array_field(hash[field]) if hash[field] + end + end - d.instance_variable_set(:@prerelease, ["true", true].include?(obj["prerelease"])) - d.instance_variable_set(:@version_requirements, d.instance_variable_get(:@requirement)) - d + def normalize_array_field(value) + if value.is_a?(Hash) + value.values.flatten.compact + elsif !value.is_a?(Array) && value + [value].flatten.compact else - res = Hash.new - obj.each do |k, v| - next if k == :tag - key_str = k.to_s - converted_val = convert_any(v, permitted_symbols) - - # Convert Hash to Array for fields that should be arrays - if ["rdoc_options", "files", "test_files", "executables", "requirements", "extra_rdoc_files"].include?(key_str) - if converted_val.is_a?(Hash) - converted_val = converted_val.values.flatten.compact - elsif !converted_val.is_a?(Array) && converted_val - converted_val = [converted_val].flatten.compact - end - end - - res[key_str] = converted_val - end - res + value end - elsif obj.is_a?(Array) - obj.map {|i| convert_any(i, permitted_symbols) } - 
else - obj end end - def strip_comment(val) - return val unless val.include?("#") - return val if val.lstrip.start_with?("#") + class Emitter + def emit(obj) + "---#{emit_node(obj, 0)}" + end + + private + + def emit_node(obj, indent, quote: false) + case obj + when Gem::Specification then emit_specification(obj, indent) + when Gem::Version then emit_version(obj, indent) + when Gem::Platform then emit_platform(obj, indent) + when Gem::Requirement then emit_requirement(obj, indent) + when Gem::Dependency then emit_dependency(obj, indent) + when Hash then emit_hash(obj, indent) + when Array then emit_array(obj, indent) + when Time then emit_time(obj) + when String then emit_string(obj, indent, quote: quote) + when Numeric, Symbol, TrueClass, FalseClass, nil + " #{obj.inspect}\n" + else + " #{obj.to_s.inspect}\n" + end + end - in_single = false - in_double = false - escape = false + def emit_specification(spec, indent) + parts = [" !ruby/object:Gem::Specification\n"] + parts << "#{pad(indent)}name:#{emit_node(spec.name, indent + 2)}" + parts << "#{pad(indent)}version:#{emit_node(spec.version, indent + 2)}" + parts << "#{pad(indent)}platform: #{spec.platform}\n" + if spec.platform.to_s != spec.original_platform.to_s + parts << "#{pad(indent)}original_platform: #{spec.original_platform}\n" + end - val.each_char.with_index do |ch, i| - if escape - escape = false - next + attributes = Gem::Specification.attribute_names.map(&:to_s).sort - %w[name version platform] + attributes.each do |name| + val = spec.instance_variable_get("@#{name}") + next if val.nil? 
+ parts << "#{pad(indent)}#{name}:#{emit_node(val, indent + 2)}" end - if in_single - in_single = false if ch == "'" - elsif in_double - if ch == "\\" - escape = true - elsif ch == '"' - in_double = false + res = parts.join + res << "\n" unless res.end_with?("\n") + res + end + + def emit_version(ver, indent) + " !ruby/object:Gem::Version\n" \ + "#{pad(indent)}version: #{emit_node(ver.version.to_s, indent + 2).lstrip}" + end + + def emit_platform(plat, indent) + " !ruby/object:Gem::Platform\n" \ + "#{pad(indent)}cpu: #{plat.cpu.inspect}\n" \ + "#{pad(indent)}os: #{plat.os.inspect}\n" \ + "#{pad(indent)}version: #{plat.version.inspect}\n" + end + + def emit_requirement(req, indent) + " !ruby/object:Gem::Requirement\n" \ + "#{pad(indent)}requirements:#{emit_node(req.requirements, indent + 2)}" + end + + def emit_dependency(dep, indent) + [ + " !ruby/object:Gem::Dependency\n", + "#{pad(indent)}name: #{emit_node(dep.name, indent + 2).lstrip}", + "#{pad(indent)}requirement:#{emit_node(dep.requirement, indent + 2)}", + "#{pad(indent)}type: #{emit_node(dep.type, indent + 2).lstrip}", + "#{pad(indent)}prerelease: #{emit_node(dep.prerelease?, indent + 2).lstrip}", + "#{pad(indent)}version_requirements:#{emit_node(dep.requirement, indent + 2)}", + ].join + end + + def emit_hash(hash, indent) + if hash.empty? + " {}\n" + else + parts = ["\n"] + hash.each do |k, v| + is_symbol = k.is_a?(Symbol) || (k.is_a?(String) && k.start_with?(":")) + key_str = k.is_a?(Symbol) ? k.inspect : k.to_s + parts << "#{pad(indent)}#{key_str}:#{emit_node(v, indent + 2, quote: is_symbol)}" end + parts.join + end + end + + def emit_array(arr, indent) + if arr.empty? 
+ " []\n" else - case ch - when "'" - in_single = true - when '"' - in_double = true - when "#" - return val[0...i].rstrip + parts = ["\n"] + arr.each do |v| + parts << "#{pad(indent)}-#{emit_node(v, indent + 2)}" end + parts.join end end - val - end + def emit_time(time) + " #{time.utc.strftime("%Y-%m-%d %H:%M:%S.%N Z")}\n" + end - def unquote_simple(val) - # Strip YAML non-specific tag (! prefix), e.g. ! '>=' -> '>=' - val = val.sub(/^! /, "") if val.start_with?("! ") - - if val =~ /^"(.*)"$/ - $1.gsub(/\\"/, '"').gsub(/\\n/, "\n").gsub(/\\r/, "\r").gsub(/\\t/, "\t").gsub(/\\\\/, "\\") - elsif val =~ /^'(.*)'$/ - $1.gsub(/''/, "'") - elsif val == "true" - true - elsif val == "false" - false - elsif val == "nil" - nil - elsif val == "{}" - {} - elsif val =~ /^\[(.*)\]$/ - inner = $1.strip - return [] if inner.empty? - inner.split(/\s*,\s*/).reject(&:empty?).map {|element| unquote_simple(element) } - elsif /^\d{4}-\d{2}-\d{2}/.match?(val) - require "time" - begin - Time.parse(val) - rescue ArgumentError - val + def emit_string(str, indent, quote: false) + if str.include?("\n") + emit_block_scalar(str, indent) + elsif needs_quoting?(str, quote) + " #{str.to_s.inspect}\n" + else + " #{str}\n" end - elsif /^-?\d+$/.match?(val) - val.to_i - else - val end + + def emit_block_scalar(str, indent) + parts = [str.end_with?("\n") ? " |\n" : " |-\n"] + str.each_line do |line| + parts << "#{pad(indent + 2)}#{line}" + end + res = parts.join + res << "\n" unless res.end_with?("\n") + res + end + + def needs_quoting?(str, quote) + quote || str.empty? 
|| + str =~ /^[!*&:@%$]/ || str =~ /^-?\d+(\.\d+)?$/ || str =~ /^[<>=-]/ || + str == "true" || str == "false" || str == "nil" || + str.include?(":") || str.include?("#") || str.include?("[") || str.include?("]") || + str.include?("{") || str.include?("}") || str.include?(",") + end + + def pad(indent) + " " * indent + end + end + + module_function + + def dump(obj) + Emitter.new.emit(obj) + end + + def load(str, permitted_classes: [], permitted_symbols: [], aliases: true) + return {} if str.nil? || str.empty? + + ast = Parser.new(str).parse + return {} if ast.nil? + + Builder.new( + permitted_classes: permitted_classes, + permitted_symbols: permitted_symbols, + aliases: aliases + ).build(ast) end end end From 2c9e4befbf014b3eff2e281aeb276fcf780055ca Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Mon, 9 Mar 2026 12:17:11 +0900 Subject: [PATCH 07/27] [ruby/rubygems] Add use_psych config and make YAMLSerializer default YAML backend Add Gem.use_psych? and Gem.load_yaml branching so that YAMLSerializer is used by default, while Psych remains available via the use_psych config option in .gemrc or RUBYGEMS_USE_PSYCH environment variable. https://github.com/ruby/rubygems/commit/d67561aa06 Co-Authored-By: Claude Opus 4.6 --- lib/rubygems.rb | 23 +++++++++++++++++++++-- lib/rubygems/config_file.rb | 20 ++++++++++++++++---- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/lib/rubygems.rb b/lib/rubygems.rb index 490c81821dafd7..baf0599ee6bbd7 100644 --- a/lib/rubygems.rb +++ b/lib/rubygems.rb @@ -640,6 +640,14 @@ def self.add_to_load_path(*paths) end @yaml_loaded = false + @use_psych = nil + + ## + # Returns true if the Psych YAML parser is enabled via configuration. + + def self.use_psych? 
+ @use_psych || false + end ## # Loads YAML, preferring Psych @@ -647,9 +655,20 @@ def self.add_to_load_path(*paths) def self.load_yaml return if @yaml_loaded - require "psych" - require_relative "rubygems/psych_tree" + @use_psych = ENV["RUBYGEMS_USE_PSYCH"] == "true" || + (defined?(@configuration) && @configuration && !!@configuration[:use_psych]) + + if @use_psych + # Remove Psych stubs (defined by yaml_serializer.rb) before loading + # real Psych to avoid superclass mismatch errors + if defined?(Psych) && !defined?(Psych::VERSION) + Object.send(:remove_const, :Psych) + end + require "psych" + require_relative "rubygems/psych_tree" + end + require_relative "rubygems/yaml_serializer" require_relative "rubygems/safe_yaml" @yaml_loaded = true diff --git a/lib/rubygems/config_file.rb b/lib/rubygems/config_file.rb index 06ca73157a1916..bd66aa258dad22 100644 --- a/lib/rubygems/config_file.rb +++ b/lib/rubygems/config_file.rb @@ -49,6 +49,7 @@ class Gem::ConfigFile DEFAULT_IPV4_FALLBACK_ENABLED = false DEFAULT_INSTALL_EXTENSION_IN_LIB = false DEFAULT_GLOBAL_GEM_CACHE = false + DEFAULT_USE_PSYCH = false ## # For Ruby packagers to set configuration defaults. Set in @@ -161,6 +162,11 @@ class Gem::ConfigFile attr_accessor :global_gem_cache + ## + # Use Psych (C extension YAML parser) instead of the pure Ruby YAMLSerializer. 
+ + attr_accessor :use_psych + ## # Path name of directory or file of openssl client certificate, used for remote https connection with client authentication @@ -199,6 +205,7 @@ def initialize(args) @install_extension_in_lib = DEFAULT_INSTALL_EXTENSION_IN_LIB @ipv4_fallback_enabled = ENV["IPV4_FALLBACK_ENABLED"] == "true" || DEFAULT_IPV4_FALLBACK_ENABLED @global_gem_cache = ENV["RUBYGEMS_GLOBAL_GEM_CACHE"] == "true" || DEFAULT_GLOBAL_GEM_CACHE + @use_psych = ENV["RUBYGEMS_USE_PSYCH"] == "true" || DEFAULT_USE_PSYCH operating_system_config = Marshal.load Marshal.dump(OPERATING_SYSTEM_DEFAULTS) platform_config = Marshal.load Marshal.dump(PLATFORM_DEFAULTS) @@ -221,7 +228,7 @@ def initialize(args) # gemhome and gempath are not working with symbol keys if %w[backtrace bulk_threshold verbose update_sources cert_expiration_length_days concurrent_downloads install_extension_in_lib ipv4_fallback_enabled - global_gem_cache sources + global_gem_cache use_psych sources disable_default_gem_server ssl_verify_mode ssl_ca_cert ssl_client_cert].include?(k) k.to_sym else @@ -239,6 +246,7 @@ def initialize(args) @install_extension_in_lib = @hash[:install_extension_in_lib] if @hash.key? :install_extension_in_lib @ipv4_fallback_enabled = @hash[:ipv4_fallback_enabled] if @hash.key? :ipv4_fallback_enabled @global_gem_cache = @hash[:global_gem_cache] if @hash.key? :global_gem_cache + @use_psych = @hash[:use_psych] if @hash.key? :use_psych @home = @hash[:gemhome] if @hash.key? :gemhome @path = @hash[:gempath] if @hash.key? :gempath @@ -378,7 +386,9 @@ def load_file(filename) begin config = self.class.load_with_rubygems_config_hash(File.read(filename)) - if config.keys.any? {|k| k.to_s.gsub(%r{https?:\/\/}, "").include?(": ") } + has_invalid_keys = config.keys.any? {|k| k.to_s.gsub(%r{https?:\/\/}, "").include?(": ") } + has_invalid_values = config.values.any? 
{|v| v.is_a?(String) && v.gsub(%r{https?:\/\/}, "").match?(/\A\S+: /) } + if has_invalid_keys || has_invalid_values warn "Failed to load #{filename} because it doesn't contain valid YAML hash" return {} else @@ -563,7 +573,9 @@ def self.dump_with_rubygems_yaml(content) def self.load_with_rubygems_config_hash(yaml) require_relative "yaml_serializer" - content = Gem::YAMLSerializer.load(yaml) + content = Gem::YAMLSerializer.load(yaml, permitted_classes: []) + return {} unless content.is_a?(Hash) + deep_transform_config_keys!(content) end @@ -597,7 +609,7 @@ def self.deep_transform_config_keys!(config) else v end - elsif v.empty? + elsif v.respond_to?(:empty?) && v.empty? nil elsif v.is_a?(Hash) deep_transform_config_keys!(v) From 852e7cfab4b7a07300948634bb072441b6bcc1a1 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Mon, 9 Mar 2026 12:17:16 +0900 Subject: [PATCH 08/27] [ruby/rubygems] Use YAMLSerializer in SafeYAML with Psych fallback https://github.com/ruby/rubygems/commit/d81ae0a870 Co-Authored-By: Claude Opus 4.6 --- lib/rubygems/safe_yaml.rb | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/lib/rubygems/safe_yaml.rb b/lib/rubygems/safe_yaml.rb index 6a02a482304dc0..1c15f10eb16661 100644 --- a/lib/rubygems/safe_yaml.rb +++ b/lib/rubygems/safe_yaml.rb @@ -35,11 +35,45 @@ def self.aliases_enabled? # :nodoc: end def self.safe_load(input) - ::Psych.safe_load(input, permitted_classes: PERMITTED_CLASSES, permitted_symbols: PERMITTED_SYMBOLS, aliases: @aliases_enabled) + if Gem.use_psych? + ::Psych.safe_load(input, permitted_classes: PERMITTED_CLASSES, + permitted_symbols: PERMITTED_SYMBOLS, aliases: @aliases_enabled) + else + Gem::YAMLSerializer.load( + input, + permitted_classes: PERMITTED_CLASSES, + permitted_symbols: PERMITTED_SYMBOLS, + aliases: aliases_enabled? + ) + end end def self.load(input) - ::Psych.safe_load(input, permitted_classes: [::Symbol]) + if Gem.use_psych? 
+ ::Psych.safe_load(input, permitted_classes: [::Symbol]) + else + Gem::YAMLSerializer.load( + input, + permitted_classes: [::Symbol] + ) + end + end + + def self.unsafe_load(input) + if Gem.use_psych? + if ::Psych.respond_to?(:unsafe_load) + ::Psych.unsafe_load(input) + else + ::Psych.load(input) + end + else + Gem::YAMLSerializer.load( + input, + permitted_classes: PERMITTED_CLASSES, + permitted_symbols: PERMITTED_SYMBOLS, + aliases: aliases_enabled? + ) + end end end end From 9e85f2c2bd182d5edcf899d62bbc786ce7f401cc Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Mon, 9 Mar 2026 12:17:22 +0900 Subject: [PATCH 09/27] [ruby/rubygems] Use YAMLSerializer in Specification with Psych fallback https://github.com/ruby/rubygems/commit/b4655ddeb2 Co-Authored-By: Claude Opus 4.6 --- lib/rubygems/safe_marshal.rb | 1 + lib/rubygems/specification.rb | 36 +++++++++++++++++++---------------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/lib/rubygems/safe_marshal.rb b/lib/rubygems/safe_marshal.rb index b81d1a0a475a00..871f24727dcb44 100644 --- a/lib/rubygems/safe_marshal.rb +++ b/lib/rubygems/safe_marshal.rb @@ -54,6 +54,7 @@ module SafeMarshal "Gem::NameTuple" => %w[@name @version @platform], "Gem::Platform" => %w[@os @cpu @version], "Psych::PrivateType" => %w[@value @type_id], + "YAML::PrivateType" => %w[@value @type_id], }.freeze private_constant :PERMITTED_IVARS diff --git a/lib/rubygems/specification.rb b/lib/rubygems/specification.rb index 64f289a7b41e50..d852332db7f92f 100644 --- a/lib/rubygems/specification.rb +++ b/lib/rubygems/specification.rb @@ -1275,7 +1275,7 @@ def self._load(str) raise unless message.include?("YAML::") unless Object.const_defined?(:YAML) - Object.const_set "YAML", Psych + Object.const_set "YAML", Module.new yaml_set = true end @@ -1284,7 +1284,7 @@ def self._load(str) YAML::Syck.const_set "DefaultKey", Class.new if message.include?("YAML::Syck::DefaultKey") && !YAML::Syck.const_defined?(:DefaultKey) elsif 
message.include?("YAML::PrivateType") && !YAML.const_defined?(:PrivateType) - YAML.const_set "PrivateType", Class.new + YAML.const_set "PrivateType", Class.new { attr_accessor :type_id, :value } end retry_count += 1 @@ -2455,24 +2455,28 @@ def to_spec def to_yaml(opts = {}) # :nodoc: Gem.load_yaml - # Because the user can switch the YAML engine behind our - # back, we have to check again here to make sure that our - # psych code was properly loaded, and load it if not. - unless Gem.const_defined?(:NoAliasYAMLTree) - require_relative "psych_tree" - end + if Gem.use_psych? + # Because the user can switch the YAML engine behind our + # back, we have to check again here to make sure that our + # psych code was properly loaded, and load it if not. + unless Gem.const_defined?(:NoAliasYAMLTree) + require_relative "psych_tree" + end - builder = Gem::NoAliasYAMLTree.create - builder << self - ast = builder.tree + builder = Gem::NoAliasYAMLTree.create + builder << self + ast = builder.tree - require "stringio" - io = StringIO.new - io.set_encoding Encoding::UTF_8 + require "stringio" + io = StringIO.new + io.set_encoding Encoding::UTF_8 - Psych::Visitors::Emitter.new(io).accept(ast) + Psych::Visitors::Emitter.new(io).accept(ast) - io.string.gsub(/ !!null \n/, " \n") + io.string.gsub(/ !!null \n/, " \n") + else + Gem::YAMLSerializer.dump(self) + end end ## From e954bd2b5076e1593602eca48a17e25cdb0541d3 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Mon, 9 Mar 2026 12:17:27 +0900 Subject: [PATCH 10/27] [ruby/rubygems] Use YAMLSerializer in Package with Psych fallback https://github.com/ruby/rubygems/commit/21c33bb482 Co-Authored-By: Claude Opus 4.6 --- lib/rubygems/package.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/rubygems/package.rb b/lib/rubygems/package.rb index e6e078dce40680..c433cf1a77983a 100644 --- a/lib/rubygems/package.rb +++ b/lib/rubygems/package.rb @@ -232,7 +232,11 @@ def add_checksums(tar) tar.add_file_signed 
"checksums.yaml.gz", 0o444, @signer do |io| gzip_to io do |gz_io| - Psych.dump checksums_by_algorithm, gz_io + if Gem.use_psych? + Psych.dump checksums_by_algorithm, gz_io + else + gz_io.write Gem::YAMLSerializer.dump(checksums_by_algorithm) + end end end end From 6167a6c905b5613148549fd2023cb62dcdcff752 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Mon, 9 Mar 2026 12:17:42 +0900 Subject: [PATCH 11/27] [ruby/rubygems] Use YAMLSerializer in specification_command with Psych fallback https://github.com/ruby/rubygems/commit/895c8799fc Co-Authored-By: Claude Opus 4.6 --- lib/rubygems/commands/specification_command.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/rubygems/commands/specification_command.rb b/lib/rubygems/commands/specification_command.rb index a21ed35be316a4..ec81917d2a88ff 100644 --- a/lib/rubygems/commands/specification_command.rb +++ b/lib/rubygems/commands/specification_command.rb @@ -147,7 +147,7 @@ def execute say case options[:format] when :ruby then s.to_ruby when :marshal then Marshal.dump s - else s.to_yaml + else Gem.use_psych? ? 
s.to_yaml : Gem::YAMLSerializer.dump(s) end say "\n" From 2781b19ca6c6d6da83f45c77f29d75ce1437ea5e Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Mon, 9 Mar 2026 12:17:48 +0900 Subject: [PATCH 12/27] [ruby/rubygems] Update test helpers for YAMLSerializer https://github.com/ruby/rubygems/commit/9d54d0f830 Co-Authored-By: Claude Opus 4.6 --- test/rubygems/helper.rb | 6 +----- test/rubygems/test_gem_commands_owner_command.rb | 4 +--- test/rubygems/test_gem_package.rb | 12 ++++++++++-- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/test/rubygems/helper.rb b/test/rubygems/helper.rb index dc40f4ecb1f8ec..ec373d41e0202a 100644 --- a/test/rubygems/helper.rb +++ b/test/rubygems/helper.rb @@ -738,11 +738,7 @@ def write_dummy_extconf(gem_name) # Load a YAML string, the psych 3 way def load_yaml(yaml) - if Psych.respond_to?(:unsafe_load) - Psych.unsafe_load(yaml) - else - Psych.load(yaml) - end + Gem::SafeYAML.safe_load(yaml) end ## diff --git a/test/rubygems/test_gem_commands_owner_command.rb b/test/rubygems/test_gem_commands_owner_command.rb index 80b1497c415ae9..be4eee00e63cd0 100644 --- a/test/rubygems/test_gem_commands_owner_command.rb +++ b/test/rubygems/test_gem_commands_owner_command.rb @@ -55,8 +55,6 @@ def test_show_owners end def test_show_owners_dont_load_objects - pend "testing a psych-only API" unless defined?(::Psych::DisallowedClass) - response = < Date: Mon, 9 Mar 2026 12:17:53 +0900 Subject: [PATCH 13/27] [ruby/rubygems] Update bundler inline spec expectations https://github.com/ruby/rubygems/commit/825d4eba3c Co-Authored-By: Claude Opus 4.6 --- spec/bundler/runtime/inline_spec.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/spec/bundler/runtime/inline_spec.rb b/spec/bundler/runtime/inline_spec.rb index e55d029a4b689f..daf966f458c45d 100644 --- a/spec/bundler/runtime/inline_spec.rb +++ b/spec/bundler/runtime/inline_spec.rb @@ -678,8 +678,10 @@ def confirm(msg, newline = nil) expect(out).to include("Installing 
psych 999") expect(out).to include("Installing stringio 999") - expect(out).to include("The psych gem was resolved to 999") - expect(out).to include("The stringio gem was resolved to 999") + if Gem.respond_to?(:use_psych?) && Gem.use_psych? + expect(out).to include("The psych gem was resolved to 999") + expect(out).to include("The stringio gem was resolved to 999") + end end it "leaves a lockfile in the same directory as the inline script alone" do From 1cd2cc2cdfb8c811be8e88a33cbc3a1b6665b8d3 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Mon, 9 Mar 2026 10:46:07 +0900 Subject: [PATCH 14/27] [ruby/rubygems] Use Psych-specific YAML error classes https://github.com/ruby/rubygems/commit/e07e88a232 --- lib/rubygems/yaml_serializer.rb | 17 +++++++++++++---- .../rubygems/test_gem_commands_owner_command.rb | 4 +++- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/lib/rubygems/yaml_serializer.rb b/lib/rubygems/yaml_serializer.rb index 720911194e1d32..878c958a46bb1b 100644 --- a/lib/rubygems/yaml_serializer.rb +++ b/lib/rubygems/yaml_serializer.rb @@ -1,5 +1,14 @@ # frozen_string_literal: true +unless defined?(Psych) + module Psych + class SyntaxError < ::StandardError; end + class DisallowedClass < ::ArgumentError; end + class BadAlias < ::ArgumentError; end + class AliasesNotEnabled < BadAlias; end + end +end + module Gem module YAMLSerializer Scalar = Struct.new(:value, :tag, :anchor, keyword_init: true) @@ -378,7 +387,7 @@ def build_node(node) end def resolve_alias(node) - raise ArgumentError, "YAML aliases are not allowed" unless @aliases + raise Psych::AliasesNotEnabled, "YAML aliases are not allowed" unless @aliases @anchor_values.fetch(node.name, nil) end @@ -530,19 +539,19 @@ def build_safe_requirement(req_value) def validate_tag!(tag) unless @permitted_tags.include?(tag) - raise ArgumentError, "Disallowed class: #{tag}" + raise Psych::DisallowedClass, "Disallowed class: #{tag}" end end def validate_symbol!(sym) if @permitted_symbols.any? 
&& !@permitted_symbols.include?(sym.to_s) - raise ArgumentError, "Disallowed symbol: #{sym.inspect}" + raise Psych::DisallowedClass, "Disallowed symbol: #{sym.inspect}" end end def check_anchor!(node) if node.anchor - raise ArgumentError, "YAML aliases are not allowed" unless @aliases + raise Psych::AliasesNotEnabled, "YAML aliases are not allowed" unless @aliases end end diff --git a/test/rubygems/test_gem_commands_owner_command.rb b/test/rubygems/test_gem_commands_owner_command.rb index be4eee00e63cd0..80b1497c415ae9 100644 --- a/test/rubygems/test_gem_commands_owner_command.rb +++ b/test/rubygems/test_gem_commands_owner_command.rb @@ -55,6 +55,8 @@ def test_show_owners end def test_show_owners_dont_load_objects + pend "testing a psych-only API" unless defined?(::Psych::DisallowedClass) + response = < Date: Fri, 6 Mar 2026 16:22:01 +0900 Subject: [PATCH 15/27] [ruby/rubygems] Simplify indentation handling in YAML serializer https://github.com/ruby/rubygems/commit/50becac99a --- lib/rubygems/yaml_serializer.rb | 66 +++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/lib/rubygems/yaml_serializer.rb b/lib/rubygems/yaml_serializer.rb index 878c958a46bb1b..6c89cf2a9a2d53 100644 --- a/lib/rubygems/yaml_serializer.rb +++ b/lib/rubygems/yaml_serializer.rb @@ -71,23 +71,28 @@ def parse_node(base_indent) skip_blank_and_comments return nil if @lines.empty? 
- indent = @lines[0][/^ */].size - return nil if indent < base_indent - line = @lines[0] + stripped = line.lstrip + indent = line.size - stripped.size + return nil if indent < base_indent - return parse_alias_ref if line.lstrip.start_with?("*") + return parse_alias_ref if stripped.start_with?("*") anchor = consume_anchor - if line.lstrip.start_with?("- ") || line.lstrip == "-" + if anchor + line = @lines[0] + stripped = line.lstrip + end + + if stripped.start_with?("- ") || stripped == "-" parse_sequence(indent, anchor) - elsif line.lstrip =~ MAPPING_KEY_RE && !line.lstrip.start_with?("!ruby/object:") + elsif stripped =~ MAPPING_KEY_RE && !stripped.start_with?("!ruby/object:") parse_mapping(indent, anchor) - elsif line.lstrip.start_with?("!ruby/object:") + elsif stripped.start_with?("!ruby/object:") parse_tagged_node(indent, anchor) - elsif line.lstrip.start_with?("|") - modifier = line.lstrip[1..].to_s.strip + elsif stripped.start_with?("|") + modifier = stripped[1..].to_s.strip @lines.shift register_anchor(anchor, Scalar.new(value: parse_block_scalar(indent, modifier))) else @@ -97,8 +102,11 @@ def parse_node(base_indent) def parse_sequence(indent, anchor) items = [] - while @lines.any? && @lines[0][/^ */].size == indent && - (@lines[0].lstrip.start_with?("- ") || @lines[0].lstrip == "-") + while @lines.any? + line = @lines[0] + stripped = line.lstrip + break unless line.size - stripped.size == indent && + (stripped.start_with?("- ") || stripped == "-") content = @lines.shift.lstrip[1..].strip item_anchor, content = extract_item_anchor(content) item = parse_sequence_item(content, indent) @@ -111,7 +119,7 @@ def parse_sequence_item(content, indent) if content.start_with?("*") parse_inline_alias(content) elsif content.empty? - @lines.any? && @lines[0][/^ */].size > indent ? parse_node(indent) : nil + @lines.any? && current_indent > indent ? 
parse_node(indent) : nil elsif content.start_with?("!ruby/object:") parse_tagged_content(content.strip, indent) elsif content.start_with?("-") @@ -129,11 +137,13 @@ def parse_sequence_item(content, indent) def parse_mapping(indent, anchor) pairs = [] - while @lines.any? && @lines[0][/^ */].size == indent && - @lines[0].lstrip =~ MAPPING_KEY_RE && !@lines[0].lstrip.start_with?("!ruby/object:") - l = @lines.shift - l.lstrip =~ MAPPING_KEY_RE + while @lines.any? + line = @lines[0] + stripped = line.lstrip + break unless line.size - stripped.size == indent && + stripped =~ MAPPING_KEY_RE && !stripped.start_with?("!ruby/object:") key = $1.strip + @lines.shift val = strip_comment($2.to_s.strip) val_anchor, val = consume_value_anchor(val) @@ -151,9 +161,13 @@ def parse_mapping_value(val, indent) elsif val.start_with?("!ruby/object:") parse_tagged_content(val.strip, indent) elsif val.empty? + if @lines.any? + next_stripped = @lines[0].lstrip + next_indent = @lines[0].size - next_stripped.size + end if @lines.any? && - (@lines[0].lstrip.start_with?("- ") || @lines[0].lstrip == "-") && - @lines[0][/^ */].size == indent + (next_stripped.start_with?("- ") || next_stripped == "-") && + next_indent == indent parse_node(indent) else parse_node(indent + 1) @@ -170,7 +184,7 @@ def parse_mapping_value(val, indent) end def parse_tagged_node(indent, anchor) - tag = @lines.shift.lstrip.strip + tag = @lines.shift.strip nested = parse_node(indent) apply_tag(nested, tag, anchor) end @@ -195,11 +209,12 @@ def parse_block_scalar(base_indent, modifier) block_indent = nil while @lines.any? - if @lines[0].strip.empty? + line = @lines[0] + if line.strip.empty? 
parts << "\n" @lines.shift else - line_indent = @lines[0][/^ */].size + line_indent = line.size - line.lstrip.size break if line_indent <= base_indent block_indent ||= line_indent parts << @lines.shift[block_indent..].to_s << "\n" @@ -216,7 +231,7 @@ def parse_plain_scalar(indent, anchor) return register_anchor(anchor, result) if result.is_a?(Mapping) || result.is_a?(Sequence) while result.is_a?(String) && @lines.any? && - !@lines[0].strip.empty? && @lines[0][/^ */].size > indent + !@lines[0].strip.empty? && current_indent > indent result << " " << @lines.shift.strip end register_anchor(anchor, Scalar.new(value: result)) @@ -227,7 +242,7 @@ def parse_inline_scalar(val, indent) return result if result.is_a?(Mapping) || result.is_a?(Sequence) while result.is_a?(String) && @lines.any? && - !@lines[0].strip.empty? && @lines[0][/^ */].size > indent + !@lines[0].strip.empty? && current_indent > indent result << " " << @lines.shift.strip end Scalar.new(value: result) @@ -275,6 +290,11 @@ def parse_inline_alias(content) AliasRef.new(name: content[1..].strip) end + def current_indent + line = @lines[0] + line.size - line.lstrip.size + end + def consume_anchor line = @lines[0] return nil unless line.lstrip =~ /^&(\S+)\s+/ From 00e054f21a105712fa15e5535e42c0be3eea728b Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 6 Mar 2026 16:28:46 +0900 Subject: [PATCH 16/27] [ruby/rubygems] Optimize YAML serializer line handling https://github.com/ruby/rubygems/commit/ef022c664f --- lib/rubygems/yaml_serializer.rb | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/rubygems/yaml_serializer.rb b/lib/rubygems/yaml_serializer.rb index 6c89cf2a9a2d53..89a3538a80e077 100644 --- a/lib/rubygems/yaml_serializer.rb +++ b/lib/rubygems/yaml_serializer.rb @@ -31,7 +31,7 @@ class Parser MAPPING_KEY_RE = /^((?:[^#:]|:[^ ])+):(?:[ ]+(.*))?$/ def initialize(source) - @lines = source.split(/\r?\n/) + @lines = source.split("\n") @anchors = {} strip_document_prefix 
end @@ -297,7 +297,8 @@ def current_indent def consume_anchor line = @lines[0] - return nil unless line.lstrip =~ /^&(\S+)\s+/ + stripped = line.lstrip + return nil unless stripped.start_with?("&") && stripped =~ /^&(\S+)\s+/ anchor = $1 @lines[0] = line.sub(/&#{Regexp.escape(anchor)}\s+/, "") @@ -327,8 +328,12 @@ def register_anchor(name, node) end def skip_blank_and_comments - @lines.shift while @lines.any? && - (@lines[0].strip.empty? || @lines[0].lstrip.start_with?("#")) + while @lines.any? + line = @lines[0] + stripped = line.lstrip + break unless stripped.empty? || stripped.start_with?("#") + @lines.shift + end end def strip_comment(val) From 8a19f693e7cc9b2736085b8d54eae6bbe242c10a Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 6 Mar 2026 16:53:25 +0900 Subject: [PATCH 17/27] [ruby/rubygems] Guard against nil next line in YAML serializer https://github.com/ruby/rubygems/commit/faab31b5cf --- lib/rubygems/yaml_serializer.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/rubygems/yaml_serializer.rb b/lib/rubygems/yaml_serializer.rb index 89a3538a80e077..88b0100f2183ad 100644 --- a/lib/rubygems/yaml_serializer.rb +++ b/lib/rubygems/yaml_serializer.rb @@ -161,11 +161,13 @@ def parse_mapping_value(val, indent) elsif val.start_with?("!ruby/object:") parse_tagged_content(val.strip, indent) elsif val.empty? + next_stripped = nil + next_indent = nil if @lines.any? next_stripped = @lines[0].lstrip next_indent = @lines[0].size - next_stripped.size end - if @lines.any? 
&& + if next_stripped && (next_stripped.start_with?("- ") || next_stripped == "-") && next_indent == indent parse_node(indent) From fad2934d9a3d50b06d0b58e8bc4115942964c05c Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 6 Mar 2026 13:38:57 +0900 Subject: [PATCH 18/27] [ruby/rubygems] Add comprehensive SafeYAML and YAMLSerializer tests Add tests covering the full pure-Ruby YAML implementation: - Gem object serialization round-trips (dump and load) - YAML anchors and aliases (enabled and disabled) - Permitted classes and symbols validation - Real-world gemspec parsing (fileutils, rubygems-bundler) - Edge cases: empty requirements, Hash-to-Array normalization, rdoc_options conversion, flow notation, non-specific tags, comment-only documents, special character quoting https://github.com/ruby/rubygems/commit/b38681e4e8 --- test/rubygems/test_gem_safe_yaml.rb | 550 ++++++++++++++++++++++++++++ 1 file changed, 550 insertions(+) diff --git a/test/rubygems/test_gem_safe_yaml.rb b/test/rubygems/test_gem_safe_yaml.rb index 02df9f97da58ac..63cb91d29726f2 100644 --- a/test/rubygems/test_gem_safe_yaml.rb +++ b/test/rubygems/test_gem_safe_yaml.rb @@ -6,11 +6,13 @@ class TestGemSafeYAML < Gem::TestCase def test_aliases_enabled_by_default + pend "Psych is not loaded" if defined?(Gem::YAMLSerializer) assert_predicate Gem::SafeYAML, :aliases_enabled? assert_equal({ "a" => "a", "b" => "a" }, Gem::SafeYAML.safe_load("a: &a a\nb: *a\n")) end def test_aliases_disabled + pend "Psych is not loaded" if defined?(Gem::YAMLSerializer) aliases_enabled = Gem::SafeYAML.aliases_enabled? Gem::SafeYAML.aliases_enabled = false refute_predicate Gem::SafeYAML, :aliases_enabled? 
@@ -21,4 +23,552 @@ def test_aliases_disabled ensure Gem::SafeYAML.aliases_enabled = aliases_enabled end + + def test_specification_version_is_integer + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + yaml = <<~YAML + --- !ruby/object:Gem::Specification + name: test + version: !ruby/object:Gem::Version + version: 1.0.0 + specification_version: 4 + YAML + + spec = Gem::SafeYAML.safe_load(yaml) + assert_kind_of Integer, spec.specification_version + assert_equal 4, spec.specification_version + end + + def test_disallowed_class_rejected + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + yaml = <<~YAML + --- !ruby/object:SomeDisallowedClass + foo: bar + YAML + + exception = assert_raise(ArgumentError) do + Gem::SafeYAML.safe_load(yaml) + end + assert_match(/Disallowed class/, exception.message) + end + + def test_disallowed_symbol_rejected + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + yaml = <<~YAML + --- !ruby/object:Gem::Dependency + name: test + requirement: !ruby/object:Gem::Requirement + requirements: + - - ">=" + - !ruby/object:Gem::Version + version: 0 + type: :invalid_type + prerelease: false + version_requirements: !ruby/object:Gem::Requirement + requirements: + - - ">=" + - !ruby/object:Gem::Version + version: 0 + YAML + + exception = assert_raise(ArgumentError) do + Gem::SafeYAML.safe_load(yaml) + end + assert_match(/Disallowed symbol/, exception.message) + end + + def test_yaml_serializer_aliases_disabled + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + aliases_enabled = Gem::SafeYAML.aliases_enabled? + Gem::SafeYAML.aliases_enabled = false + refute_predicate Gem::SafeYAML, :aliases_enabled? 
+ + yaml = "a: &anchor value\nb: *anchor\n" + + exception = assert_raise(ArgumentError) do + Gem::SafeYAML.safe_load(yaml) + end + assert_match(/YAML aliases are not allowed/, exception.message) + ensure + Gem::SafeYAML.aliases_enabled = aliases_enabled + end + + def test_real_gemspec_fileutils + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + yaml = <<~YAML + --- !ruby/object:Gem::Specification + name: fileutils + version: !ruby/object:Gem::Version + version: 1.8.0 + platform: ruby + authors: + - Minero Aoki + bindir: bin + cert_chain: [] + date: 1980-01-02 00:00:00.000000000 Z + dependencies: [] + description: Several file utility methods for copying, moving, removing, etc. + email: + - + executables: [] + extensions: [] + extra_rdoc_files: [] + files: + - BSDL + - COPYING + - README.md + - Rakefile + - fileutils.gemspec + - lib/fileutils.rb + homepage: https://github.com/ruby/fileutils + licenses: + - Ruby + - BSD-2-Clause + metadata: + source_code_uri: https://github.com/ruby/fileutils + rdoc_options: [] + require_paths: + - lib + required_ruby_version: !ruby/object:Gem::Requirement + requirements: + - - ">=" + - !ruby/object:Gem::Version + version: 2.5.0 + required_rubygems_version: !ruby/object:Gem::Requirement + requirements: + - - ">=" + - !ruby/object:Gem::Version + version: '0' + requirements: [] + rubygems_version: 3.6.9 + specification_version: 4 + summary: Several file utility methods for copying, moving, removing, etc. + test_files: [] + YAML + + spec = Gem::SafeYAML.safe_load(yaml) + assert_kind_of Gem::Specification, spec + assert_equal "fileutils", spec.name + assert_equal Gem::Version.new("1.8.0"), spec.version + assert_kind_of Integer, spec.specification_version + assert_equal 4, spec.specification_version + end + + def test_yaml_anchor_and_alias_enabled + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + aliases_enabled = Gem::SafeYAML.aliases_enabled? 
+ Gem::SafeYAML.aliases_enabled = true + + yaml = <<~YAML + dependencies: + - &req !ruby/object:Gem::Requirement + requirements: + - - ">=" + - !ruby/object:Gem::Version + version: '0' + - *req + YAML + + result = Gem::SafeYAML.safe_load(yaml) + assert_kind_of Hash, result + assert_kind_of Array, result["dependencies"] + assert_equal 2, result["dependencies"].size + assert_kind_of Gem::Requirement, result["dependencies"][0] + assert_kind_of Gem::Requirement, result["dependencies"][1] + assert_equal result["dependencies"][0].requirements, result["dependencies"][1].requirements + ensure + Gem::SafeYAML.aliases_enabled = aliases_enabled + end + + def test_real_gemspec_rubygems_bundler + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + yaml = <<~YAML + --- !ruby/object:Gem::Specification + name: rubygems-bundler + version: !ruby/object:Gem::Version + version: 1.4.5 + platform: ruby + authors: + - Josh Hull + - Michal Papis + autorequire: + bindir: bin + cert_chain: [] + date: 2018-06-24 00:00:00.000000000 Z + dependencies: + - !ruby/object:Gem::Dependency + name: bundler-unload + requirement: !ruby/object:Gem::Requirement + requirements: + - - ">=" + - !ruby/object:Gem::Version + version: 1.0.2 + type: :runtime + prerelease: false + version_requirements: !ruby/object:Gem::Requirement + requirements: + - - ">=" + - !ruby/object:Gem::Version + version: 1.0.2 + description: Stop using bundle exec. 
+ email: + - joshbuddy@gmail.com + - mpapis@gmail.com + executables: [] + extensions: [] + extra_rdoc_files: [] + files: + - ".gem.config" + homepage: http://mpapis.github.com/rubygems-bundler + licenses: + - Apache-2.0 + metadata: {} + post_install_message: + rdoc_options: [] + require_paths: + - lib + required_ruby_version: !ruby/object:Gem::Requirement + requirements: + - - ">=" + - !ruby/object:Gem::Version + version: '0' + rubyforge_project: + rubygems_version: 2.7.6 + signing_key: + specification_version: 4 + summary: Stop using bundle exec + test_files: [] + YAML + + spec = Gem::SafeYAML.safe_load(yaml) + assert_kind_of Gem::Specification, spec + assert_equal "rubygems-bundler", spec.name + assert_equal Gem::Version.new("1.4.5"), spec.version + assert_equal 1, spec.dependencies.size + + dep = spec.dependencies.first + assert_equal "bundler-unload", dep.name + assert_kind_of Gem::Requirement, dep.requirement + assert_kind_of Gem::Requirement, dep.instance_variable_get(:@version_requirements) + assert_equal dep.requirement.requirements, [[">=", Gem::Version.new("1.0.2")]] + + # Empty fields should be nil + assert_nil spec.autorequire + assert_nil spec.post_install_message + + # Metadata should be empty hash + assert_equal({}, spec.metadata) + + # specification_version should be Integer + assert_kind_of Integer, spec.specification_version + assert_equal 4, spec.specification_version + end + + def test_empty_requirements_array + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + yaml = <<~YAML + --- !ruby/object:Gem::Specification + name: test + dependencies: + - !ruby/object:Gem::Dependency + name: foo + requirement: !ruby/object:Gem::Requirement + requirements: + type: :runtime + version_requirements: !ruby/object:Gem::Requirement + requirements: + YAML + + spec = Gem::SafeYAML.safe_load(yaml) + assert_kind_of Gem::Specification, spec + assert_equal "test", spec.name + assert_equal 1, spec.dependencies.size + + dep = 
spec.dependencies.first + assert_equal "foo", dep.name + assert_kind_of Gem::Requirement, dep.requirement + + # Requirements should be empty array, not nil + reqs = dep.requirement.instance_variable_get(:@requirements) + assert_kind_of Array, reqs + assert_equal [], reqs + end + + def test_requirements_hash_converted_to_array + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + # Malformed YAML where requirements is a Hash instead of Array + yaml = <<~YAML + !ruby/object:Gem::Requirement + requirements: + foo: bar + YAML + + req = Gem::YAMLSerializer.load(yaml, permitted_classes: ["Gem::Requirement"]) + assert_kind_of Gem::Requirement, req + + # Requirements should be converted from Hash to empty Array + reqs = req.instance_variable_get(:@requirements) + assert_kind_of Array, reqs + assert_equal [], reqs + + # Should not raise error when used + assert req.satisfied_by?(Gem::Version.new("1.0")) + end + + def test_rdoc_options_hash_converted_to_array + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + # Some gemspecs incorrectly have rdoc_options: {} instead of rdoc_options: [] + yaml = <<~YAML + --- !ruby/object:Gem::Specification + name: test-gem + version: !ruby/object:Gem::Version + version: 1.0.0 + rdoc_options: {} + YAML + + spec = Gem::SafeYAML.safe_load(yaml) + assert_kind_of Gem::Specification, spec + assert_equal "test-gem", spec.name + + # rdoc_options should be converted from Hash to Array + assert_kind_of Array, spec.rdoc_options + assert_equal [], spec.rdoc_options + end + + def test_load_returns_hash_for_comment_only_yaml + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + # Bundler config files may contain only comments after deleting all keys + result = Gem::YAMLSerializer.load("---\n# BUNDLE_FOO: \"bar\"\n") + assert_kind_of Hash, result + assert_empty result + end + + def test_load_returns_hash_for_empty_document + pend "YAMLSerializer is not loaded" unless 
defined?(Gem::YAMLSerializer) + + assert_equal({}, Gem::YAMLSerializer.load("---\n")) + assert_equal({}, Gem::YAMLSerializer.load("")) + assert_equal({}, Gem::YAMLSerializer.load(nil)) + end + + def test_load_returns_hash_for_flow_empty_hash + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + # Gem::YAMLSerializer.dump({}) produces "--- {}\n" + result = Gem::YAMLSerializer.load("--- {}\n") + assert_kind_of Hash, result + assert_empty result + end + + def test_load_parses_flow_empty_hash_as_value + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + result = Gem::YAMLSerializer.load("metadata: {}\n") + assert_kind_of Hash, result + assert_kind_of Hash, result["metadata"] + assert_empty result["metadata"] + end + + def test_yaml_non_specific_tag_stripped + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + # Legacy RubyGems (1.x) generated YAML with ! non-specific tags like: + # - ! '>=' + # The ! prefix should be ignored. + yaml = <<~YAML + --- !ruby/object:Gem::Specification + name: legacy-gem + version: !ruby/object:Gem::Version + version: 0.1.0 + required_ruby_version: !ruby/object:Gem::Requirement + none: false + requirements: + - - ! '>=' + - !ruby/object:Gem::Version + version: '0' + required_rubygems_version: !ruby/object:Gem::Requirement + none: false + requirements: + - - ! '>=' + - !ruby/object:Gem::Version + version: 1.3.5 + YAML + + spec = Gem::SafeYAML.safe_load(yaml) + assert_kind_of Gem::Specification, spec + assert_equal "legacy-gem", spec.name + assert_equal Gem::Requirement.new(">= 0"), spec.required_ruby_version + assert_equal Gem::Requirement.new(">= 1.3.5"), spec.required_rubygems_version + end + + def test_legacy_gemspec_with_anchors_and_non_specific_tags + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + aliases_enabled = Gem::SafeYAML.aliases_enabled? 
+ Gem::SafeYAML.aliases_enabled = true + + # Real-world pattern from gems like vegas-0.1.11 that combine + # YAML anchors/aliases with ! non-specific tags + yaml = <<~YAML + --- !ruby/object:Gem::Specification + name: legacy-gem + version: !ruby/object:Gem::Version + version: 0.1.11 + dependencies: + - !ruby/object:Gem::Dependency + name: rack + requirement: &id001 !ruby/object:Gem::Requirement + none: false + requirements: + - - ! '>=' + - !ruby/object:Gem::Version + version: 1.0.0 + type: :runtime + prerelease: false + version_requirements: *id001 + - !ruby/object:Gem::Dependency + name: mocha + requirement: &id002 !ruby/object:Gem::Requirement + none: false + requirements: + - - ~> + - !ruby/object:Gem::Version + version: 0.9.8 + type: :development + prerelease: false + version_requirements: *id002 + YAML + + spec = Gem::SafeYAML.safe_load(yaml) + assert_kind_of Gem::Specification, spec + assert_equal "legacy-gem", spec.name + + assert_equal 2, spec.dependencies.size + + rack_dep = spec.dependencies.find {|d| d.name == "rack" } + assert_kind_of Gem::Dependency, rack_dep + assert_equal :runtime, rack_dep.type + assert_equal Gem::Requirement.new(">= 1.0.0"), rack_dep.requirement + + mocha_dep = spec.dependencies.find {|d| d.name == "mocha" } + assert_kind_of Gem::Dependency, mocha_dep + assert_equal :development, mocha_dep.type + assert_equal Gem::Requirement.new("~> 0.9.8"), mocha_dep.requirement + ensure + Gem::SafeYAML.aliases_enabled = aliases_enabled + end + + def test_non_specific_tag_on_plain_value + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + # ! tag on a bracketed value like rubyforge_project: ! '[none]' + result = Gem::YAMLSerializer.load("key: ! 
'[none]'\n") + assert_equal({ "key" => "[none]" }, result) + end + + def test_dump_quotes_dollar_sign_values + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + # Values starting with $ should be quoted to preserve them as strings + yaml = Gem::YAMLSerializer.dump({ "BUNDLE_FOO" => "$BUILD_DIR", "BUNDLE_BAR" => "baz" }) + assert_include yaml, 'BUNDLE_FOO: "$BUILD_DIR"' + assert_include yaml, "BUNDLE_BAR: baz" + + # Round-trip: ensure the quoted value is parsed back correctly + result = Gem::YAMLSerializer.load(yaml) + assert_equal "$BUILD_DIR", result["BUNDLE_FOO"] + assert_equal "baz", result["BUNDLE_BAR"] + end + + def test_dump_quotes_special_characters + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + # Various special characters that should trigger quoting + special_values = { + "dollar" => "$HOME", + "exclamation" => "!important", + "ampersand" => "&anchor", + "asterisk" => "*ref", + "colon_prefix" => ":symbol", + "at_sign" => "@mention", + "percent" => "%encoded", + } + + yaml = Gem::YAMLSerializer.dump(special_values) + special_values.each do |key, value| + assert_include yaml, "#{key}: #{value.inspect}", "Value #{value.inspect} for key #{key} should be quoted" + end + + # Round-trip + result = Gem::YAMLSerializer.load(yaml) + special_values.each do |key, value| + assert_equal value, result[key], "Round-trip failed for key #{key}" + end + end + + def test_load_ambiguous_value_with_colon + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + # "invalid: yaml: hah" is ambiguous YAML - our parser treats it as + # {"invalid" => "yaml: hah"}, but the value looks like a nested mapping. + # config_file.rb's load_file should detect this and reject it. 
+ result = Gem::YAMLSerializer.load("invalid: yaml: hah") + assert_kind_of Hash, result + assert_equal "yaml: hah", result["invalid"] + end + + def test_nested_anchor_in_array_item + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + # Ensure aliases are enabled for this test + aliases_enabled = Gem::SafeYAML.aliases_enabled? + Gem::SafeYAML.aliases_enabled = true + + yaml = <<~YAML + --- !ruby/object:Gem::Specification + name: test-gem + version: !ruby/object:Gem::Version + version: 1.0.0 + dependencies: + - !ruby/object:Gem::Dependency + name: foo + requirement: !ruby/object:Gem::Requirement + requirements: + - &id002 + - ">=" + - !ruby/object:Gem::Version + version: "0" + type: :runtime + YAML + + spec = Gem::SafeYAML.safe_load(yaml) + assert_kind_of Gem::Specification, spec + assert_equal "test-gem", spec.name + + dep = spec.dependencies.first + assert_kind_of Gem::Dependency, dep + + # Requirements should be parsed as nested arrays, not strings + assert_kind_of Array, dep.requirement.requirements + assert_equal 1, dep.requirement.requirements.size + + req_item = dep.requirement.requirements.first + assert_kind_of Array, req_item + assert_equal ">=", req_item[0] + assert_kind_of Gem::Version, req_item[1] + assert_equal "0", req_item[1].version + ensure + Gem::SafeYAML.aliases_enabled = aliases_enabled + end end From 88aeabf8dab66b46da8f82d312ba580b90772678 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 6 Mar 2026 14:54:35 +0900 Subject: [PATCH 19/27] [ruby/rubygems] Add YAMLSerializer round-trip tests https://github.com/ruby/rubygems/commit/89ea9dbb19 --- test/rubygems/test_gem_safe_yaml.rb | 166 ++++++++++++++++++++++++++++ 1 file changed, 166 insertions(+) diff --git a/test/rubygems/test_gem_safe_yaml.rb b/test/rubygems/test_gem_safe_yaml.rb index 63cb91d29726f2..f9deda04ae01f5 100644 --- a/test/rubygems/test_gem_safe_yaml.rb +++ b/test/rubygems/test_gem_safe_yaml.rb @@ -571,4 +571,170 @@ def 
test_nested_anchor_in_array_item ensure Gem::SafeYAML.aliases_enabled = aliases_enabled end + + def test_roundtrip_specification + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + spec = Gem::Specification.new do |s| + s.name = "round-trip-test" + s.version = "2.3.4" + s.platform = "ruby" + s.authors = ["Test Author"] + s.summary = "A test gem for round-trip" + s.description = "Longer description of the test gem" + s.files = ["lib/foo.rb", "README.md"] + s.require_paths = ["lib"] + s.homepage = "https://example.com" + s.licenses = ["MIT"] + s.metadata = { "source_code_uri" => "https://example.com/src" } + s.add_dependency "rake", ">= 1.0" + end + + yaml = Gem::YAMLSerializer.dump(spec) + loaded = Gem::SafeYAML.safe_load(yaml) + + assert_kind_of Gem::Specification, loaded + assert_equal "round-trip-test", loaded.name + assert_equal Gem::Version.new("2.3.4"), loaded.version + assert_equal ["Test Author"], loaded.authors + assert_equal "A test gem for round-trip", loaded.summary + assert_equal ["README.md", "lib/foo.rb"], loaded.files + assert_equal ["lib"], loaded.require_paths + assert_equal "https://example.com", loaded.homepage + assert_equal ["MIT"], loaded.licenses + assert_equal({ "source_code_uri" => "https://example.com/src" }, loaded.metadata) + assert_equal 1, loaded.dependencies.size + + dep = loaded.dependencies.first + assert_equal "rake", dep.name + assert_equal :runtime, dep.type + end + + def test_roundtrip_version + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + ver = Gem::Version.new("1.2.3") + yaml = Gem::YAMLSerializer.dump(ver) + loaded = Gem::YAMLSerializer.load(yaml, permitted_classes: Gem::SafeYAML::PERMITTED_CLASSES) + + assert_kind_of Gem::Version, loaded + assert_equal ver, loaded + end + + def test_roundtrip_platform + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + plat = Gem::Platform.new("x86_64-linux") + yaml = Gem::YAMLSerializer.dump(plat) + loaded 
= Gem::YAMLSerializer.load(yaml, permitted_classes: Gem::SafeYAML::PERMITTED_CLASSES) + + assert_kind_of Gem::Platform, loaded + assert_equal plat.cpu, loaded.cpu + assert_equal plat.os, loaded.os + assert_equal plat.version, loaded.version + end + + def test_roundtrip_requirement + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + req = Gem::Requirement.new(">= 1.0", "< 2.0") + yaml = Gem::YAMLSerializer.dump(req) + loaded = Gem::YAMLSerializer.load(yaml, permitted_classes: Gem::SafeYAML::PERMITTED_CLASSES) + + assert_kind_of Gem::Requirement, loaded + assert_equal req.requirements.sort_by(&:to_s), loaded.requirements.sort_by(&:to_s) + end + + def test_roundtrip_dependency + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + dep = Gem::Dependency.new("foo", ">= 1.0", :development) + yaml = Gem::YAMLSerializer.dump(dep) + loaded = Gem::YAMLSerializer.load(yaml, permitted_classes: Gem::SafeYAML::PERMITTED_CLASSES) + + assert_kind_of Gem::Dependency, loaded + assert_equal "foo", loaded.name + assert_equal :development, loaded.type + assert_equal dep.requirement.requirements, loaded.requirement.requirements + end + + def test_roundtrip_nested_hash + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + obj = { "a" => { "b" => "c", "d" => [1, 2, 3] } } + yaml = Gem::YAMLSerializer.dump(obj) + loaded = Gem::YAMLSerializer.load(yaml) + + assert_equal obj, loaded + end + + def test_roundtrip_block_scalar + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + obj = { "text" => "line1\nline2\n" } + yaml = Gem::YAMLSerializer.dump(obj) + loaded = Gem::YAMLSerializer.load(yaml) + + assert_equal "line1\nline2\n", loaded["text"] + end + + def test_roundtrip_special_characters + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + obj = { + "dollar" => "$HOME", + "exclamation" => "!important", + "ampersand" => "&anchor", + "asterisk" => "*ref", + 
"colon_prefix" => ":symbol", + "hash_char" => "value#comment", + "brackets" => "[item]", + "braces" => "{key}", + "comma" => "a,b,c", + } + yaml = Gem::YAMLSerializer.dump(obj) + loaded = Gem::YAMLSerializer.load(yaml) + + obj.each do |key, value| + assert_equal value, loaded[key], "Round-trip failed for key #{key}" + end + end + + def test_roundtrip_boolean_nil_integer + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + obj = { "flag" => true, "count" => 42, "empty" => nil, "off" => false } + yaml = Gem::YAMLSerializer.dump(obj) + loaded = Gem::YAMLSerializer.load(yaml) + + assert_equal true, loaded["flag"] + assert_equal 42, loaded["count"] + assert_nil loaded["empty"] + assert_equal false, loaded["off"] + end + + def test_roundtrip_time + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + time = Time.utc(2024, 6, 15, 12, 30, 45) + obj = { "created" => time } + yaml = Gem::YAMLSerializer.dump(obj) + loaded = Gem::YAMLSerializer.load(yaml) + + assert_kind_of Time, loaded["created"] + assert_equal time.year, loaded["created"].year + assert_equal time.month, loaded["created"].month + assert_equal time.day, loaded["created"].day + end + + def test_roundtrip_empty_collections + pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + + obj = { "arr" => [], "hash" => {} } + yaml = Gem::YAMLSerializer.dump(obj) + loaded = Gem::YAMLSerializer.load(yaml) + + assert_equal [], loaded["arr"] + assert_equal({}, loaded["hash"]) + end end From 4cd372672aa21f65968aa37ce3cc7125edd1c302 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 6 Mar 2026 15:01:27 +0900 Subject: [PATCH 20/27] [ruby/rubygems] Add unit and regression tests for YAML serializer https://github.com/ruby/rubygems/commit/9741fbf151 --- lib/rubygems/safe_yaml.rb | 2 +- test/rubygems/test_gem_safe_yaml.rb | 499 +++++++++++++++++++++++++--- 2 files changed, 463 insertions(+), 38 deletions(-) diff --git a/lib/rubygems/safe_yaml.rb 
b/lib/rubygems/safe_yaml.rb index 1c15f10eb16661..03db77c6bf3a5d 100644 --- a/lib/rubygems/safe_yaml.rb +++ b/lib/rubygems/safe_yaml.rb @@ -59,7 +59,7 @@ def self.load(input) end end - def self.safe_load(input) + def self.unsafe_load(input) if Gem.use_psych? if ::Psych.respond_to?(:unsafe_load) ::Psych.unsafe_load(input) diff --git a/test/rubygems/test_gem_safe_yaml.rb b/test/rubygems/test_gem_safe_yaml.rb index f9deda04ae01f5..dd1ddf96c2148a 100644 --- a/test/rubygems/test_gem_safe_yaml.rb +++ b/test/rubygems/test_gem_safe_yaml.rb @@ -6,13 +6,11 @@ class TestGemSafeYAML < Gem::TestCase def test_aliases_enabled_by_default - pend "Psych is not loaded" if defined?(Gem::YAMLSerializer) assert_predicate Gem::SafeYAML, :aliases_enabled? assert_equal({ "a" => "a", "b" => "a" }, Gem::SafeYAML.safe_load("a: &a a\nb: *a\n")) end def test_aliases_disabled - pend "Psych is not loaded" if defined?(Gem::YAMLSerializer) aliases_enabled = Gem::SafeYAML.aliases_enabled? Gem::SafeYAML.aliases_enabled = false refute_predicate Gem::SafeYAML, :aliases_enabled? @@ -25,7 +23,7 @@ def test_aliases_disabled end def test_specification_version_is_integer - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? yaml = <<~YAML --- !ruby/object:Gem::Specification @@ -41,21 +39,21 @@ def test_specification_version_is_integer end def test_disallowed_class_rejected - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? yaml = <<~YAML --- !ruby/object:SomeDisallowedClass foo: bar YAML - exception = assert_raise(ArgumentError) do + exception = assert_raise(Psych::DisallowedClass) do Gem::SafeYAML.safe_load(yaml) end assert_match(/Disallowed class/, exception.message) end def test_disallowed_symbol_rejected - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? 
yaml = <<~YAML --- !ruby/object:Gem::Dependency @@ -74,14 +72,14 @@ def test_disallowed_symbol_rejected version: 0 YAML - exception = assert_raise(ArgumentError) do + exception = assert_raise(Psych::DisallowedClass) do Gem::SafeYAML.safe_load(yaml) end assert_match(/Disallowed symbol/, exception.message) end def test_yaml_serializer_aliases_disabled - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? aliases_enabled = Gem::SafeYAML.aliases_enabled? Gem::SafeYAML.aliases_enabled = false @@ -89,7 +87,7 @@ def test_yaml_serializer_aliases_disabled yaml = "a: &anchor value\nb: *anchor\n" - exception = assert_raise(ArgumentError) do + exception = assert_raise(Psych::AliasesNotEnabled) do Gem::SafeYAML.safe_load(yaml) end assert_match(/YAML aliases are not allowed/, exception.message) @@ -98,7 +96,7 @@ def test_yaml_serializer_aliases_disabled end def test_real_gemspec_fileutils - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? yaml = <<~YAML --- !ruby/object:Gem::Specification @@ -160,7 +158,7 @@ def test_real_gemspec_fileutils end def test_yaml_anchor_and_alias_enabled - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? aliases_enabled = Gem::SafeYAML.aliases_enabled? Gem::SafeYAML.aliases_enabled = true @@ -187,7 +185,7 @@ def test_yaml_anchor_and_alias_enabled end def test_real_gemspec_rubygems_bundler - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? yaml = <<~YAML --- !ruby/object:Gem::Specification @@ -272,7 +270,7 @@ def test_real_gemspec_rubygems_bundler end def test_empty_requirements_array - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? 
yaml = <<~YAML --- !ruby/object:Gem::Specification @@ -303,7 +301,7 @@ def test_empty_requirements_array end def test_requirements_hash_converted_to_array - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? # Malformed YAML where requirements is a Hash instead of Array yaml = <<~YAML @@ -325,7 +323,7 @@ def test_requirements_hash_converted_to_array end def test_rdoc_options_hash_converted_to_array - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? # Some gemspecs incorrectly have rdoc_options: {} instead of rdoc_options: [] yaml = <<~YAML @@ -346,7 +344,7 @@ def test_rdoc_options_hash_converted_to_array end def test_load_returns_hash_for_comment_only_yaml - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? # Bundler config files may contain only comments after deleting all keys result = Gem::YAMLSerializer.load("---\n# BUNDLE_FOO: \"bar\"\n") @@ -355,7 +353,7 @@ def test_load_returns_hash_for_comment_only_yaml end def test_load_returns_hash_for_empty_document - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? assert_equal({}, Gem::YAMLSerializer.load("---\n")) assert_equal({}, Gem::YAMLSerializer.load("")) @@ -363,7 +361,7 @@ def test_load_returns_hash_for_empty_document end def test_load_returns_hash_for_flow_empty_hash - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? # Gem::YAMLSerializer.dump({}) produces "--- {}\n" result = Gem::YAMLSerializer.load("--- {}\n") @@ -372,7 +370,7 @@ def test_load_returns_hash_for_flow_empty_hash end def test_load_parses_flow_empty_hash_as_value - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? 
result = Gem::YAMLSerializer.load("metadata: {}\n") assert_kind_of Hash, result @@ -381,7 +379,7 @@ def test_load_parses_flow_empty_hash_as_value end def test_yaml_non_specific_tag_stripped - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? # Legacy RubyGems (1.x) generated YAML with ! non-specific tags like: # - ! '>=' @@ -413,7 +411,7 @@ def test_yaml_non_specific_tag_stripped end def test_legacy_gemspec_with_anchors_and_non_specific_tags - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? aliases_enabled = Gem::SafeYAML.aliases_enabled? Gem::SafeYAML.aliases_enabled = true @@ -470,7 +468,7 @@ def test_legacy_gemspec_with_anchors_and_non_specific_tags end def test_non_specific_tag_on_plain_value - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? # ! tag on a bracketed value like rubyforge_project: ! '[none]' result = Gem::YAMLSerializer.load("key: ! '[none]'\n") @@ -478,7 +476,7 @@ def test_non_specific_tag_on_plain_value end def test_dump_quotes_dollar_sign_values - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? # Values starting with $ should be quoted to preserve them as strings yaml = Gem::YAMLSerializer.dump({ "BUNDLE_FOO" => "$BUILD_DIR", "BUNDLE_BAR" => "baz" }) @@ -492,7 +490,7 @@ def test_dump_quotes_dollar_sign_values end def test_dump_quotes_special_characters - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? # Various special characters that should trigger quoting special_values = { @@ -518,7 +516,7 @@ def test_dump_quotes_special_characters end def test_load_ambiguous_value_with_colon - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? 
# "invalid: yaml: hah" is ambiguous YAML - our parser treats it as # {"invalid" => "yaml: hah"}, but the value looks like a nested mapping. @@ -529,7 +527,7 @@ def test_load_ambiguous_value_with_colon end def test_nested_anchor_in_array_item - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? # Ensure aliases are enabled for this test aliases_enabled = Gem::SafeYAML.aliases_enabled? @@ -573,7 +571,7 @@ def test_nested_anchor_in_array_item end def test_roundtrip_specification - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? spec = Gem::Specification.new do |s| s.name = "round-trip-test" @@ -611,7 +609,7 @@ def test_roundtrip_specification end def test_roundtrip_version - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? ver = Gem::Version.new("1.2.3") yaml = Gem::YAMLSerializer.dump(ver) @@ -622,7 +620,7 @@ def test_roundtrip_version end def test_roundtrip_platform - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? plat = Gem::Platform.new("x86_64-linux") yaml = Gem::YAMLSerializer.dump(plat) @@ -635,7 +633,7 @@ def test_roundtrip_platform end def test_roundtrip_requirement - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? req = Gem::Requirement.new(">= 1.0", "< 2.0") yaml = Gem::YAMLSerializer.dump(req) @@ -646,7 +644,7 @@ def test_roundtrip_requirement end def test_roundtrip_dependency - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? 
dep = Gem::Dependency.new("foo", ">= 1.0", :development) yaml = Gem::YAMLSerializer.dump(dep) @@ -659,7 +657,7 @@ def test_roundtrip_dependency end def test_roundtrip_nested_hash - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? obj = { "a" => { "b" => "c", "d" => [1, 2, 3] } } yaml = Gem::YAMLSerializer.dump(obj) @@ -669,7 +667,7 @@ def test_roundtrip_nested_hash end def test_roundtrip_block_scalar - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? obj = { "text" => "line1\nline2\n" } yaml = Gem::YAMLSerializer.dump(obj) @@ -679,7 +677,7 @@ def test_roundtrip_block_scalar end def test_roundtrip_special_characters - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? obj = { "dollar" => "$HOME", @@ -701,7 +699,7 @@ def test_roundtrip_special_characters end def test_roundtrip_boolean_nil_integer - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? obj = { "flag" => true, "count" => 42, "empty" => nil, "off" => false } yaml = Gem::YAMLSerializer.dump(obj) @@ -714,7 +712,7 @@ def test_roundtrip_boolean_nil_integer end def test_roundtrip_time - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? time = Time.utc(2024, 6, 15, 12, 30, 45) obj = { "created" => time } @@ -728,7 +726,7 @@ def test_roundtrip_time end def test_roundtrip_empty_collections - pend "YAMLSerializer is not loaded" unless defined?(Gem::YAMLSerializer) + pend "Psych mode" if Gem.use_psych? obj = { "arr" => [], "hash" => {} } yaml = Gem::YAMLSerializer.dump(obj) @@ -737,4 +735,431 @@ def test_roundtrip_empty_collections assert_equal [], loaded["arr"] assert_equal({}, loaded["hash"]) end + + def test_load_double_quoted_escape_sequences + pend "Psych mode" if Gem.use_psych? 
+ + result = Gem::YAMLSerializer.load("newline: \"hello\\nworld\"") + assert_equal "hello\nworld", result["newline"] + + result = Gem::YAMLSerializer.load("tab: \"col1\\tcol2\"") + assert_equal "col1\tcol2", result["tab"] + + result = Gem::YAMLSerializer.load("cr: \"line\\rend\"") + assert_equal "line\rend", result["cr"] + + result = Gem::YAMLSerializer.load("quote: \"say\\\"hi\\\"\"") + assert_equal "say\"hi\"", result["quote"] + end + + def test_load_single_quoted_escape + pend "Psych mode" if Gem.use_psych? + + result = Gem::YAMLSerializer.load("key: 'it''s'") + assert_equal "it's", result["key"] + + result = Gem::YAMLSerializer.load("key: 'no escape \\n here'") + assert_equal "no escape \\n here", result["key"] + end + + def test_load_quoted_numeric_stays_string + pend "Psych mode" if Gem.use_psych? + + result = Gem::YAMLSerializer.load("key: \"42\"") + assert_equal "42", result["key"] + assert_kind_of String, result["key"] + + result = Gem::YAMLSerializer.load("key: '99'") + assert_equal "99", result["key"] + assert_kind_of String, result["key"] + end + + def test_load_empty_string_value + pend "Psych mode" if Gem.use_psych? + + result = Gem::YAMLSerializer.load("key: \"\"") + assert_equal "", result["key"] + end + + def test_load_unquoted_integer + pend "Psych mode" if Gem.use_psych? + + result = Gem::YAMLSerializer.load("key: 42") + assert_equal 42, result["key"] + assert_kind_of Integer, result["key"] + + result = Gem::YAMLSerializer.load("key: -7") + assert_equal(-7, result["key"]) + end + + def test_load_boolean_values + pend "Psych mode" if Gem.use_psych? + + result = Gem::YAMLSerializer.load("a: true\nb: false") + assert_equal true, result["a"] + assert_equal false, result["b"] + end + + def test_load_nil_value + pend "Psych mode" if Gem.use_psych? + + result = Gem::YAMLSerializer.load("key: nil") + assert_nil result["key"] + end + + def test_load_time_value + pend "Psych mode" if Gem.use_psych? 
+ + result = Gem::YAMLSerializer.load("date: 2024-06-15 12:30:45.000000000 Z") + assert_kind_of Time, result["date"] + assert_equal 2024, result["date"].year + assert_equal 6, result["date"].month + assert_equal 15, result["date"].day + end + + def test_load_block_scalar_keep_trailing_newline + pend "Psych mode" if Gem.use_psych? + + yaml = "text: |\n line1\n line2\n" + result = Gem::YAMLSerializer.load(yaml) + assert_equal "line1\nline2\n", result["text"] + end + + def test_load_block_scalar_strip_trailing_newline + pend "Psych mode" if Gem.use_psych? + + yaml = "text: |-\n no trailing newline\n" + result = Gem::YAMLSerializer.load(yaml) + assert_equal "no trailing newline", result["text"] + refute result["text"].end_with?("\n") + end + + def test_load_flow_array + pend "Psych mode" if Gem.use_psych? + + result = Gem::YAMLSerializer.load("items: [a, b, c]") + assert_equal ["a", "b", "c"], result["items"] + end + + def test_load_flow_empty_array + pend "Psych mode" if Gem.use_psych? + + result = Gem::YAMLSerializer.load("items: []") + assert_equal [], result["items"] + end + + def test_load_mapping_key_with_no_value + pend "Psych mode" if Gem.use_psych? + + result = Gem::YAMLSerializer.load("key:") + assert_kind_of Hash, result + assert_nil result["key"] + end + + def test_load_sequence_item_as_mapping + pend "Psych mode" if Gem.use_psych? + + yaml = "items:\n- name: foo\n ver: 1\n- name: bar\n ver: 2" + result = Gem::YAMLSerializer.load(yaml) + assert_equal [{ "name" => "foo", "ver" => 1 }, { "name" => "bar", "ver" => 2 }], result["items"] + end + + def test_load_nested_sequence + pend "Psych mode" if Gem.use_psych? + + yaml = "matrix:\n- - a\n - b\n- - c\n - d" + result = Gem::YAMLSerializer.load(yaml) + assert_equal [["a", "b"], ["c", "d"]], result["matrix"] + end + + def test_load_comment_stripped_from_value + pend "Psych mode" if Gem.use_psych? 
+ + result = Gem::YAMLSerializer.load("key: value # this is a comment") + assert_equal "value", result["key"] + end + + def test_load_comment_in_quoted_string_preserved + pend "Psych mode" if Gem.use_psych? + + result = Gem::YAMLSerializer.load("key: \"value # not a comment\"") + assert_equal "value # not a comment", result["key"] + + result = Gem::YAMLSerializer.load("key: 'value # not a comment'") + assert_equal "value # not a comment", result["key"] + end + + def test_load_crlf_line_endings + pend "Psych mode" if Gem.use_psych? + + result = Gem::YAMLSerializer.load("key: value\r\nother: data\r\n") + assert_equal "value", result["key"] + assert_equal "data", result["other"] + end + + def test_load_version_requirement_old_tag + pend "Psych mode" if Gem.use_psych? + + yaml = <<~YAML + !ruby/object:Gem::Version::Requirement + requirements: + - - ">=" + - !ruby/object:Gem::Version + version: "1.0" + YAML + + req = Gem::YAMLSerializer.load(yaml, permitted_classes: Gem::SafeYAML::PERMITTED_CLASSES) + assert_kind_of Gem::Requirement, req + assert_equal [[">=", Gem::Version.new("1.0")]], req.requirements + end + + def test_load_platform_from_value_field + pend "Psych mode" if Gem.use_psych? + + yaml = "!ruby/object:Gem::Platform\nvalue: x86-linux\n" + plat = Gem::YAMLSerializer.load(yaml, permitted_classes: Gem::SafeYAML::PERMITTED_CLASSES) + assert_kind_of Gem::Platform, plat + assert_equal "x86", plat.cpu + assert_equal "linux", plat.os + end + + def test_load_platform_from_cpu_os_version_fields + pend "Psych mode" if Gem.use_psych? + + yaml = "!ruby/object:Gem::Platform\ncpu: x86_64\nos: darwin\nversion: nil\n" + plat = Gem::YAMLSerializer.load(yaml, permitted_classes: Gem::SafeYAML::PERMITTED_CLASSES) + assert_kind_of Gem::Platform, plat + assert_equal "x86_64", plat.cpu + assert_equal "darwin", plat.os + end + + def test_load_dependency_missing_requirement_uses_default + pend "Psych mode" if Gem.use_psych? 
+ + yaml = <<~YAML + !ruby/object:Gem::Dependency + name: foo + type: :runtime + YAML + + dep = Gem::YAMLSerializer.load(yaml, permitted_classes: Gem::SafeYAML::PERMITTED_CLASSES) + assert_kind_of Gem::Dependency, dep + assert_equal "foo", dep.name + assert_equal :runtime, dep.type + assert_kind_of Gem::Requirement, dep.requirement + end + + def test_load_dependency_missing_type_defaults_to_runtime + pend "Psych mode" if Gem.use_psych? + + yaml = <<~YAML + !ruby/object:Gem::Dependency + name: bar + requirement: !ruby/object:Gem::Requirement + requirements: + - - ">=" + - !ruby/object:Gem::Version + version: '0' + YAML + + dep = Gem::YAMLSerializer.load(yaml, permitted_classes: Gem::SafeYAML::PERMITTED_CLASSES) + assert_equal :runtime, dep.type + end + + def test_specification_version_non_numeric_string_not_converted + pend "Psych mode" if Gem.use_psych? + + yaml = <<~YAML + --- !ruby/object:Gem::Specification + name: test + version: !ruby/object:Gem::Version + version: 1.0.0 + specification_version: abc + YAML + + spec = Gem::SafeYAML.safe_load(yaml) + assert_kind_of Gem::Specification, spec + # Non-numeric string should not be converted to Integer + assert_equal "abc", spec.specification_version + end + + def test_unknown_permitted_tag_returns_hash_with_tag + pend "Psych mode" if Gem.use_psych? + + yaml = "!ruby/object:MyCustomClass\nfoo: bar\n" + result = Gem::YAMLSerializer.load(yaml, permitted_classes: ["MyCustomClass"]) + assert_kind_of Hash, result + assert_equal "bar", result["foo"] + assert_equal "!ruby/object:MyCustomClass", result[:tag] + end + + def test_dump_block_scalar_with_trailing_newline + pend "Psych mode" if Gem.use_psych? + + yaml = Gem::YAMLSerializer.dump({ "text" => "line1\nline2\n" }) + assert_include yaml, " |\n" + refute_includes yaml, " |-\n" + end + + def test_dump_block_scalar_without_trailing_newline + pend "Psych mode" if Gem.use_psych? 
+ + yaml = Gem::YAMLSerializer.dump({ "text" => "line1\nline2" }) + assert_include yaml, " |-\n" + end + + def test_dump_nil_value + pend "Psych mode" if Gem.use_psych? + + yaml = Gem::YAMLSerializer.dump({ "key" => nil }) + assert_include yaml, "key: nil\n" + + loaded = Gem::YAMLSerializer.load(yaml) + assert_nil loaded["key"] + end + + def test_dump_symbol_keys_quoted + pend "Psych mode" if Gem.use_psych? + + yaml = Gem::YAMLSerializer.dump({ foo: "bar" }) + # Symbol keys should use inspect format + assert_include yaml, ":foo:" + + # Symbol values in hash with symbol keys should be quoted + yaml = Gem::YAMLSerializer.dump({ type: ":runtime" }) + assert_include yaml, "\":runtime\"" + end + + def test_regression_flow_empty_hash_as_root + pend "Psych mode" if Gem.use_psych? + + # Previously returned Mapping struct instead of Hash + result = Gem::YAMLSerializer.load("--- {}") + assert_kind_of Hash, result + assert_empty result + end + + def test_regression_alias_check_in_builder_not_parser + pend "Psych mode" if Gem.use_psych? + + # Previously aliases were resolved in Parser, bypassing Builder's policy check. + # The Builder must enforce aliases: false. + aliases_enabled = Gem::SafeYAML.aliases_enabled? + Gem::SafeYAML.aliases_enabled = false + + # Alias in mapping value + exception = assert_raise(Psych::AliasesNotEnabled) do + Gem::YAMLSerializer.load("a: &x val\nb: *x", aliases: false) + end + assert_match(/YAML aliases are not allowed/, exception.message) + + # Alias in sequence item + exception = assert_raise(Psych::AliasesNotEnabled) do + Gem::YAMLSerializer.load("items:\n- &x val\n- *x", aliases: false) + end + assert_match(/YAML aliases are not allowed/, exception.message) + ensure + Gem::SafeYAML.aliases_enabled = aliases_enabled + end + + def test_regression_anchored_mapping_stored_for_alias_resolution + pend "Psych mode" if Gem.use_psych? + + # Previously build_mapping didn't call store_anchor, so anchored + # Gem types (Requirement, etc.) 
couldn't be resolved via aliases. + aliases_enabled = Gem::SafeYAML.aliases_enabled? + Gem::SafeYAML.aliases_enabled = true + + yaml = <<~YAML + a: &req !ruby/object:Gem::Requirement + requirements: + - - ">=" + - !ruby/object:Gem::Version + version: '0' + b: *req + YAML + + result = Gem::SafeYAML.safe_load(yaml) + assert_kind_of Gem::Requirement, result["a"] + assert_kind_of Gem::Requirement, result["b"] + assert_equal result["a"].requirements, result["b"].requirements + ensure + Gem::SafeYAML.aliases_enabled = aliases_enabled + end + + def test_regression_register_anchor_sets_node_anchor + pend "Psych mode" if Gem.use_psych? + + # Previously register_anchor only stored node in @anchors hash but + # didn't set node.anchor, so Builder couldn't track anchored values. + aliases_enabled = Gem::SafeYAML.aliases_enabled? + Gem::SafeYAML.aliases_enabled = true + + yaml = <<~YAML + items: + - &item !ruby/object:Gem::Version + version: '1.0' + - *item + YAML + + result = Gem::SafeYAML.safe_load(yaml) + assert_kind_of Array, result["items"] + assert_equal 2, result["items"].size + assert_kind_of Gem::Version, result["items"][0] + assert_kind_of Gem::Version, result["items"][1] + assert_equal result["items"][0], result["items"][1] + ensure + Gem::SafeYAML.aliases_enabled = aliases_enabled + end + + def test_regression_coerce_empty_hash_not_wrapped_in_scalar + pend "Psych mode" if Gem.use_psych? + + # Previously coerce("{}") returned Mapping but parse_plain_scalar + # wrapped it in Scalar.new(value: Mapping), causing type mismatch. + result = Gem::YAMLSerializer.load("--- {}") + assert_kind_of Hash, result + + result = Gem::YAMLSerializer.load("key: {}") + assert_kind_of Hash, result["key"] + end + + def test_regression_rdoc_options_normalized_to_array + pend "Psych mode" if Gem.use_psych? 
+ + # rdoc_options as Hash (malformed gemspec) + yaml = <<~YAML + --- !ruby/object:Gem::Specification + name: test + version: !ruby/object:Gem::Version + version: 1.0.0 + rdoc_options: + --title: MyGem + --main: README + YAML + + spec = Gem::SafeYAML.safe_load(yaml) + assert_kind_of Array, spec.rdoc_options + # Hash rdoc_options: normalize_rdoc_options! extracts values + assert_include spec.rdoc_options, "MyGem" + assert_include spec.rdoc_options, "README" + end + + def test_regression_requirements_field_normalized_to_array + pend "Psych mode" if Gem.use_psych? + + # The "requirements" field in a Specification (not Requirement) + # should be normalized from Hash to Array if malformed + yaml = <<~YAML + --- !ruby/object:Gem::Specification + name: test + version: !ruby/object:Gem::Version + version: 1.0.0 + requirements: + foo: bar + YAML + + spec = Gem::SafeYAML.safe_load(yaml) + assert_kind_of Array, spec.requirements + end end From 6a92781ff6b4a4338aaf26cf79fbf9f0f20dc382 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Mon, 9 Mar 2026 12:35:06 +0900 Subject: [PATCH 21/27] [ruby/rubygems] Add Psych stub classes to yaml serializer https://github.com/ruby/rubygems/commit/f3a1b17fce --- lib/rubygems.rb | 7 +----- lib/rubygems/safe_yaml.rb | 2 +- lib/rubygems/yaml_serializer.rb | 36 +++++++++++++++++++++-------- test/rubygems/test_gem_safe_yaml.rb | 13 ++++------- 4 files changed, 33 insertions(+), 25 deletions(-) diff --git a/lib/rubygems.rb b/lib/rubygems.rb index baf0599ee6bbd7..55e214e8631f58 100644 --- a/lib/rubygems.rb +++ b/lib/rubygems.rb @@ -656,14 +656,9 @@ def self.load_yaml return if @yaml_loaded @use_psych = ENV["RUBYGEMS_USE_PSYCH"] == "true" || - (defined?(@configuration) && @configuration && !!@configuration[:use_psych]) + (defined?(@configuration) && @configuration && !!@configuration[:use_psych])
if @use_psych - # Remove Psych stubs (defined by yaml_serializer.rb) before loading - # real Psych to avoid superclass mismatch errors - if defined?(Psych) && !defined?(Psych::VERSION) - Object.send(:remove_const, :Psych) - end require "psych" require_relative "rubygems/psych_tree" end diff --git a/lib/rubygems/safe_yaml.rb b/lib/rubygems/safe_yaml.rb index 03db77c6bf3a5d..223c50ce335012 100644 --- a/lib/rubygems/safe_yaml.rb +++ b/lib/rubygems/safe_yaml.rb @@ -37,7 +37,7 @@ def self.aliases_enabled? # :nodoc: def self.safe_load(input) if Gem.use_psych? ::Psych.safe_load(input, permitted_classes: PERMITTED_CLASSES, - permitted_symbols: PERMITTED_SYMBOLS, aliases: @aliases_enabled) + permitted_symbols: PERMITTED_SYMBOLS, aliases: @aliases_enabled) else Gem::YAMLSerializer.load( input, diff --git a/lib/rubygems/yaml_serializer.rb b/lib/rubygems/yaml_serializer.rb index 88b0100f2183ad..72207c7c5339d6 100644 --- a/lib/rubygems/yaml_serializer.rb +++ b/lib/rubygems/yaml_serializer.rb @@ -1,11 +1,27 @@ # frozen_string_literal: true -unless defined?(Psych) - module Psych - class SyntaxError < ::StandardError; end - class DisallowedClass < ::ArgumentError; end - class BadAlias < ::ArgumentError; end - class AliasesNotEnabled < BadAlias; end +unless defined?(Psych::Exception) + begin + require "psych/exception" + rescue LoadError + module Psych + class Exception < ::RuntimeError; end + class SyntaxError < Exception; end + + class DisallowedClass < Exception + def initialize(action, klass_name) + super("Tried to #{action} unspecified class: #{klass_name}") + end + end + + class BadAlias < Exception; end + + class AliasesNotEnabled < BadAlias + def initialize + super "Alias parsing was not enabled. To enable it, pass `aliases: true` to `Psych::load` or `Psych::safe_load`." 
+ end + end + end end end @@ -414,7 +430,7 @@ def build_node(node) end def resolve_alias(node) - raise Psych::AliasesNotEnabled, "YAML aliases are not allowed" unless @aliases + raise Psych::AliasesNotEnabled unless @aliases @anchor_values.fetch(node.name, nil) end @@ -566,19 +582,19 @@ def build_safe_requirement(req_value) def validate_tag!(tag) unless @permitted_tags.include?(tag) - raise Psych::DisallowedClass, "Disallowed class: #{tag}" + raise Psych::DisallowedClass.new("load", tag) end end def validate_symbol!(sym) if @permitted_symbols.any? && !@permitted_symbols.include?(sym.to_s) - raise Psych::DisallowedClass, "Disallowed symbol: #{sym.inspect}" + raise Psych::DisallowedClass.new("load", sym.inspect) end end def check_anchor!(node) if node.anchor - raise Psych::AliasesNotEnabled, "YAML aliases are not allowed" unless @aliases + raise Psych::AliasesNotEnabled unless @aliases end end diff --git a/test/rubygems/test_gem_safe_yaml.rb b/test/rubygems/test_gem_safe_yaml.rb index dd1ddf96c2148a..72b913e0c1ed33 100644 --- a/test/rubygems/test_gem_safe_yaml.rb +++ b/test/rubygems/test_gem_safe_yaml.rb @@ -49,7 +49,7 @@ def test_disallowed_class_rejected exception = assert_raise(Psych::DisallowedClass) do Gem::SafeYAML.safe_load(yaml) end - assert_match(/Disallowed class/, exception.message) + assert_match(/unspecified class/, exception.message) end def test_disallowed_symbol_rejected @@ -75,7 +75,7 @@ def test_disallowed_symbol_rejected exception = assert_raise(Psych::DisallowedClass) do Gem::SafeYAML.safe_load(yaml) end - assert_match(/Disallowed symbol/, exception.message) + assert_match(/unspecified class/, exception.message) end def test_yaml_serializer_aliases_disabled @@ -87,10 +87,9 @@ def test_yaml_serializer_aliases_disabled yaml = "a: &anchor value\nb: *anchor\n" - exception = assert_raise(Psych::AliasesNotEnabled) do + assert_raise(Psych::AliasesNotEnabled) do Gem::SafeYAML.safe_load(yaml) end - assert_match(/YAML aliases are not allowed/, 
exception.message) ensure Gem::SafeYAML.aliases_enabled = aliases_enabled end @@ -1048,16 +1047,14 @@ def test_regression_alias_check_in_builder_not_parser Gem::SafeYAML.aliases_enabled = false # Alias in mapping value - exception = assert_raise(Psych::AliasesNotEnabled) do + assert_raise(Psych::AliasesNotEnabled) do Gem::YAMLSerializer.load("a: &x val\nb: *x", aliases: false) end - assert_match(/YAML aliases are not allowed/, exception.message) # Alias in sequence item - exception = assert_raise(Psych::AliasesNotEnabled) do + assert_raise(Psych::AliasesNotEnabled) do Gem::YAMLSerializer.load("items:\n- &x val\n- *x", aliases: false) end - assert_match(/YAML aliases are not allowed/, exception.message) ensure Gem::SafeYAML.aliases_enabled = aliases_enabled end From 6425157eae086f3c50c7055a936daab4647d6090 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Mon, 9 Mar 2026 15:31:07 +0900 Subject: [PATCH 22/27] [ruby/rubygems] Simplify Psych exception stubs and fallback raises https://github.com/ruby/rubygems/commit/61bfb3fff8 --- lib/rubygems/yaml_serializer.rb | 41 ++++++++++++++------------------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/lib/rubygems/yaml_serializer.rb b/lib/rubygems/yaml_serializer.rb index 72207c7c5339d6..28616cdfdea819 100644 --- a/lib/rubygems/yaml_serializer.rb +++ b/lib/rubygems/yaml_serializer.rb @@ -1,27 +1,12 @@ # frozen_string_literal: true -unless defined?(Psych::Exception) - begin - require "psych/exception" - rescue LoadError - module Psych - class Exception < ::RuntimeError; end - class SyntaxError < Exception; end - - class DisallowedClass < Exception - def initialize(action, klass_name) - super("Tried to #{action} unspecified class: #{klass_name}") - end - end - - class BadAlias < Exception; end - - class AliasesNotEnabled < BadAlias - def initialize - super "Alias parsing was not enabled. To enable it, pass `aliases: true` to `Psych::load` or `Psych::safe_load`." 
- end - end - end +unless defined?(Psych::VERSION) + module Psych + class Exception < ::RuntimeError; end + class SyntaxError < Exception; end + class DisallowedClass < Exception; end + class BadAlias < Exception; end + class AliasesNotEnabled < BadAlias; end end end @@ -582,13 +567,21 @@ def build_safe_requirement(req_value) def validate_tag!(tag) unless @permitted_tags.include?(tag) - raise Psych::DisallowedClass.new("load", tag) + if defined?(Psych::VERSION) + raise Psych::DisallowedClass.new("load", tag) + else + raise Psych::DisallowedClass, "Tried to load unspecified class: #{tag}" + end end end def validate_symbol!(sym) if @permitted_symbols.any? && !@permitted_symbols.include?(sym.to_s) - raise Psych::DisallowedClass.new("load", sym.inspect) + if defined?(Psych::VERSION) + raise Psych::DisallowedClass.new("load", sym.inspect) + else + raise Psych::DisallowedClass, "Tried to load unspecified class: #{sym.inspect}" + end end end From 4da2b2d912b750d681b5a78ee8d4bba7a9015040 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Mon, 9 Mar 2026 16:05:05 +0900 Subject: [PATCH 23/27] [ruby/rubygems] Remove redundant SafeYAML.load and update tests https://github.com/ruby/rubygems/commit/fa4771bcf5 --- lib/rubygems/safe_yaml.rb | 11 ----------- test/rubygems/helper.rb | 2 +- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/lib/rubygems/safe_yaml.rb b/lib/rubygems/safe_yaml.rb index 223c50ce335012..c59b4653586282 100644 --- a/lib/rubygems/safe_yaml.rb +++ b/lib/rubygems/safe_yaml.rb @@ -49,17 +49,6 @@ def self.safe_load(input) end def self.load(input) - if Gem.use_psych? - ::Psych.safe_load(input, permitted_classes: [::Symbol]) - else - Gem::YAMLSerializer.load( - input, - permitted_classes: [::Symbol] - ) - end - end - - def self.unsafe_load(input) if Gem.use_psych? 
if ::Psych.respond_to?(:unsafe_load) ::Psych.unsafe_load(input) diff --git a/test/rubygems/helper.rb b/test/rubygems/helper.rb index ec373d41e0202a..783818b6eb6f52 100644 --- a/test/rubygems/helper.rb +++ b/test/rubygems/helper.rb @@ -738,7 +738,7 @@ def write_dummy_extconf(gem_name) # Load a YAML string, the psych 3 way def load_yaml(yaml) - Gem::SafeYAML.safe_load(yaml) + Gem::SafeYAML.load(yaml) end ## From 1425c52227045fac17764b426a90d1b4ac369ba0 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Mon, 9 Mar 2026 17:26:02 +0900 Subject: [PATCH 24/27] Parse ISO8601 datetimes without Time.parse --- lib/rubygems/yaml_serializer.rb | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/lib/rubygems/yaml_serializer.rb b/lib/rubygems/yaml_serializer.rb index 28616cdfdea819..edc0133ce272cc 100644 --- a/lib/rubygems/yaml_serializer.rb +++ b/lib/rubygems/yaml_serializer.rb @@ -271,12 +271,16 @@ def coerce(val) return Sequence.new if inner.empty? items = inner.split(/\s*,\s*/).reject(&:empty?).map {|e| Scalar.new(value: coerce(e)) } Sequence.new(items: items) - elsif /^\d{4}-\d{2}-\d{2}/.match?(val) - require "time" + elsif /\A\d{4}-\d{2}-\d{2}([ T]\d{2}:\d{2}:\d{2})?/.match?(val) begin - Time.parse(val) + Time.new(val) rescue ArgumentError - val + # date-only format like "2024-06-15" is not supported by Time.new + if /\A(\d{4})-(\d{2})-(\d{2})\z/.match(val) + Time.utc($1.to_i, $2.to_i, $3.to_i) + else + val + end end elsif /^-?\d+$/.match?(val) val.to_i From 364f2fc1471620371439c6add5f00ac6d18b0c4c Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Fri, 5 Dec 2025 20:26:04 +0100 Subject: [PATCH 25/27] Propose myself as maintainer of benchmark * I always had an interest about the benchmark stdlib and did significant contributions to it, notably 979ec8df5daf6db314b2f17e53b53d269881d6ca. 
* Ref: https://bugs.ruby-lang.org/issues/21948 --- doc/maintainers.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/maintainers.md b/doc/maintainers.md index 6b820a516a32d4..04f4d21683d9d6 100644 --- a/doc/maintainers.md +++ b/doc/maintainers.md @@ -553,6 +553,7 @@ It may needs to make consensus on ruby-core/ruby-dev before making major changes #### benchmark +* Benoit Daloze ([eregon]) * https://github.com/ruby/benchmark * https://rubygems.org/gems/benchmark @@ -672,6 +673,7 @@ It may needs to make consensus on ruby-core/ruby-dev before making major changes [earlopain]: https://github.com/earlopain [eban]: https://github.com/eban [eileencodes]: https://github.com/eileencodes +[eregon]: https://github.com/eregon [hasumikin]: https://github.com/hasumikin [hsbt]: https://github.com/hsbt [ima1zumi]: https://github.com/ima1zumi From b5ffaa3a01e53b838bc531b5ca45b967e211a8f6 Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Mon, 9 Mar 2026 13:04:34 +0100 Subject: [PATCH 26/27] [ruby/timeout] Fix timing-dependent test * The timeout could trigger before the `raise`. 
https://github.com/ruby/timeout/commit/e4aa36096f --- test/test_timeout.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_timeout.rb b/test/test_timeout.rb index 5db355a7da162d..be752d9b4ac773 100644 --- a/test/test_timeout.rb +++ b/test/test_timeout.rb @@ -128,8 +128,8 @@ def test_nested_timeout_error_identity def test_nested_timeout_which_error_bubbles_up raised_exception = nil begin - Timeout.timeout(0.1) { - Timeout.timeout(1) { + Timeout.timeout(1) { + Timeout.timeout(10) { raise Timeout::ExitException.new("inner message") } } From 4ce8515c699951e24d812511dee3a06818c35f42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Barri=C3=A9?= Date: Mon, 9 Mar 2026 11:50:03 +0100 Subject: [PATCH 27/27] [ruby/timeout] Remove warnings https://github.com/ruby/timeout/commit/9b935535ff --- test/test_timeout.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test_timeout.rb b/test/test_timeout.rb index be752d9b4ac773..2703a0314df99e 100644 --- a/test/test_timeout.rb +++ b/test/test_timeout.rb @@ -464,10 +464,12 @@ def test_timeout_in_trap_handler # Stubs Fiber.current_scheduler for the duration of the block, then restores it. def with_mock_scheduler(mock) original = Fiber.method(:current_scheduler) + Fiber.singleton_class.remove_method(:current_scheduler) Fiber.define_singleton_method(:current_scheduler) { mock } begin yield ensure + Fiber.singleton_class.remove_method(:current_scheduler) Fiber.define_singleton_method(:current_scheduler, original) end end