From 131a17025bbcd8e440dd48f521f14f64ec77cf15 Mon Sep 17 00:00:00 2001 From: shixuantong Date: Sat, 8 Nov 2025 09:20:43 +0800 Subject: [PATCH] fix CVE-2025-58767 CVE-2025-61594 (cherry picked from commit fdd3106f702700ca20b466459bfc27b9d1716abc) --- backport-Add-IOSource-match-method-216.patch | 387 ++++++++++++++++++ backport-CVE-2025-58767.patch | 370 +++++++++++++++++ backport-CVE-2025-61594.patch | 121 ++++++ ...expression-processing-in-the-form-of.patch | 92 +++++ ...r-peek_byte-to-get-double-or-single-.patch | 121 ++++++ ruby.spec | 10 +- 6 files changed, 1100 insertions(+), 1 deletion(-) create mode 100644 backport-Add-IOSource-match-method-216.patch create mode 100644 backport-CVE-2025-58767.patch create mode 100644 backport-CVE-2025-61594.patch create mode 100644 backport-Reduced-regular-expression-processing-in-the-form-of.patch create mode 100644 backport-Use-StringScanner-peek_byte-to-get-double-or-single-.patch diff --git a/backport-Add-IOSource-match-method-216.patch b/backport-Add-IOSource-match-method-216.patch new file mode 100644 index 0000000..296165c --- /dev/null +++ b/backport-Add-IOSource-match-method-216.patch @@ -0,0 +1,387 @@ +From 8ef75024b96d3e5279b39fdd1692821cdbcd84b5 Mon Sep 17 00:00:00 2001 +From: NAITOH Jun +Date: Sun, 27 Oct 2024 15:02:18 +0900 +Subject: [PATCH] Add `IOSource#match?` method (#216) + +## Why? +`StringScanner#match?` is faster than `StringScanner#check`. + +See: https://github.com/ruby/strscan/pull/111 + +## Benchmark +``` +RUBYLIB= BUNDLER_ORIG_RUBYLIB= /Users/naitoh/.rbenv/versions/3.3.4/bin/ruby -v -S benchmark-driver /Users/naitoh/ghq/github.com/naitoh/rexml/benchmark/parse.yaml +ruby 3.3.4 (2024-07-09 revision be1089c8ec) [arm64-darwin22] +Calculating ------------------------------------- + before after before(YJIT) after(YJIT) + dom 18.819 19.362 32.846 34.708 i/s - 100.000 times in 5.313905s 5.164791s 3.044500s 2.881200s + sax 28.188 29.982 48.386 52.554 i/s - 100.000 times in 3.547597s 3.335304s 2.066732s 1.902809s + pull 31.962 33.902 57.868 60.662 i/s - 100.000 times in 3.128689s 2.949690s 1.728071s 1.648467s + stream 31.436 33.030 52.808 56.647 i/s - 100.000 times in 3.181095s 3.027574s 1.893635s 1.765304s + +Comparison: + dom + after(YJIT): 34.7 i/s + before(YJIT): 32.8 i/s - 1.06x slower + after: 19.4 i/s - 1.79x slower + before: 18.8 i/s - 1.84x slower + + sax + after(YJIT): 52.6 i/s + before(YJIT): 48.4 i/s - 1.09x slower + after: 30.0 i/s - 1.75x slower + before: 28.2 i/s - 1.86x slower + + pull + after(YJIT): 60.7 i/s + before(YJIT): 57.9 i/s - 1.05x slower + after: 33.9 i/s - 1.79x slower + before: 32.0 i/s - 1.90x slower + + stream + after(YJIT): 56.6 i/s + before(YJIT): 52.8 i/s - 1.07x slower + after: 33.0 i/s - 1.72x slower + before: 31.4 i/s - 1.80x slower + +``` + +- YJIT=ON : 1.05x - 1.09x faster +- YJIT=OFF : 1.02x - 1.06x faster + +--------- + +Co-authored-by: Sutou Kouhei +--- + .../lib/rexml/parsers/baseparser.rb | 84 +++++++++---------- + .bundle/gems/rexml-3.2.5/lib/rexml/source.rb | 35 ++++++++ + 2 files changed, 77 insertions(+), 42 deletions(-) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +index e9ab0ee..b0e5988 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +@@ -263,10 +263,10 @@ module REXML + @source.ensure_buffer + if @document_status == nil + start_position = @source.position +- if @source.match("/um, true) + if md.nil? + raise REXML::ParseException.new("Unclosed comment", @source) +@@ -275,10 +275,10 @@ module REXML + raise REXML::ParseException.new("Malformed comment", @source) + end + return [ :comment, md[1] ] +- elsif @source.match("DOCTYPE", true) ++ elsif @source.match?("DOCTYPE", true) + base_error_message = "Malformed DOCTYPE" +- unless @source.match(/\s+/um, true) +- if @source.match(">") ++ unless @source.match?(/\s+/um, true) ++ if @source.match?(">") + message = "#{base_error_message}: name is missing" + else + message = "#{base_error_message}: invalid name" +@@ -287,10 +287,10 @@ module REXML + raise REXML::ParseException.new(message, @source) + end + name = parse_name(base_error_message) +- if @source.match(/\s*\[/um, true) ++ if @source.match?(/\s*\[/um, true) + id = [nil, nil, nil] + @document_status = :in_doctype +- elsif @source.match(/\s*>/um, true) ++ elsif @source.match?(/\s*>/um, true) + id = [nil, nil, nil] + @document_status = :after_doctype + @source.ensure_buffer +@@ -302,9 +302,9 @@ module REXML + # For backward compatibility + id[1], id[2] = id[2], nil + end +- if @source.match(/\s*\[/um, true) ++ if @source.match?(/\s*\[/um, true) + @document_status = :in_doctype +- elsif @source.match(/\s*>/um, true) ++ elsif @source.match?(/\s*>/um, true) + @document_status = :after_doctype + @source.ensure_buffer + else +@@ -314,7 +314,7 @@ module REXML + end + args = [:start_doctype, name, *id] + if @document_status == :after_doctype +- @source.match(/\s*/um, true) ++ @source.match?(/\s*/um, true) + @stack << [ :end_doctype ] + end + return args +@@ -325,14 +325,14 @@ module REXML + end + end + if @document_status == :in_doctype +- @source.match(/\s*/um, true) # skip spaces ++ @source.match?(/\s*/um, true) # skip spaces + start_position = @source.position +- if @source.match("/um, true) + raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil? + return [ :elementdecl, "") ++ unless @source.match?(/\s+/um, true) ++ if @source.match?(">") + message = "#{base_error_message}: name is missing" + else + message = "#{base_error_message}: invalid name" +@@ -399,7 +399,7 @@ module REXML + id = parse_id(base_error_message, + accept_external_id: true, + accept_public_id: true) +- unless @source.match(/\s*>/um, true) ++ unless @source.match?(/\s*>/um, true) + message = "#{base_error_message}: garbage before end >" + raise REXML::ParseException.new(message, @source) + end +@@ -413,7 +413,7 @@ module REXML + end + elsif match = @source.match(/(%.*?;)\s*/um, true) + return [ :externalentity, match[1] ] +- elsif @source.match(/\]\s*>/um, true) ++ elsif @source.match?(/\]\s*>/um, true) + @document_status = :after_doctype + return [ :end_doctype ] + end +@@ -422,16 +422,16 @@ module REXML + end + end + if @document_status == :after_doctype +- @source.match(/\s*/um, true) ++ @source.match?(/\s*/um, true) + end + begin + start_position = @source.position +- if @source.match("<", true) ++ if @source.match?("<", true) + # :text's read_until may remain only "<" in buffer. In the + # case, buffer is empty here. So we need to fill buffer + # here explicitly. + @source.ensure_buffer +- if @source.match("/", true) ++ if @source.match?("/", true) + @namespaces_restore_stack.pop + last_tag = @tags.pop + md = @source.match(Private::CLOSE_PATTERN, true) +@@ -446,7 +446,7 @@ module REXML + raise REXML::ParseException.new(message, @source) + end + return [ :end_element, last_tag ] +- elsif @source.match("!", true) ++ elsif @source.match?("!", true) + md = @source.match(/([^>]*>)/um) + #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}" + raise REXML::ParseException.new("Malformed node", @source) unless md +@@ -464,7 +464,7 @@ module REXML + end + raise REXML::ParseException.new( "Declarations can only occur "+ + "in the doctype declaration.", @source) +- elsif @source.match("?", true) ++ elsif @source.match?("?", true) + return process_instruction + else + # Get the next tag +@@ -645,7 +645,7 @@ module REXML + def parse_name(base_error_message) + md = @source.match(Private::NAME_PATTERN, true) + unless md +- if @source.match(/\S/um) ++ if @source.match?(/\S/um) + message = "#{base_error_message}: invalid name" + else + message = "#{base_error_message}: name is missing" +@@ -687,34 +687,34 @@ module REXML + accept_public_id:) + public = /\A\s*PUBLIC/um + system = /\A\s*SYSTEM/um +- if (accept_external_id or accept_public_id) and @source.match(/#{public}/um) +- if @source.match(/#{public}(?:\s+[^'"]|\s*[\[>])/um) ++ if (accept_external_id or accept_public_id) and @source.match?(/#{public}/um) ++ if @source.match?(/#{public}(?:\s+[^'"]|\s*[\[>])/um) + return "public ID literal is missing" + end +- unless @source.match(/#{public}\s+#{PUBIDLITERAL}/um) ++ unless @source.match?(/#{public}\s+#{PUBIDLITERAL}/um) + return "invalid public ID literal" + end + if accept_public_id +- if @source.match(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um) ++ if @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+[^'"]/um) + return "system ID literal is missing" + end +- unless @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um) ++ unless @source.match?(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um) + return "invalid system literal" + end + "garbage after system literal" + else + "garbage after public ID literal" + end +- elsif accept_external_id and @source.match(/#{system}/um) +- if @source.match(/#{system}(?:\s+[^'"]|\s*[\[>])/um) ++ elsif accept_external_id and @source.match?(/#{system}/um) ++ if @source.match?(/#{system}(?:\s+[^'"]|\s*[\[>])/um) + return "system literal is missing" + end +- unless @source.match(/#{system}\s+#{SYSTEMLITERAL}/um) ++ unless @source.match?(/#{system}\s+#{SYSTEMLITERAL}/um) + return "invalid system literal" + end + "garbage after system literal" + else +- unless @source.match(/\A\s*(?:PUBLIC|SYSTEM)\s/um) ++ unless @source.match?(/\A\s*(?:PUBLIC|SYSTEM)\s/um) + return "invalid ID type" + end + "ID type is missing" +@@ -723,7 +723,7 @@ module REXML + + def process_instruction + name = parse_name("Malformed XML: Invalid processing instruction node") +- if @source.match(/\s+/um, true) ++ if @source.match?(/\s+/um, true) + match_data = @source.match(/(.*?)\?>/um, true) + unless match_data + raise ParseException.new("Malformed XML: Unclosed processing instruction", @source) +@@ -731,7 +731,7 @@ module REXML + content = match_data[1] + else + content = nil +- unless @source.match("?>", true) ++ unless @source.match?("?>", true) + raise ParseException.new("Malformed XML: Unclosed processing instruction", @source) + end + end +@@ -761,9 +761,9 @@ module REXML + expanded_names = {} + closed = false + while true +- if @source.match(">", true) ++ if @source.match?(">", true) + return attributes, closed +- elsif @source.match("/>", true) ++ elsif @source.match?("/>", true) + closed = true + return attributes, closed + elsif match = @source.match(QNAME, true) +@@ -771,7 +771,7 @@ module REXML + prefix = match[2] + local_part = match[3] + +- unless @source.match(/\s*=\s*/um, true) ++ unless @source.match?(/\s*=\s*/um, true) + message = "Missing attribute equal: <#{name}>" + raise REXML::ParseException.new(message, @source) + end +@@ -787,7 +787,7 @@ module REXML + message = "Missing attribute value end quote: <#{name}>: <#{quote}>" + raise REXML::ParseException.new(message, @source) + end +- @source.match(/\s*/um, true) ++ @source.match?(/\s*/um, true) + if prefix == "xmlns" + if local_part == "xml" + if value != "http://www.w3.org/XML/1998/namespace" +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +index ff887fc..b8e0768 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +@@ -18,6 +18,16 @@ module REXML + pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String) + super(pattern) + end ++ ++ def match?(pattern) ++ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String) ++ super(pattern) ++ end ++ ++ def skip(pattern) ++ pattern = /#{Regexp.escape(pattern)}/ if pattern.is_a?(String) ++ super(pattern) ++ end + end + end + using StringScannerCheckScanString +@@ -125,6 +135,14 @@ module REXML + end + end + ++ def match?(pattern, cons=false) ++ if cons ++ !@scanner.skip(pattern).nil? ++ else ++ !@scanner.match?(pattern).nil? ++ end ++ end ++ + def position + @scanner.pos + end +@@ -266,6 +284,23 @@ module REXML + md.nil? ? nil : @scanner + end + ++ def match?( pattern, cons=false ) ++ # To avoid performance issue, we need to increase bytes to read per scan ++ min_bytes = 1 ++ while true ++ if cons ++ n_matched_bytes = @scanner.skip(pattern) ++ else ++ n_matched_bytes = @scanner.match?(pattern) ++ end ++ return true if n_matched_bytes ++ return false if pattern.is_a?(String) ++ return false if @source.nil? ++ return false unless read(nil, min_bytes) ++ min_bytes *= 2 ++ end ++ end ++ + def empty? + super and ( @source.nil? || @source.eof? ) + end +-- +2.27.0 + diff --git a/backport-CVE-2025-58767.patch b/backport-CVE-2025-58767.patch new file mode 100644 index 0000000..72eaf72 --- /dev/null +++ b/backport-CVE-2025-58767.patch @@ -0,0 +1,370 @@ +From 5859bdeac792687eaf93d8e8f0b7e3c1e2ed5c23 Mon Sep 17 00:00:00 2001 +From: NAITOH Jun +Date: Sat, 23 Aug 2025 08:11:58 +0900 +Subject: [PATCH] Added XML declaration check & `Source#skip_spaces` method + (#282) + +Reference:https://github.com/ruby/rexml/commit/5859bde +Conflict:per_defined_terms is diff and miss commit 64a709e7 (ruby/rexml) + +## Why? + +### Added XML declaration check + +- The version attribute is required in XML declaration. +- Only version attribute, encoding attribute, and standalone attribute +are allowed in XML declaration. +- XML declaration is only allowed once. + +See: https://www.w3.org/TR/xml/#NT-XMLDecl + +### Added `Source#skip_spaces` method + +In the case of `@source.match?(/\s+/um, true)`, if there are no spaces +at the beginning, I want to stop reading immediately. +However, it continues to read the buffer until it finds a match, but it +never finds a match. +As a result, it continues reading until the end of the file. + +In the case of large XML files, drop_parsed_content occur frequently +until the buffer is cleared, which may affect performance. + + +## Benchmark + +``` + before after before(YJIT) after(YJIT) + dom 32.534 35.130 54.559 53.528 i/s - 100.000 times in 3.073715s 2.846540s 1.832883s 1.868189s + sax 44.785 44.089 78.303 77.842 i/s - 100.000 times in 2.232907s 2.268138s 1.277093s 1.284657s + pull 51.750 51.105 90.819 90.658 i/s - 100.000 times in 1.932351s 1.956759s 1.101094s 1.103050s + stream 51.427 51.444 89.820 88.971 i/s - 100.000 times in 1.944502s 1.943855s 1.113340s 1.123960s + +Comparison: + + dom + before(YJIT): 54.6 i/s + after(YJIT): 53.5 i/s - 1.02x slower + after: 35.1 i/s - 1.55x slower + before: 32.5 i/s - 1.68x slower + + sax + before(YJIT): 78.3 i/s + after(YJIT): 77.8 i/s - 1.01x slower + before: 44.8 i/s - 1.75x slower + after: 44.1 i/s - 1.78x slower + + pull + before(YJIT): 90.8 i/s + after(YJIT): 90.7 i/s - 1.00x slower + before: 51.8 i/s - 1.75x slower + after: 51.1 i/s - 1.78x slower + + stream + before(YJIT): 89.8 i/s + after(YJIT): 89.0 i/s - 1.01x slower + after: 51.4 i/s - 1.75x slower + before: 51.4 i/s - 1.75x slower +``` + +- YJIT=ON : 0.98x - 1.00x faster +- YJIT=OFF : 0.98x - 1.07x faster +--- + .../lib/rexml/parsers/baseparser.rb | 156 ++++++++++++------ + .bundle/gems/rexml-3.2.5/lib/rexml/source.rb | 7 +- + 2 files changed, 113 insertions(+), 50 deletions(-) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +index b06e990..05b6112 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +@@ -144,6 +144,7 @@ module REXML + PEREFERENCE_PATTERN = /#{PEREFERENCE}/um + TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um + CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um ++ EQUAL_PATTERN = /\s*=\s*/um + ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um + NAME_PATTERN = /#{NAME}/um + GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>" +@@ -164,6 +165,7 @@ module REXML + @listeners = [] + @prefixes = Set.new + @entity_expansion_count = 0 ++ @version = nil + end + + def add_listener( listener ) +@@ -277,7 +279,7 @@ module REXML + return [ :comment, md[1] ] + elsif @source.match?("DOCTYPE", true) + base_error_message = "Malformed DOCTYPE" +- unless @source.match?(/\s+/um, true) ++ unless @source.skip_spaces + if @source.match?(">") + message = "#{base_error_message}: name is missing" + else +@@ -287,7 +289,7 @@ module REXML + raise REXML::ParseException.new(message, @source) + end + name = parse_name(base_error_message) +- @source.match?(/\s*/um, true) # skip spaces ++ @source.skip_spaces + if @source.match?("[", true) + id = [nil, nil, nil] + @document_status = :in_doctype +@@ -303,7 +305,7 @@ module REXML + # For backward compatibility + id[1], id[2] = id[2], nil + end +- @source.match?(/\s*/um, true) # skip spaces ++ @source.skip_spaces + if @source.match?("[", true) + @document_status = :in_doctype + elsif @source.match?(">", true) +@@ -316,7 +318,7 @@ module REXML + end + args = [:start_doctype, name, *id] + if @document_status == :after_doctype +- @source.match?(/\s*/um, true) ++ @source.skip_spaces + @stack << [ :end_doctype ] + end + return args +@@ -327,7 +329,7 @@ module REXML + end + end + if @document_status == :in_doctype +- @source.match?(/\s*/um, true) # skip spaces ++ @source.skip_spaces + start_position = @source.position + if @source.match?("") + message = "#{base_error_message}: name is missing" + else +@@ -401,7 +403,7 @@ module REXML + id = parse_id(base_error_message, + accept_external_id: true, + accept_public_id: true) +- @source.match?(/\s*/um, true) # skip spaces ++ @source.skip_spaces + unless @source.match?(">", true) + message = "#{base_error_message}: garbage before end >" + raise REXML::ParseException.new(message, @source) +@@ -425,7 +427,7 @@ module REXML + end + end + if @document_status == :after_doctype +- @source.match?(/\s*/um, true) ++ @source.skip_spaces + end + begin + start_position = @source.position +@@ -645,6 +647,10 @@ module REXML + true + end + ++ def normalize_xml_declaration_encoding(xml_declaration_encoding) ++ /\AUTF-16(?:BE|LE)\z/i.match?(xml_declaration_encoding) ? "UTF-16" : nil ++ end ++ + def parse_name(base_error_message) + md = @source.match(Private::NAME_PATTERN, true) + unless md +@@ -726,37 +732,85 @@ module REXML + + def process_instruction + name = parse_name("Malformed XML: Invalid processing instruction node") +- if @source.match?(/\s+/um, true) +- match_data = @source.match(/(.*?)\?>/um, true) +- unless match_data +- raise ParseException.new("Malformed XML: Unclosed processing instruction", @source) ++ if name == "xml" ++ xml_declaration ++ else # PITarget ++ if @source.skip_spaces # e.g. ++ start_position = @source.position ++ content = @source.read_until("?>") ++ unless content.chomp!("?>") ++ @source.position = start_position ++ raise ParseException.new("Malformed XML: Unclosed processing instruction: <#{name}>", @source) ++ end ++ else # e.g. ++ content = nil ++ unless @source.match?("?>", true) ++ raise ParseException.new("Malformed XML: Unclosed processing instruction: <#{name}>", @source) ++ end + end +- content = match_data[1] +- else +- content = nil ++ [:processing_instruction, name, content] ++ end ++ end ++ ++ def xml_declaration ++ unless @version.nil? ++ raise ParseException.new("Malformed XML: XML declaration is duplicated", @source) ++ end ++ if @document_status ++ raise ParseException.new("Malformed XML: XML declaration is not at the start", @source) ++ end ++ unless @source.skip_spaces ++ raise ParseException.new("Malformed XML: XML declaration misses spaces before version", @source) ++ end ++ unless @source.match?("version", true) ++ raise ParseException.new("Malformed XML: XML declaration misses version", @source) ++ end ++ @version = parse_attribute_value_with_equal("xml") ++ unless @source.skip_spaces + unless @source.match?("?>", true) +- raise ParseException.new("Malformed XML: Unclosed processing instruction", @source) ++ raise ParseException.new("Malformed XML: Unclosed XML declaration", @source) + end ++ encoding = normalize_xml_declaration_encoding(@source.encoding) ++ return [ :xmldecl, @version, encoding, nil ] # e.g. + end +- if name == "xml" +- if @document_status +- raise ParseException.new("Malformed XML: XML declaration is not at the start", @source) +- end +- version = VERSION.match(content) +- version = version[1] unless version.nil? +- encoding = ENCODING.match(content) +- encoding = encoding[1] unless encoding.nil? +- if need_source_encoding_update?(encoding) +- @source.encoding = encoding ++ ++ if @source.match?("encoding", true) ++ encoding = parse_attribute_value_with_equal("xml") ++ unless @source.skip_spaces ++ unless @source.match?("?>", true) ++ raise ParseException.new("Malformed XML: Unclosed XML declaration", @source) ++ end ++ if need_source_encoding_update?(encoding) ++ @source.encoding = encoding ++ end ++ encoding ||= normalize_xml_declaration_encoding(@source.encoding) ++ return [ :xmldecl, @version, encoding, nil ] # e.g. + end +- if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding +- encoding = "UTF-16" ++ end ++ ++ if @source.match?("standalone", true) ++ standalone = parse_attribute_value_with_equal("xml") ++ case standalone ++ when "yes", "no" ++ else ++ raise ParseException.new("Malformed XML: XML declaration standalone is not yes or no : <#{standalone}>", @source) + end +- standalone = STANDALONE.match(content) +- standalone = standalone[1] unless standalone.nil? +- return [ :xmldecl, version, encoding, standalone ] + end +- [:processing_instruction, name, content] ++ @source.skip_spaces ++ unless @source.match?("?>", true) ++ raise ParseException.new("Malformed XML: Unclosed XML declaration", @source) ++ end ++ ++ if need_source_encoding_update?(encoding) ++ @source.encoding = encoding ++ end ++ encoding ||= normalize_xml_declaration_encoding(@source.encoding) ++ ++ # e.g. ++ # ++ # ++ # ++ [ :xmldecl, @version, encoding, standalone ] + end + + if StringScanner::Version < "3.1.1" +@@ -778,6 +832,25 @@ module REXML + end + end + ++ def parse_attribute_value_with_equal(name) ++ unless @source.match?(Private::EQUAL_PATTERN, true) ++ message = "Missing attribute equal: <#{name}>" ++ raise REXML::ParseException.new(message, @source) ++ end ++ unless quote = scan_quote ++ message = "Missing attribute value start quote: <#{name}>" ++ raise REXML::ParseException.new(message, @source) ++ end ++ start_position = @source.position ++ value = @source.read_until(quote) ++ unless value.chomp!(quote) ++ @source.position = start_position ++ message = "Missing attribute value end quote: <#{name}>: <#{quote}>" ++ raise REXML::ParseException.new(message, @source) ++ end ++ value ++ end ++ + def parse_attributes(prefixes) + attributes = {} + expanded_names = {} +@@ -792,23 +865,8 @@ module REXML + name = match[1] + prefix = match[2] + local_part = match[3] +- +- unless @source.match?(/\s*=\s*/um, true) +- message = "Missing attribute equal: <#{name}>" +- raise REXML::ParseException.new(message, @source) +- end +- unless quote = scan_quote +- message = "Missing attribute value start quote: <#{name}>" +- raise REXML::ParseException.new(message, @source) +- end +- start_position = @source.position +- value = @source.read_until(quote) +- unless value.chomp!(quote) +- @source.position = start_position +- message = "Missing attribute value end quote: <#{name}>: <#{quote}>" +- raise REXML::ParseException.new(message, @source) +- end +- @source.match?(/\s*/um, true) ++ value = parse_attribute_value_with_equal(name) ++ @source.skip_spaces + if prefix == "xmlns" + if local_part == "xml" + if value != "http://www.w3.org/XML/1998/namespace" +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +index 7715a8e..22a1556 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +@@ -65,9 +65,10 @@ module REXML + attr_reader :encoding + + module Private ++ SPACES_PATTERN = /\s+/um + SCANNER_RESET_SIZE = 100000 + PRE_DEFINED_TERM_PATTERNS = {} +- pre_defined_terms = ["'", '"', "<"] ++ pre_defined_terms = ["'", '"', "<", "?>"] + pre_defined_terms.each do |term| + PRE_DEFINED_TERM_PATTERNS[term] = /#{Regexp.escape(term)}/ + end +@@ -143,6 +144,10 @@ module REXML + end + end + ++ def skip_spaces ++ @scanner.skip(Private::SPACES_PATTERN) ? true : false ++ end ++ + def position + @scanner.pos + end +-- +2.27.0 + diff --git a/backport-CVE-2025-61594.patch b/backport-CVE-2025-61594.patch new file mode 100644 index 0000000..3f77f8d --- /dev/null +++ b/backport-CVE-2025-61594.patch @@ -0,0 +1,121 @@ +From 5cec76b9e8777764344fd4aee140e309ad207b68 Mon Sep 17 00:00:00 2001 +From: Nobuyoshi Nakada +Date: Sat, 12 Jul 2025 11:51:31 +0900 +Subject: [PATCH] Clear user info totally at setting any of authority info + +Fix CVE-2025-27221. +https://hackerone.com/reports/3221142 +--- + lib/uri/generic.rb | 10 ++++++---- + test/uri/test_generic.rb | 15 ++++++++++----- + 2 files changed, 16 insertions(+), 9 deletions(-) + +diff --git a/lib/uri/generic.rb b/lib/uri/generic.rb +index f7eed57..4fb53f4 100644 +--- a/lib/uri/generic.rb ++++ b/lib/uri/generic.rb +@@ -186,18 +186,18 @@ module URI + + if arg_check + self.scheme = scheme +- self.userinfo = userinfo + self.hostname = host + self.port = port ++ self.userinfo = userinfo + self.path = path + self.query = query + self.opaque = opaque + self.fragment = fragment + else + self.set_scheme(scheme) +- self.set_userinfo(userinfo) + self.set_host(host) + self.set_port(port) ++ self.set_userinfo(userinfo) + self.set_path(path) + self.query = query + self.set_opaque(opaque) +@@ -511,7 +511,7 @@ module URI + user, password = split_userinfo(user) + end + @user = user +- @password = password if password ++ @password = password + + [@user, @password] + end +@@ -522,7 +522,7 @@ module URI + # See also URI::Generic.user=. + # + def set_user(v) +- set_userinfo(v, @password) ++ set_userinfo(v, nil) + v + end + protected :set_user +@@ -639,6 +639,7 @@ module URI + def host=(v) + check_host(v) + set_host(v) ++ set_userinfo(nil) + v + end + +@@ -729,6 +730,7 @@ module URI + def port=(v) + check_port(v) + set_port(v) ++ set_userinfo(nil) + port + end + +diff --git a/test/uri/test_generic.rb b/test/uri/test_generic.rb +index 4b5e12c..d54554f 100644 +--- a/test/uri/test_generic.rb ++++ b/test/uri/test_generic.rb +@@ -272,6 +272,9 @@ class URI::TestGeneric < Test::Unit::TestCase + u0 = URI.parse('http://new.example.org/path') + u1 = u.merge('//new.example.org/path') + assert_equal(u0, u1) ++ u0 = URI.parse('http://other@example.net') ++ u1 = u.merge('//other@example.net') ++ assert_equal(u0, u1) + end + + def test_route +@@ -737,17 +740,18 @@ class URI::TestGeneric < Test::Unit::TestCase + def test_set_component + uri = URI.parse('http://foo:bar@baz') + assert_equal('oof', uri.user = 'oof') +- assert_equal('http://oof:bar@baz', uri.to_s) ++ assert_equal('http://oof@baz', uri.to_s) + assert_equal('rab', uri.password = 'rab') + assert_equal('http://oof:rab@baz', uri.to_s) + assert_equal('foo', uri.userinfo = 'foo') +- assert_equal('http://foo:rab@baz', uri.to_s) ++ assert_equal('http://foo@baz', uri.to_s) + assert_equal(['foo', 'bar'], uri.userinfo = ['foo', 'bar']) + assert_equal('http://foo:bar@baz', uri.to_s) + assert_equal(['foo'], uri.userinfo = ['foo']) +- assert_equal('http://foo:bar@baz', uri.to_s) ++ assert_equal('http://foo@baz', uri.to_s) + assert_equal('zab', uri.host = 'zab') +- assert_equal('http://foo:bar@zab', uri.to_s) ++ assert_equal('http://zab', uri.to_s) ++ uri.userinfo = ['foo', 'bar'] + uri.port = "" + assert_nil(uri.port) + uri.port = "80" +@@ -757,7 +761,8 @@ class URI::TestGeneric < Test::Unit::TestCase + uri.port = " 080 " + assert_equal(80, uri.port) + assert_equal(8080, uri.port = 8080) +- assert_equal('http://foo:bar@zab:8080', uri.to_s) ++ assert_equal('http://zab:8080', uri.to_s) ++ uri = URI.parse('http://foo:bar@zab:8080') + assert_equal('/', uri.path = '/') + assert_equal('http://foo:bar@zab:8080/', uri.to_s) + assert_equal('a=1', uri.query = 'a=1') +-- +2.27.0 + diff --git a/backport-Reduced-regular-expression-processing-in-the-form-of.patch b/backport-Reduced-regular-expression-processing-in-the-form-of.patch new file mode 100644 index 0000000..a403004 --- /dev/null +++ b/backport-Reduced-regular-expression-processing-in-the-form-of.patch @@ -0,0 +1,92 @@ +From 67d21be36c87d23b7a00c4f50017d9db977319d2 Mon Sep 17 00:00:00 2001 +From: NAITOH Jun +Date: Sun, 26 Jan 2025 19:56:59 +0900 +Subject: [PATCH] Reduced regular expression processing in the form of + processing white space first (#237) + +## Benchmark +``` +RUBYLIB= BUNDLER_ORIG_RUBYLIB= /Users/naitoh/.rbenv/versions/3.4.1/bin/ruby -v -S benchmark-driver /Users/naitoh/ghq/github.com/naitoh/rexml/benchmark/parse.yaml +ruby 3.4.1 (2024-12-25 revision 48d4efcb85) +PRISM [arm64-darwin24] +Calculating ------------------------------------- + before after before(YJIT) after(YJIT) + dom 19.849 20.109 36.064 38.655 i/s - 100.000 times in 5.038102s 4.972864s 2.772838s 2.586981s + sax 30.339 30.449 52.946 54.873 i/s - 100.000 times in 3.296102s 3.284176s 1.888722s 1.822391s + pull 34.785 34.916 65.808 65.219 i/s - 100.000 times in 2.874810s 2.863976s 1.519581s 1.533305s + stream 34.766 34.921 61.920 63.277 i/s - 100.000 times in 2.876359s 2.863571s 1.615000s 1.580354s + +Comparison: + dom + after(YJIT): 38.7 i/s + before(YJIT): 36.1 i/s - 1.07x slower + after: 20.1 i/s - 1.92x slower + before: 19.8 i/s - 1.95x slower + + sax + after(YJIT): 54.9 i/s + before(YJIT): 52.9 i/s - 1.04x slower + after: 30.4 i/s - 1.80x slower + before: 30.3 i/s - 1.81x slower + + pull + before(YJIT): 65.8 i/s + after(YJIT): 65.2 i/s - 1.01x slower + after: 34.9 i/s - 1.88x slower + before: 34.8 i/s - 1.89x slower + + stream + after(YJIT): 63.3 i/s + before(YJIT): 61.9 i/s - 1.02x slower + after: 34.9 i/s - 1.81x slower + before: 34.8 i/s - 1.82x slower +``` +- YJIT=ON : 0.99x - 1.07x faster +- YJIT=OFF : 1.00x - 1.01x faster +--- + .../rexml-3.2.5/lib/rexml/parsers/baseparser.rb | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +index c25be0a..b06e990 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +@@ -287,10 +287,11 @@ module REXML + raise REXML::ParseException.new(message, @source) + end + name = parse_name(base_error_message) +- if @source.match?(/\s*\[/um, true) ++ @source.match?(/\s*/um, true) # skip spaces ++ if @source.match?("[", true) + id = [nil, nil, nil] + @document_status = :in_doctype +- elsif @source.match?(/\s*>/um, true) ++ elsif @source.match?(">", true) + id = [nil, nil, nil] + @document_status = :after_doctype + @source.ensure_buffer +@@ -302,9 +303,10 @@ module REXML + # For backward compatibility + id[1], id[2] = id[2], nil + end +- if @source.match?(/\s*\[/um, true) ++ @source.match?(/\s*/um, true) # skip spaces ++ if @source.match?("[", true) + @document_status = :in_doctype +- elsif @source.match?(/\s*>/um, true) ++ elsif @source.match?(">", true) + @document_status = :after_doctype + @source.ensure_buffer + else +@@ -399,7 +401,8 @@ module REXML + id = parse_id(base_error_message, + accept_external_id: true, + accept_public_id: true) +- unless @source.match?(/\s*>/um, true) ++ @source.match?(/\s*/um, true) # skip spaces ++ unless @source.match?(">", true) + message = "#{base_error_message}: garbage before end >" + raise REXML::ParseException.new(message, @source) + end +-- +2.27.0 + diff --git a/backport-Use-StringScanner-peek_byte-to-get-double-or-single-.patch b/backport-Use-StringScanner-peek_byte-to-get-double-or-single-.patch new file mode 100644 index 0000000..cfcc35e --- /dev/null +++ b/backport-Use-StringScanner-peek_byte-to-get-double-or-single-.patch @@ -0,0 +1,121 @@ +From b70388c2638d90ebd2ae471bd85239d8469b8e62 Mon Sep 17 00:00:00 2001 +From: NAITOH Jun +Date: Sat, 21 Dec 2024 07:59:47 +0900 +Subject: [PATCH] Use `StringScanner#peek_byte` to get double or single + quotation mark (#227) + +## Why? +`StringScanner#peek_byte` is fast, because it does not generate String +object. + +## Benchmark +``` +RUBYLIB= BUNDLER_ORIG_RUBYLIB= /Users/naitoh/.rbenv/versions/3.3.4/bin/ruby -v -S benchmark-driver /Users/naitoh/ghq/github.com/naitoh/rexml/benchmark/parse.yaml +ruby 3.3.4 (2024-07-09 revision be1089c8ec) [arm64-darwin22] +Calculating ------------------------------------- + before after before(YJIT) after(YJIT) + dom 19.753 19.888 35.641 35.928 i/s - 100.000 times in 5.062402s 5.028121s 2.805792s 2.783339s + sax 30.349 30.978 53.485 57.885 i/s - 100.000 times in 3.295012s 3.228103s 1.869671s 1.727567s + pull 34.170 35.436 61.713 66.534 i/s - 100.000 times in 2.926534s 2.821955s 1.620404s 1.502996s + stream 33.121 35.268 60.751 63.276 i/s - 100.000 times in 3.019222s 2.835443s 1.646065s 1.580374s + +Comparison: + dom + after(YJIT): 35.9 i/s + before(YJIT): 35.6 i/s - 1.01x slower + after: 19.9 i/s - 1.81x slower + before: 19.8 i/s - 1.82x slower + + sax + after(YJIT): 57.9 i/s + before(YJIT): 53.5 i/s - 1.08x slower + after: 31.0 i/s - 1.87x slower + before: 30.3 i/s - 1.91x slower + + pull + after(YJIT): 66.5 i/s + before(YJIT): 61.7 i/s - 1.08x slower + after: 35.4 i/s - 1.88x slower + before: 34.2 i/s - 1.95x slower + + stream + after(YJIT): 63.3 i/s + before(YJIT): 60.8 i/s - 1.04x slower + after: 35.3 i/s - 1.79x slower + before: 33.1 i/s - 1.91x slower + +``` +- YJIT=ON : 1.01x - 1.08x faster +- YJIT=OFF : 1.00x - 1.06x faster + +Co-authored-by: Sutou Kouhei +--- + .../lib/rexml/parsers/baseparser.rb | 22 +++++++++++++++++-- + .bundle/gems/rexml-3.2.5/lib/rexml/source.rb | 8 +++++++ + 2 files changed, 28 insertions(+), 2 deletions(-) + +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +index b0e5988..c25be0a 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +@@ -756,6 +756,25 @@ module REXML + [:processing_instruction, name, content] + end + ++ if StringScanner::Version < "3.1.1" ++ def scan_quote ++ @source.match(/(['"])/, true)&.[](1) ++ end ++ else ++ def scan_quote ++ case @source.peek_byte ++ when 34 # '"'.ord ++ @source.scan_byte ++ '"' ++ when 39 # "'".ord ++ @source.scan_byte ++ "'" ++ else ++ nil ++ end ++ end ++ end ++ + def parse_attributes(prefixes) + attributes = {} + expanded_names = {} +@@ -775,11 +794,10 @@ module REXML + message = "Missing attribute equal: <#{name}>" + raise REXML::ParseException.new(message, @source) + end +- unless match = @source.match(/(['"])/, true) ++ unless quote = scan_quote + message = "Missing attribute value start quote: <#{name}>" + raise REXML::ParseException.new(message, @source) + end +- quote = match[1] + start_position = @source.position + value = @source.read_until(quote) + unless value.chomp!(quote) +diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +index b8e0768..7715a8e 100644 +--- a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb ++++ b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb +@@ -151,6 +151,14 @@ module REXML + @scanner.pos = pos + end + ++ def peek_byte ++ @scanner.peek_byte ++ end ++ ++ def scan_byte ++ @scanner.scan_byte ++ end ++ + # @return true if the Source is exhausted + def empty? + @scanner.eos? +-- +2.27.0 + diff --git a/ruby.spec b/ruby.spec index 36a033c..93f645a 100644 --- a/ruby.spec +++ b/ruby.spec @@ -33,7 +33,7 @@ Name: ruby Version: %{ruby_version} -Release: 150 +Release: 151 Summary: Object-oriented scripting language interpreter License: (Ruby or BSD) and Public Domain and MIT and CC0 and zlib and UCD URL: https://www.ruby-lang.org/en/ @@ -122,6 +122,11 @@ Patch6045: backport-0001-CVE-2025-43857.patch Patch6046: backport-0002-CVE-2025-43857.patch Patch6047: backport-0003-CVE-2025-43857.patch Patch6048: backport-0004-CVE-2025-43857.patch +Patch6049: backport-Add-IOSource-match-method-216.patch +Patch6050: backport-Use-StringScanner-peek_byte-to-get-double-or-single-.patch +Patch6051: backport-Reduced-regular-expression-processing-in-the-form-of.patch +Patch6052: backport-CVE-2025-58767.patch +Patch6053: backport-CVE-2025-61594.patch Provides: %{name}-libs = %{version}-%{release} Obsoletes: %{name}-libs < %{version}-%{release} @@ -907,6 +912,9 @@ make runruby TESTRUN_SCRIPT=%{SOURCE13} %{gem_dir}/specifications/matrix-%{matrix_version}.gemspec %changelog +* Sat Nov 08 2025 shixuantong - 3.2.2-151 +- fix CVE-2025-58767 CVE-2025-61594 + * Thu Jun 05 2025 shixuantong - 3.2.2-150 - fix CVE-2025-43857 -- Gitee