|
| 1 | +From 66d3d405337c1dea5b4522bf87e06a8cfe815298 Mon Sep 17 00:00:00 2001 |
| 2 | +From: Kevin Lockwood <v-klockwood@microsoft.com> |
| 3 | +Date: Tue, 18 Feb 2025 12:13:44 -0800 |
| 4 | +Subject: [PATCH] [Medium] rubygem-rexml: Patch CVE-2024-39908 |
| 5 | + |
| 6 | +Link: https://github.com/ruby/rexml/raw/refs/tags/v3.3.2/lib/rexml/parsers/baseparser.rb |
| 7 | +--- |
| 8 | + lib/rexml/parsers/baseparser.rb | 126 ++++++++++++++++++++++++-------- |
| 9 | + 1 file changed, 97 insertions(+), 29 deletions(-) |
| 10 | + |
| 11 | +diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb |
| 12 | +index 25bc371..a2818ae 100644 |
| 13 | +--- a/lib/rexml/parsers/baseparser.rb |
| 14 | ++++ b/lib/rexml/parsers/baseparser.rb |
| 15 | +@@ -7,6 +7,17 @@ require "strscan" |
| 16 | + |
| 17 | + module REXML |
| 18 | + module Parsers |
| 19 | ++ if StringScanner::Version < "3.0.8" |
| 20 | ++ module StringScannerCaptures |
| 21 | ++ refine StringScanner do |
| 22 | ++ def captures |
| 23 | ++ values_at(*(1...size)) |
| 24 | ++ end |
| 25 | ++ end |
| 26 | ++ end |
| 27 | ++ using StringScannerCaptures |
| 28 | ++ end |
| 29 | ++ |
| 30 | + # = Using the Pull Parser |
| 31 | + # <em>This API is experimental, and subject to change.</em> |
| 32 | + # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" ) |
| 33 | +@@ -113,6 +124,14 @@ module REXML |
| 34 | + } |
| 35 | + |
| 36 | + module Private |
| 37 | ++ # Terminal requires two or more letters. |
| 38 | ++ INSTRUCTION_TERM = "?>" |
| 39 | ++ COMMENT_TERM = "-->" |
| 40 | ++ CDATA_TERM = "]]>" |
| 41 | ++ DOCTYPE_TERM = "]>" |
| 42 | ++ # Read to the end of DOCTYPE because there is no proper ENTITY termination |
| 43 | ++ ENTITY_TERM = DOCTYPE_TERM |
| 44 | ++ |
| 45 | + INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um |
| 46 | + TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um |
| 47 | + CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um |
| 48 | +@@ -121,14 +140,21 @@ module REXML |
| 49 | + GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>" |
| 50 | + PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>" |
| 51 | + ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um |
| 52 | ++ CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/ |
| 53 | ++ CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ |
| 54 | ++ DEFAULT_ENTITIES_PATTERNS = {} |
| 55 | ++ default_entities = ['gt', 'lt', 'quot', 'apos', 'amp'] |
| 56 | ++ default_entities.each do |term| |
| 57 | ++ DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/ |
| 58 | ++ end |
| 59 | + end |
| 60 | + private_constant :Private |
| 61 | +- include Private |
| 62 | + |
| 63 | + def initialize( source ) |
| 64 | + self.stream = source |
| 65 | + @listeners = [] |
| 66 | + @entity_expansion_count = 0 |
| 67 | ++ @prefixes = Set.new |
| 68 | + end |
| 69 | + |
| 70 | + def add_listener( listener ) |
| 71 | +@@ -141,6 +167,7 @@ module REXML |
| 72 | + def stream=( source ) |
| 73 | + @source = SourceFactory.create_from( source ) |
| 74 | + @closed = nil |
| 75 | ++ @have_root = false |
| 76 | + @document_status = nil |
| 77 | + @tags = [] |
| 78 | + @stack = [] |
| 79 | +@@ -195,6 +222,8 @@ module REXML |
| 80 | + |
| 81 | + # Returns the next event. This is a +PullEvent+ object. |
| 82 | + def pull |
| 83 | ++ @source.drop_parsed_content |
| 84 | ++ |
| 85 | + pull_event.tap do |event| |
| 86 | + @listeners.each do |listener| |
| 87 | + listener.receive event |
| 88 | +@@ -207,7 +236,12 @@ module REXML |
| 89 | + x, @closed = @closed, nil |
| 90 | + return [ :end_element, x ] |
| 91 | + end |
| 92 | +- return [ :end_document ] if empty? |
| 93 | ++ if empty? |
| 94 | ++ if @document_status == :in_doctype |
| 95 | ++ raise ParseException.new("Malformed DOCTYPE: unclosed", @source) |
| 96 | ++ end |
| 97 | ++ return [ :end_document ] |
| 98 | ++ end |
| 99 | + return @stack.shift if @stack.size > 0 |
| 100 | + #STDERR.puts @source.encoding |
| 101 | + #STDERR.puts "BUFFER = #{@source.buffer.inspect}" |
| 102 | +@@ -219,7 +253,14 @@ module REXML |
| 103 | + return process_instruction(start_position) |
| 104 | + elsif @source.match("<!", true) |
| 105 | + if @source.match("--", true) |
| 106 | +- return [ :comment, @source.match(/(.*?)-->/um, true)[1] ] |
| 107 | ++ md = @source.match(/(.*?)-->/um, true, term: Private::COMMENT_TERM) |
| 108 | ++ if md.nil? |
| 109 | ++ raise REXML::ParseException.new("Unclosed comment", @source) |
| 110 | ++ end |
| 111 | ++ if /--|-\z/.match?(md[1]) |
| 112 | ++ raise REXML::ParseException.new("Malformed comment", @source) |
| 113 | ++ end |
| 114 | ++ return [ :comment, md[1] ] |
| 115 | + elsif @source.match("DOCTYPE", true) |
| 116 | + base_error_message = "Malformed DOCTYPE" |
| 117 | + unless @source.match(/\s+/um, true) |
| 118 | +@@ -231,7 +272,7 @@ module REXML |
| 119 | + @source.position = start_position |
| 120 | + raise REXML::ParseException.new(message, @source) |
| 121 | + end |
| 122 | +- @nsstack.unshift(curr_ns=Set.new) |
| 123 | ++ @nsstack.unshift(Set.new) |
| 124 | + name = parse_name(base_error_message) |
| 125 | + if @source.match(/\s*\[/um, true) |
| 126 | + id = [nil, nil, nil] |
| 127 | +@@ -279,7 +320,7 @@ module REXML |
| 128 | + raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil? |
| 129 | + return [ :elementdecl, "<!ELEMENT" + md[1] ] |
| 130 | + elsif @source.match("ENTITY", true) |
| 131 | +- match = [:entitydecl, *@source.match(ENTITYDECL_PATTERN, true).captures.compact] |
| 132 | ++ match = [:entitydecl, *@source.match(Private::ENTITYDECL_PATTERN, true, term: Private::ENTITY_TERM).captures.compact] |
| 133 | + ref = false |
| 134 | + if match[1] == '%' |
| 135 | + ref = true |
| 136 | +@@ -305,13 +346,13 @@ module REXML |
| 137 | + match << '%' if ref |
| 138 | + return match |
| 139 | + elsif @source.match("ATTLIST", true) |
| 140 | +- md = @source.match(ATTLISTDECL_END, true) |
| 141 | ++ md = @source.match(Private::ATTLISTDECL_END, true) |
| 142 | + raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil? |
| 143 | + element = md[1] |
| 144 | + contents = md[0] |
| 145 | + |
| 146 | + pairs = {} |
| 147 | +- values = md[0].scan( ATTDEF_RE ) |
| 148 | ++ values = md[0].strip.scan( ATTDEF_RE ) |
| 149 | + values.each do |attdef| |
| 150 | + unless attdef[3] == "#IMPLIED" |
| 151 | + attdef.compact! |
| 152 | +@@ -344,19 +385,22 @@ module REXML |
| 153 | + raise REXML::ParseException.new(message, @source) |
| 154 | + end |
| 155 | + return [:notationdecl, name, *id] |
| 156 | +- elsif md = @source.match(/--(.*?)-->/um, true) |
| 157 | ++ elsif md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM) |
| 158 | + case md[1] |
| 159 | + when /--/, /-\z/ |
| 160 | + raise REXML::ParseException.new("Malformed comment", @source) |
| 161 | + end |
| 162 | + return [ :comment, md[1] ] if md |
| 163 | + end |
| 164 | +- elsif match = @source.match(/(%.*?;)\s*/um, true) |
| 165 | ++ elsif match = @source.match(/(%.*?;)\s*/um, true, term: Private::DOCTYPE_TERM) |
| 166 | + return [ :externalentity, match[1] ] |
| 167 | + elsif @source.match(/\]\s*>/um, true) |
| 168 | + @document_status = :after_doctype |
| 169 | + return [ :end_doctype ] |
| 170 | + end |
| 171 | ++ if @document_status == :in_doctype |
| 172 | ++ raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source) |
| 173 | ++ end |
| 174 | + end |
| 175 | + if @document_status == :after_doctype |
| 176 | + @source.match(/\s*/um, true) |
| 177 | +@@ -364,10 +408,14 @@ module REXML |
| 178 | + begin |
| 179 | + start_position = @source.position |
| 180 | + if @source.match("<", true) |
| 181 | ++ # :text's read_until may leave only "<" in the buffer. In that |
| 182 | ++ # case the buffer is empty here (the "<" was just consumed), so |
| 183 | ++ # we need to refill the buffer explicitly. |
| 184 | ++ @source.ensure_buffer |
| 185 | + if @source.match("/", true) |
| 186 | + @nsstack.shift |
| 187 | + last_tag = @tags.pop |
| 188 | +- md = @source.match(CLOSE_PATTERN, true) |
| 189 | ++ md = @source.match(Private::CLOSE_PATTERN, true) |
| 190 | + if md and !last_tag |
| 191 | + message = "Unexpected top-level end tag (got '#{md[1]}')" |
| 192 | + raise REXML::ParseException.new(message, @source) |
| 193 | +@@ -384,16 +432,15 @@ module REXML |
| 194 | + #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}" |
| 195 | + raise REXML::ParseException.new("Malformed node", @source) unless md |
| 196 | + if md[0][0] == ?- |
| 197 | +- md = @source.match(/--(.*?)-->/um, true) |
| 198 | ++ md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM) |
| 199 | + |
| 200 | +- case md[1] |
| 201 | +- when /--/, /-\z/ |
| 202 | ++ if md.nil? || /--|-\z/.match?(md[1]) |
| 203 | + raise REXML::ParseException.new("Malformed comment", @source) |
| 204 | + end |
| 205 | + |
| 206 | +- return [ :comment, md[1] ] if md |
| 207 | ++ return [ :comment, md[1] ] |
| 208 | + else |
| 209 | +- md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true) |
| 210 | ++ md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true, term: Private::CDATA_TERM) |
| 211 | + return [ :cdata, md[1] ] if md |
| 212 | + end |
| 213 | + raise REXML::ParseException.new( "Declarations can only occur "+ |
| 214 | +@@ -402,19 +449,19 @@ module REXML |
| 215 | + return process_instruction(start_position) |
| 216 | + else |
| 217 | + # Get the next tag |
| 218 | +- md = @source.match(TAG_PATTERN, true) |
| 219 | ++ md = @source.match(Private::TAG_PATTERN, true) |
| 220 | + unless md |
| 221 | + @source.position = start_position |
| 222 | + raise REXML::ParseException.new("malformed XML: missing tag start", @source) |
| 223 | + end |
| 224 | + tag = md[1] |
| 225 | + @document_status = :in_element |
| 226 | +- prefixes = Set.new |
| 227 | +- prefixes << md[2] if md[2] |
| 228 | ++ @prefixes.clear |
| 229 | ++ @prefixes << md[2] if md[2] |
| 230 | + @nsstack.unshift(curr_ns=Set.new) |
| 231 | +- attributes, closed = parse_attributes(prefixes, curr_ns) |
| 232 | ++ attributes, closed = parse_attributes(@prefixes, curr_ns) |
| 233 | + # Verify that all of the prefixes have been defined |
| 234 | +- for prefix in prefixes |
| 235 | ++ for prefix in @prefixes |
| 236 | + unless @nsstack.find{|k| k.member?(prefix)} |
| 237 | + raise UndefinedNamespaceException.new(prefix,@source,self) |
| 238 | + end |
| 239 | +@@ -424,13 +471,25 @@ module REXML |
| 240 | + @closed = tag |
| 241 | + @nsstack.shift |
| 242 | + else |
| 243 | ++ if @tags.empty? and @have_root |
| 244 | ++ raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source) |
| 245 | ++ end |
| 246 | + @tags.push( tag ) |
| 247 | + end |
| 248 | ++ @have_root = true |
| 249 | + return [ :start_element, tag, attributes ] |
| 250 | + end |
| 251 | + else |
| 252 | +- md = @source.match(/([^<]*)/um, true) |
| 253 | +- text = md[1] |
| 254 | ++ text = @source.read_until("<") |
| 255 | ++ if text.chomp!("<") |
| 256 | ++ @source.position -= "<".bytesize |
| 257 | ++ end |
| 258 | ++ if @tags.empty? and @have_root |
| 259 | ++ unless /\A\s*\z/.match?(text) |
| 260 | ++ raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source) |
| 261 | ++ end |
| 262 | ++ return pull_event |
| 263 | ++ end |
| 264 | + return [ :text, text ] |
| 265 | + end |
| 266 | + rescue REXML::UndefinedNamespaceException |
| 267 | +@@ -475,10 +534,14 @@ module REXML |
| 268 | + |
| 269 | + # Unescapes all possible entities |
| 270 | + def unnormalize( string, entities=nil, filter=nil ) |
| 271 | +- rv = string.gsub( /\r\n?/, "\n" ) |
| 272 | ++ if string.include?("\r") |
| 273 | ++ rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" ) |
| 274 | ++ else |
| 275 | ++ rv = string.dup |
| 276 | ++ end |
| 277 | + matches = rv.scan( REFERENCE_RE ) |
| 278 | + return rv if matches.size == 0 |
| 279 | +- rv.gsub!( /&#((?:\d+)|(?:x[a-fA-F0-9]+));/ ) { |
| 280 | ++ rv.gsub!( Private::CHARACTER_REFERENCES ) { |
| 281 | + m=$1 |
| 282 | + if m.start_with?("x") |
| 283 | + code_point = Integer(m[1..-1], 16) |
| 284 | +@@ -494,7 +557,7 @@ module REXML |
| 285 | + unless filter and filter.include?(entity_reference) |
| 286 | + entity_value = entity( entity_reference, entities ) |
| 287 | + if entity_value |
| 288 | +- re = /&#{entity_reference};/ |
| 289 | ++ re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/ |
| 290 | + rv.gsub!( re, entity_value ) |
| 291 | + sum += rv.bytesize |
| 292 | + if sum > Security.entity_expansion_text_limit |
| 293 | +@@ -506,7 +569,7 @@ module REXML |
| 294 | + end |
| 295 | + end |
| 296 | + end |
| 297 | +- rv.gsub!( /&amp;/, '&' ) |
| 298 | ++ rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' ) |
| 299 | + end |
| 300 | + rv |
| 301 | + end |
| 302 | +@@ -527,7 +590,7 @@ module REXML |
| 303 | + end |
| 304 | + |
| 305 | + def parse_name(base_error_message) |
| 306 | +- md = @source.match(NAME_PATTERN, true) |
| 307 | ++ md = @source.match(Private::NAME_PATTERN, true) |
| 308 | + unless md |
| 309 | + if @source.match(/\s*\S/um) |
| 310 | + message = "#{base_error_message}: invalid name" |
| 311 | +@@ -606,13 +669,16 @@ module REXML |
| 312 | + end |
| 313 | + |
| 314 | + def process_instruction(start_position) |
| 315 | +- match_data = @source.match(INSTRUCTION_END, true) |
| 316 | ++ match_data = @source.match(Private::INSTRUCTION_END, true, term: Private::INSTRUCTION_TERM) |
| 317 | + unless match_data |
| 318 | + message = "Invalid processing instruction node" |
| 319 | + @source.position = start_position |
| 320 | + raise REXML::ParseException.new(message, @source) |
| 321 | + end |
| 322 | +- if @document_status.nil? and match_data[1] == "xml" |
| 323 | ++ if match_data[1] == "xml" |
| 324 | ++ if @document_status |
| 325 | ++ raise ParseException.new("Malformed XML: XML declaration is not at the start", @source) |
| 326 | ++ end |
| 327 | + content = match_data[2] |
| 328 | + version = VERSION.match(content) |
| 329 | + version = version[1] unless version.nil? |
| 330 | +@@ -654,8 +720,10 @@ module REXML |
| 331 | + raise REXML::ParseException.new(message, @source) |
| 332 | + end |
| 333 | + quote = match[1] |
| 334 | ++ start_position = @source.position |
| 335 | + value = @source.read_until(quote) |
| 336 | + unless value.chomp!(quote) |
| 337 | ++ @source.position = start_position |
| 338 | + message = "Missing attribute value end quote: <#{name}>: <#{quote}>" |
| 339 | + raise REXML::ParseException.new(message, @source) |
| 340 | + end |
| 341 | +-- |
| 342 | +2.34.1 |
| 343 | + |
0 commit comments