1- From 66d3d405337c1dea5b4522bf87e06a8cfe815298 Mon Sep 17 00:00:00 2001
2- From: Kevin Lockwood <v-klockwood@microsoft.com>
3- Date: Tue, 18 Feb 2025 12:13:44 -0800
4- Subject: [PATCH] [Medium] rubygem-rexml: Patch CVE-2024-39908
1+ From 76163d54d61fc9571ad4ce4312eac75baa41680b Mon Sep 17 00:00:00 2001
2+ From: akhila-guruju <v-guakhila@microsoft.com>
3+ Date: Mon, 9 Jun 2025 13:26:17 +0000
4+ Subject: [PATCH] Address CVE-2024-39908
5+
6+ Patch Reference: https://raw.githubusercontent.com/ruby/rexml/refs/tags/v3.3.2/lib/rexml/parsers/baseparser.rb
57
6- Link: https://github.com/ruby/rexml/raw/refs/tags/v3.3.2/lib/rexml/parsers/baseparser.rb
78---
8- lib/rexml/parsers/baseparser.rb | 126 ++++++++++++++++++++++++--------
9- 1 file changed, 97 insertions(+), 29 deletions(-)
9+ lib/rexml/parsers/baseparser.rb | 101 ++++++++++++++++++++++++--------
10+ 1 file changed, 75 insertions(+), 26 deletions(-)
1011
1112diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
12- index 25bc371..a2818ae 100644
13+ index 7739f9d..71f609c 100644
1314--- a/lib/rexml/parsers/baseparser.rb
1415+++ b/lib/rexml/parsers/baseparser.rb
15- @@ -7,6 +7,17 @@ require "strscan"
16-
17- module REXML
18- module Parsers
19- + if StringScanner::Version < "3.0.8"
20- + module StringScannerCaptures
21- + refine StringScanner do
22- + def captures
23- + values_at(*(1...size))
24- + end
25- + end
26- + end
27- + using StringScannerCaptures
28- + end
29- +
30- # = Using the Pull Parser
31- # <em>This API is experimental, and subject to change.</em>
32- # parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
33- @@ -113,6 +124,14 @@ module REXML
16+ @@ -124,6 +124,14 @@ module REXML
3417 }
3518
3619 module Private
@@ -45,7 +28,7 @@ index 25bc371..a2818ae 100644
4528 INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
4629 TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
4730 CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
48- @@ -121 ,14 +140,21 @@ module REXML
31+ @@ -132 ,14 +140,21 @@ module REXML
4932 GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
5033 PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
5134 ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
@@ -68,15 +51,15 @@ index 25bc371..a2818ae 100644
6851 end
6952
7053 def add_listener( listener )
71- @@ -141 ,6 +167,7 @@ module REXML
54+ @@ -152 ,6 +167,7 @@ module REXML
7255 def stream=( source )
7356 @source = SourceFactory.create_from( source )
7457 @closed = nil
7558+ @have_root = false
7659 @document_status = nil
7760 @tags = []
7861 @stack = []
79- @@ -195 ,6 +222,8 @@ module REXML
62+ @@ -206 ,6 +222,8 @@ module REXML
8063
8164 # Returns the next event. This is a +PullEvent+ object.
8265 def pull
@@ -85,7 +68,7 @@ index 25bc371..a2818ae 100644
8568 pull_event.tap do |event|
8669 @listeners.each do |listener|
8770 listener.receive event
88- @@ -207 ,7 +236,12 @@ module REXML
71+ @@ -218 ,7 +236,12 @@ module REXML
8972 x, @closed = @closed, nil
9073 return [ :end_element, x ]
9174 end
@@ -99,7 +82,7 @@ index 25bc371..a2818ae 100644
9982 return @stack.shift if @stack.size > 0
10083 #STDERR.puts @source.encoding
10184 #STDERR.puts "BUFFER = #{@source.buffer.inspect}"
102- @@ -219 ,7 +253,14 @@ module REXML
85+ @@ -230 ,7 +253,14 @@ module REXML
10386 return process_instruction(start_position)
10487 elsif @source.match("<!", true)
10588 if @source.match("--", true)
@@ -115,7 +98,7 @@ index 25bc371..a2818ae 100644
11598 elsif @source.match("DOCTYPE", true)
11699 base_error_message = "Malformed DOCTYPE"
117100 unless @source.match(/\s+/um, true)
118- @@ -231 ,7 +272,7 @@ module REXML
101+ @@ -242 ,7 +272,7 @@ module REXML
119102 @source.position = start_position
120103 raise REXML::ParseException.new(message, @source)
121104 end
@@ -124,7 +107,7 @@ index 25bc371..a2818ae 100644
124107 name = parse_name(base_error_message)
125108 if @source.match(/\s*\[/um, true)
126109 id = [nil, nil, nil]
127- @@ -279 ,7 +320,7 @@ module REXML
110+ @@ -290 ,7 +320,7 @@ module REXML
128111 raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
129112 return [ :elementdecl, "<!ELEMENT" + md[1] ]
130113 elsif @source.match("ENTITY", true)
@@ -133,7 +116,7 @@ index 25bc371..a2818ae 100644
133116 ref = false
134117 if match[1] == '%'
135118 ref = true
136- @@ -305 ,13 +346,13 @@ module REXML
119+ @@ -316 ,13 +346,13 @@ module REXML
137120 match << '%' if ref
138121 return match
139122 elsif @source.match("ATTLIST", true)
@@ -149,7 +132,7 @@ index 25bc371..a2818ae 100644
149132 values.each do |attdef|
150133 unless attdef[3] == "#IMPLIED"
151134 attdef.compact!
152- @@ -344 ,19 +385,22 @@ module REXML
135+ @@ -355 ,19 +385,22 @@ module REXML
153136 raise REXML::ParseException.new(message, @source)
154137 end
155138 return [:notationdecl, name, *id]
@@ -174,23 +157,7 @@ index 25bc371..a2818ae 100644
174157 end
175158 if @document_status == :after_doctype
176159 @source.match(/\s*/um, true)
177- @@ -364,10 +408,14 @@ module REXML
178- begin
179- start_position = @source.position
180- if @source.match("<", true)
181- + # :text's read_until may remain only "<" in buffer. In the
182- + # case, buffer is empty here. So we need to fill buffer
183- + # here explicitly.
184- + @source.ensure_buffer
185- if @source.match("/", true)
186- @nsstack.shift
187- last_tag = @tags.pop
188- - md = @source.match(CLOSE_PATTERN, true)
189- + md = @source.match(Private::CLOSE_PATTERN, true)
190- if md and !last_tag
191- message = "Unexpected top-level end tag (got '#{md[1]}')"
192- raise REXML::ParseException.new(message, @source)
193- @@ -384,16 +432,15 @@ module REXML
160+ @@ -399,16 +432,15 @@ module REXML
194161 #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
195162 raise REXML::ParseException.new("Malformed node", @source) unless md
196163 if md[0][0] == ?-
@@ -211,7 +178,7 @@ index 25bc371..a2818ae 100644
211178 return [ :cdata, md[1] ] if md
212179 end
213180 raise REXML::ParseException.new( "Declarations can only occur "+
214- @@ -402 ,19 +449,19 @@ module REXML
181+ @@ -417 ,19 +449,19 @@ module REXML
215182 return process_instruction(start_position)
216183 else
217184 # Get the next tag
@@ -236,7 +203,7 @@ index 25bc371..a2818ae 100644
236203 unless @nsstack.find{|k| k.member?(prefix)}
237204 raise UndefinedNamespaceException.new(prefix,@source,self)
238205 end
239- @@ -424,13 +471,25 @@ module REXML
206+ @@ -439,8 +471,12 @@ module REXML
240207 @closed = tag
241208 @nsstack.shift
242209 else
@@ -249,12 +216,10 @@ index 25bc371..a2818ae 100644
249216 return [ :start_element, tag, attributes ]
250217 end
251218 else
252- - md = @source.match(/([^<]*)/um, true)
253- - text = md[1]
254- + text = @source.read_until("<")
255- + if text.chomp!("<")
256- + @source.position -= "<".bytesize
257- + end
219+ @@ -448,6 +484,12 @@ module REXML
220+ if text.chomp!("<")
221+ @source.position -= "<".bytesize
222+ end
258223+ if @tags.empty? and @have_root
259224+ unless /\A\s*\z/.match?(text)
260225+ raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
@@ -264,7 +229,7 @@ index 25bc371..a2818ae 100644
264229 return [ :text, text ]
265230 end
266231 rescue REXML::UndefinedNamespaceException
267- @@ -475 ,10 +534,14 @@ module REXML
232+ @@ -492 ,10 +534,14 @@ module REXML
268233
269234 # Unescapes all possible entities
270235 def unnormalize( string, entities=nil, filter=nil )
@@ -281,7 +246,7 @@ index 25bc371..a2818ae 100644
281246 m=$1
282247 if m.start_with?("x")
283248 code_point = Integer(m[1..-1], 16)
284- @@ -494 ,7 +557,7 @@ module REXML
249+ @@ -511 ,7 +557,7 @@ module REXML
285250 unless filter and filter.include?(entity_reference)
286251 entity_value = entity( entity_reference, entities )
287252 if entity_value
@@ -290,7 +255,7 @@ index 25bc371..a2818ae 100644
290255 rv.gsub!( re, entity_value )
291256 sum += rv.bytesize
292257 if sum > Security.entity_expansion_text_limit
293- @@ -506 ,7 +569,7 @@ module REXML
258+ @@ -523 ,7 +569,7 @@ module REXML
294259 end
295260 end
296261 end
@@ -299,7 +264,7 @@ index 25bc371..a2818ae 100644
299264 end
300265 rv
301266 end
302- @@ -527 ,7 +590,7 @@ module REXML
267+ @@ -544 ,7 +590,7 @@ module REXML
303268 end
304269
305270 def parse_name(base_error_message)
@@ -308,7 +273,7 @@ index 25bc371..a2818ae 100644
308273 unless md
309274 if @source.match(/\s*\S/um)
310275 message = "#{base_error_message}: invalid name"
311- @@ -606 ,13 +669,16 @@ module REXML
276+ @@ -623 ,13 +669,16 @@ module REXML
312277 end
313278
314279 def process_instruction(start_position)
@@ -327,17 +292,6 @@ index 25bc371..a2818ae 100644
327292 content = match_data[2]
328293 version = VERSION.match(content)
329294 version = version[1] unless version.nil?
330- @@ -654,8 +720,10 @@ module REXML
331- raise REXML::ParseException.new(message, @source)
332- end
333- quote = match[1]
334- + start_position = @source.position
335- value = @source.read_until(quote)
336- unless value.chomp!(quote)
337- + @source.position = start_position
338- message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
339- raise REXML::ParseException.new(message, @source)
340- end
341295- -
342- 2.34.1
296+ 2.45.2
343297
0 commit comments