Skip to content

Commit 313946d

Browse files
[AUTO-CHERRYPICK] [Medium] rubygem-rexml: Patch CVE-2024-39908 - branch main (#12667)
Co-authored-by: Kevin Lockwood <57274670+kevin-b-lockwood@users.noreply.github.com>
1 parent 5177e01 commit 313946d

2 files changed

Lines changed: 348 additions & 1 deletion

File tree

Lines changed: 343 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,343 @@
1+
From 66d3d405337c1dea5b4522bf87e06a8cfe815298 Mon Sep 17 00:00:00 2001
2+
From: Kevin Lockwood <v-klockwood@microsoft.com>
3+
Date: Tue, 18 Feb 2025 12:13:44 -0800
4+
Subject: [PATCH] [Medium] rubygem-rexml: Patch CVE-2024-39908
5+
6+
Link: https://github.com/ruby/rexml/raw/refs/tags/v3.3.2/lib/rexml/parsers/baseparser.rb
7+
---
8+
lib/rexml/parsers/baseparser.rb | 126 ++++++++++++++++++++++++--------
9+
1 file changed, 97 insertions(+), 29 deletions(-)
10+
11+
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
12+
index 25bc371..a2818ae 100644
13+
--- a/lib/rexml/parsers/baseparser.rb
14+
+++ b/lib/rexml/parsers/baseparser.rb
15+
@@ -7,6 +7,17 @@ require "strscan"
16+
17+
module REXML
18+
module Parsers
19+
+ if StringScanner::Version < "3.0.8"
20+
+ module StringScannerCaptures
21+
+ refine StringScanner do
22+
+ def captures
23+
+ values_at(*(1...size))
24+
+ end
25+
+ end
26+
+ end
27+
+ using StringScannerCaptures
28+
+ end
29+
+
30+
# = Using the Pull Parser
31+
# <em>This API is experimental, and subject to change.</em>
32+
# parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
33+
@@ -113,6 +124,14 @@ module REXML
34+
}
35+
36+
module Private
37+
+ # Terminal requires two or more letters.
38+
+ INSTRUCTION_TERM = "?>"
39+
+ COMMENT_TERM = "-->"
40+
+ CDATA_TERM = "]]>"
41+
+ DOCTYPE_TERM = "]>"
42+
+ # Read to the end of DOCTYPE because there is no proper ENTITY termination
43+
+ ENTITY_TERM = DOCTYPE_TERM
44+
+
45+
INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
46+
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
47+
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
48+
@@ -121,14 +140,21 @@ module REXML
49+
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
50+
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
51+
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
52+
+ CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
53+
+ CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
54+
+ DEFAULT_ENTITIES_PATTERNS = {}
55+
+ default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
56+
+ default_entities.each do |term|
57+
+ DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
58+
+ end
59+
end
60+
private_constant :Private
61+
- include Private
62+
63+
def initialize( source )
64+
self.stream = source
65+
@listeners = []
66+
@entity_expansion_count = 0
67+
+ @prefixes = Set.new
68+
end
69+
70+
def add_listener( listener )
71+
@@ -141,6 +167,7 @@ module REXML
72+
def stream=( source )
73+
@source = SourceFactory.create_from( source )
74+
@closed = nil
75+
+ @have_root = false
76+
@document_status = nil
77+
@tags = []
78+
@stack = []
79+
@@ -195,6 +222,8 @@ module REXML
80+
81+
# Returns the next event. This is a +PullEvent+ object.
82+
def pull
83+
+ @source.drop_parsed_content
84+
+
85+
pull_event.tap do |event|
86+
@listeners.each do |listener|
87+
listener.receive event
88+
@@ -207,7 +236,12 @@ module REXML
89+
x, @closed = @closed, nil
90+
return [ :end_element, x ]
91+
end
92+
- return [ :end_document ] if empty?
93+
+ if empty?
94+
+ if @document_status == :in_doctype
95+
+ raise ParseException.new("Malformed DOCTYPE: unclosed", @source)
96+
+ end
97+
+ return [ :end_document ]
98+
+ end
99+
return @stack.shift if @stack.size > 0
100+
#STDERR.puts @source.encoding
101+
#STDERR.puts "BUFFER = #{@source.buffer.inspect}"
102+
@@ -219,7 +253,14 @@ module REXML
103+
return process_instruction(start_position)
104+
elsif @source.match("<!", true)
105+
if @source.match("--", true)
106+
- return [ :comment, @source.match(/(.*?)-->/um, true)[1] ]
107+
+ md = @source.match(/(.*?)-->/um, true, term: Private::COMMENT_TERM)
108+
+ if md.nil?
109+
+ raise REXML::ParseException.new("Unclosed comment", @source)
110+
+ end
111+
+ if /--|-\z/.match?(md[1])
112+
+ raise REXML::ParseException.new("Malformed comment", @source)
113+
+ end
114+
+ return [ :comment, md[1] ]
115+
elsif @source.match("DOCTYPE", true)
116+
base_error_message = "Malformed DOCTYPE"
117+
unless @source.match(/\s+/um, true)
118+
@@ -231,7 +272,7 @@ module REXML
119+
@source.position = start_position
120+
raise REXML::ParseException.new(message, @source)
121+
end
122+
- @nsstack.unshift(curr_ns=Set.new)
123+
+ @nsstack.unshift(Set.new)
124+
name = parse_name(base_error_message)
125+
if @source.match(/\s*\[/um, true)
126+
id = [nil, nil, nil]
127+
@@ -279,7 +320,7 @@ module REXML
128+
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
129+
return [ :elementdecl, "<!ELEMENT" + md[1] ]
130+
elsif @source.match("ENTITY", true)
131+
- match = [:entitydecl, *@source.match(ENTITYDECL_PATTERN, true).captures.compact]
132+
+ match = [:entitydecl, *@source.match(Private::ENTITYDECL_PATTERN, true, term: Private::ENTITY_TERM).captures.compact]
133+
ref = false
134+
if match[1] == '%'
135+
ref = true
136+
@@ -305,13 +346,13 @@ module REXML
137+
match << '%' if ref
138+
return match
139+
elsif @source.match("ATTLIST", true)
140+
- md = @source.match(ATTLISTDECL_END, true)
141+
+ md = @source.match(Private::ATTLISTDECL_END, true)
142+
raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
143+
element = md[1]
144+
contents = md[0]
145+
146+
pairs = {}
147+
- values = md[0].scan( ATTDEF_RE )
148+
+ values = md[0].strip.scan( ATTDEF_RE )
149+
values.each do |attdef|
150+
unless attdef[3] == "#IMPLIED"
151+
attdef.compact!
152+
@@ -344,19 +385,22 @@ module REXML
153+
raise REXML::ParseException.new(message, @source)
154+
end
155+
return [:notationdecl, name, *id]
156+
- elsif md = @source.match(/--(.*?)-->/um, true)
157+
+ elsif md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM)
158+
case md[1]
159+
when /--/, /-\z/
160+
raise REXML::ParseException.new("Malformed comment", @source)
161+
end
162+
return [ :comment, md[1] ] if md
163+
end
164+
- elsif match = @source.match(/(%.*?;)\s*/um, true)
165+
+ elsif match = @source.match(/(%.*?;)\s*/um, true, term: Private::DOCTYPE_TERM)
166+
return [ :externalentity, match[1] ]
167+
elsif @source.match(/\]\s*>/um, true)
168+
@document_status = :after_doctype
169+
return [ :end_doctype ]
170+
end
171+
+ if @document_status == :in_doctype
172+
+ raise ParseException.new("Malformed DOCTYPE: invalid declaration", @source)
173+
+ end
174+
end
175+
if @document_status == :after_doctype
176+
@source.match(/\s*/um, true)
177+
@@ -364,10 +408,14 @@ module REXML
178+
begin
179+
start_position = @source.position
180+
if @source.match("<", true)
181+
+ # :text's read_until may remain only "<" in buffer. In the
182+
+ # case, buffer is empty here. So we need to fill buffer
183+
+ # here explicitly.
184+
+ @source.ensure_buffer
185+
if @source.match("/", true)
186+
@nsstack.shift
187+
last_tag = @tags.pop
188+
- md = @source.match(CLOSE_PATTERN, true)
189+
+ md = @source.match(Private::CLOSE_PATTERN, true)
190+
if md and !last_tag
191+
message = "Unexpected top-level end tag (got '#{md[1]}')"
192+
raise REXML::ParseException.new(message, @source)
193+
@@ -384,16 +432,15 @@ module REXML
194+
#STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
195+
raise REXML::ParseException.new("Malformed node", @source) unless md
196+
if md[0][0] == ?-
197+
- md = @source.match(/--(.*?)-->/um, true)
198+
+ md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM)
199+
200+
- case md[1]
201+
- when /--/, /-\z/
202+
+ if md.nil? || /--|-\z/.match?(md[1])
203+
raise REXML::ParseException.new("Malformed comment", @source)
204+
end
205+
206+
- return [ :comment, md[1] ] if md
207+
+ return [ :comment, md[1] ]
208+
else
209+
- md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true)
210+
+ md = @source.match(/\[CDATA\[(.*?)\]\]>/um, true, term: Private::CDATA_TERM)
211+
return [ :cdata, md[1] ] if md
212+
end
213+
raise REXML::ParseException.new( "Declarations can only occur "+
214+
@@ -402,19 +449,19 @@ module REXML
215+
return process_instruction(start_position)
216+
else
217+
# Get the next tag
218+
- md = @source.match(TAG_PATTERN, true)
219+
+ md = @source.match(Private::TAG_PATTERN, true)
220+
unless md
221+
@source.position = start_position
222+
raise REXML::ParseException.new("malformed XML: missing tag start", @source)
223+
end
224+
tag = md[1]
225+
@document_status = :in_element
226+
- prefixes = Set.new
227+
- prefixes << md[2] if md[2]
228+
+ @prefixes.clear
229+
+ @prefixes << md[2] if md[2]
230+
@nsstack.unshift(curr_ns=Set.new)
231+
- attributes, closed = parse_attributes(prefixes, curr_ns)
232+
+ attributes, closed = parse_attributes(@prefixes, curr_ns)
233+
# Verify that all of the prefixes have been defined
234+
- for prefix in prefixes
235+
+ for prefix in @prefixes
236+
unless @nsstack.find{|k| k.member?(prefix)}
237+
raise UndefinedNamespaceException.new(prefix,@source,self)
238+
end
239+
@@ -424,13 +471,25 @@ module REXML
240+
@closed = tag
241+
@nsstack.shift
242+
else
243+
+ if @tags.empty? and @have_root
244+
+ raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
245+
+ end
246+
@tags.push( tag )
247+
end
248+
+ @have_root = true
249+
return [ :start_element, tag, attributes ]
250+
end
251+
else
252+
- md = @source.match(/([^<]*)/um, true)
253+
- text = md[1]
254+
+ text = @source.read_until("<")
255+
+ if text.chomp!("<")
256+
+ @source.position -= "<".bytesize
257+
+ end
258+
+ if @tags.empty? and @have_root
259+
+ unless /\A\s*\z/.match?(text)
260+
+ raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
261+
+ end
262+
+ return pull_event
263+
+ end
264+
return [ :text, text ]
265+
end
266+
rescue REXML::UndefinedNamespaceException
267+
@@ -475,10 +534,14 @@ module REXML
268+
269+
# Unescapes all possible entities
270+
def unnormalize( string, entities=nil, filter=nil )
271+
- rv = string.gsub( /\r\n?/, "\n" )
272+
+ if string.include?("\r")
273+
+ rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
274+
+ else
275+
+ rv = string.dup
276+
+ end
277+
matches = rv.scan( REFERENCE_RE )
278+
return rv if matches.size == 0
279+
- rv.gsub!( /&#((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
280+
+ rv.gsub!( Private::CHARACTER_REFERENCES ) {
281+
m=$1
282+
if m.start_with?("x")
283+
code_point = Integer(m[1..-1], 16)
284+
@@ -494,7 +557,7 @@ module REXML
285+
unless filter and filter.include?(entity_reference)
286+
entity_value = entity( entity_reference, entities )
287+
if entity_value
288+
- re = /&#{entity_reference};/
289+
+ re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
290+
rv.gsub!( re, entity_value )
291+
sum += rv.bytesize
292+
if sum > Security.entity_expansion_text_limit
293+
@@ -506,7 +569,7 @@ module REXML
294+
end
295+
end
296+
end
297+
- rv.gsub!( /&amp;/, '&' )
298+
+ rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
299+
end
300+
rv
301+
end
302+
@@ -527,7 +590,7 @@ module REXML
303+
end
304+
305+
def parse_name(base_error_message)
306+
- md = @source.match(NAME_PATTERN, true)
307+
+ md = @source.match(Private::NAME_PATTERN, true)
308+
unless md
309+
if @source.match(/\s*\S/um)
310+
message = "#{base_error_message}: invalid name"
311+
@@ -606,13 +669,16 @@ module REXML
312+
end
313+
314+
def process_instruction(start_position)
315+
- match_data = @source.match(INSTRUCTION_END, true)
316+
+ match_data = @source.match(Private::INSTRUCTION_END, true, term: Private::INSTRUCTION_TERM)
317+
unless match_data
318+
message = "Invalid processing instruction node"
319+
@source.position = start_position
320+
raise REXML::ParseException.new(message, @source)
321+
end
322+
- if @document_status.nil? and match_data[1] == "xml"
323+
+ if match_data[1] == "xml"
324+
+ if @document_status
325+
+ raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
326+
+ end
327+
content = match_data[2]
328+
version = VERSION.match(content)
329+
version = version[1] unless version.nil?
330+
@@ -654,8 +720,10 @@ module REXML
331+
raise REXML::ParseException.new(message, @source)
332+
end
333+
quote = match[1]
334+
+ start_position = @source.position
335+
value = @source.read_until(quote)
336+
unless value.chomp!(quote)
337+
+ @source.position = start_position
338+
message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
339+
raise REXML::ParseException.new(message, @source)
340+
end
341+
--
342+
2.34.1
343+

SPECS/rubygem-rexml/rubygem-rexml.spec

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
Summary: REXML is an XML toolkit for Ruby
44
Name: rubygem-%{gem_name}
55
Version: 3.2.7
6-
Release: 3%{?dist}
6+
Release: 4%{?dist}
77
License: BSD
88
Vendor: Microsoft Corporation
99
Distribution: Mariner
@@ -12,6 +12,7 @@ URL: https://github.com/ruby/rexml
1212
Source0: https://github.com/ruby/rexml/archive/refs/tags/v%{version}.tar.gz#/%{gem_name}-%{version}.tar.gz
1313
Patch0: CVE-2024-41946.patch
1414
Patch1: CVE-2024-49761.patch
15+
Patch2: CVE-2024-39908.patch
1516
BuildRequires: git
1617
BuildRequires: ruby
1718
Requires: ruby(release)
@@ -36,6 +37,9 @@ gem install -V --local --force --install-dir %{buildroot}/%{gemdir} %{gem_name}-
3637
%{gemdir}
3738

3839
%changelog
40+
* Tue Feb 18 2025 Kevin Lockwood <v-klockwood@microsoft.com> - 3.2.7-4
41+
- Add patch for CVE-2024-39908
42+
3943
* Mon Nov 04 2024 Saul Paredes <saulparedes@microsoft.com> - 3.2.7-3
4044
- Add patch for CVE-2024-49761
4145

0 commit comments

Comments
 (0)