Skip to content

Commit a67cb06

Browse files
[AUTO-CHERRYPICK] apply patch to fix CVE-2024-6232 and CVE-2024-8088 for python3 2.0 - branch main (#10553)
Co-authored-by: himaja-kesari <123194058+himaja-kesari@users.noreply.github.com>
1 parent 271b7dc commit a67cb06

7 files changed

Lines changed: 381 additions & 27 deletions

File tree

SPECS/python3/CVE-2024-6232.patch

Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
2+
index 7a6158c2eb9..bc5ec8bd582 100755
3+
--- a/Lib/tarfile.py
4+
+++ b/Lib/tarfile.py
5+
@@ -840,6 +840,9 @@ _NAMED_FILTERS = {
6+
# Sentinel for replace() defaults, meaning "don't change the attribute"
7+
_KEEP = object()
8+
9+
+# Header length is digits followed by a space.
10+
+_header_length_prefix_re = re.compile(br"([0-9]{1,20}) ")
11+
+
12+
class TarInfo(object):
13+
"""Informational class which holds the details about an
14+
archive member given by a tar header block.
15+
@@ -1399,41 +1402,59 @@ class TarInfo(object):
16+
else:
17+
pax_headers = tarfile.pax_headers.copy()
18+
19+
- # Check if the pax header contains a hdrcharset field. This tells us
20+
- # the encoding of the path, linkpath, uname and gname fields. Normally,
21+
- # these fields are UTF-8 encoded but since POSIX.1-2008 tar
22+
- # implementations are allowed to store them as raw binary strings if
23+
- # the translation to UTF-8 fails.
24+
- match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
25+
- if match is not None:
26+
- pax_headers["hdrcharset"] = match.group(1).decode("utf-8")
27+
-
28+
- # For the time being, we don't care about anything other than "BINARY".
29+
- # The only other value that is currently allowed by the standard is
30+
- # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
31+
- hdrcharset = pax_headers.get("hdrcharset")
32+
- if hdrcharset == "BINARY":
33+
- encoding = tarfile.encoding
34+
- else:
35+
- encoding = "utf-8"
36+
-
37+
# Parse pax header information. A record looks like that:
38+
# "%d %s=%s\n" % (length, keyword, value). length is the size
39+
# of the complete record including the length field itself and
40+
- # the newline. keyword and value are both UTF-8 encoded strings.
41+
- regex = re.compile(br"(\d+) ([^=]+)=")
42+
+ # the newline.
43+
pos = 0
44+
- while True:
45+
- match = regex.match(buf, pos)
46+
- if not match:
47+
- break
48+
+ encoding = None
49+
+ raw_headers = []
50+
+ while len(buf) > pos and buf[pos] != 0x00:
51+
+ if not (match := _header_length_prefix_re.match(buf, pos)):
52+
+ raise InvalidHeaderError("invalid header")
53+
+ try:
54+
+ length = int(match.group(1))
55+
+ except ValueError:
56+
+ raise InvalidHeaderError("invalid header")
57+
+ # Headers must be at least 5 bytes, shortest being '5 x=\n'.
58+
+ # Value is allowed to be empty.
59+
+ if length < 5:
60+
+ raise InvalidHeaderError("invalid header")
61+
+ if pos + length > len(buf):
62+
+ raise InvalidHeaderError("invalid header")
63+
64+
- length, keyword = match.groups()
65+
- length = int(length)
66+
- if length == 0:
67+
+ header_value_end_offset = match.start(1) + length - 1 # Last byte of the header
68+
+ keyword_and_value = buf[match.end(1) + 1:header_value_end_offset]
69+
+ raw_keyword, equals, raw_value = keyword_and_value.partition(b"=")
70+
+
71+
+ # Check the framing of the header. The last character must be '\n' (0x0A)
72+
+ if not raw_keyword or equals != b"=" or buf[header_value_end_offset] != 0x0A:
73+
raise InvalidHeaderError("invalid header")
74+
- value = buf[match.end(2) + 1:match.start(1) + length - 1]
75+
+ raw_headers.append((length, raw_keyword, raw_value))
76+
+
77+
+ # Check if the pax header contains a hdrcharset field. This tells us
78+
+ # the encoding of the path, linkpath, uname and gname fields. Normally,
79+
+ # these fields are UTF-8 encoded but since POSIX.1-2008 tar
80+
+ # implementations are allowed to store them as raw binary strings if
81+
+ # the translation to UTF-8 fails. For the time being, we don't care about
82+
+ # anything other than "BINARY". The only other value that is currently
83+
+ # allowed by the standard is "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
84+
+ # Note that we only follow the initial 'hdrcharset' setting to preserve
85+
+ # the initial behavior of the 'tarfile' module.
86+
+ if raw_keyword == b"hdrcharset" and encoding is None:
87+
+ if raw_value == b"BINARY":
88+
+ encoding = tarfile.encoding
89+
+ else: # This branch ensures only the first 'hdrcharset' header is used.
90+
+ encoding = "utf-8"
91+
+
92+
+ pos += length
93+
94+
+ # If no explicit hdrcharset is set, we use UTF-8 as a default.
95+
+ if encoding is None:
96+
+ encoding = "utf-8"
97+
+
98+
+ # After parsing the raw headers we can decode them to text.
99+
+ for length, raw_keyword, raw_value in raw_headers:
100+
# Normally, we could just use "utf-8" as the encoding and "strict"
101+
# as the error handler, but we better not take the risk. For
102+
# example, GNU tar <= 1.23 is known to store filenames it cannot
103+
@@ -1441,17 +1462,16 @@ class TarInfo(object):
104+
# hdrcharset=BINARY header).
105+
# We first try the strict standard encoding, and if that fails we
106+
# fall back on the user's encoding and error handler.
107+
- keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
108+
+ keyword = self._decode_pax_field(raw_keyword, "utf-8", "utf-8",
109+
tarfile.errors)
110+
if keyword in PAX_NAME_FIELDS:
111+
- value = self._decode_pax_field(value, encoding, tarfile.encoding,
112+
+ value = self._decode_pax_field(raw_value, encoding, tarfile.encoding,
113+
tarfile.errors)
114+
else:
115+
- value = self._decode_pax_field(value, "utf-8", "utf-8",
116+
+ value = self._decode_pax_field(raw_value, "utf-8", "utf-8",
117+
tarfile.errors)
118+
119+
pax_headers[keyword] = value
120+
- pos += length
121+
122+
# Fetch the next header.
123+
try:
124+
@@ -1466,7 +1486,7 @@ class TarInfo(object):
125+
126+
elif "GNU.sparse.size" in pax_headers:
127+
# GNU extended sparse format version 0.0.
128+
- self._proc_gnusparse_00(next, pax_headers, buf)
129+
+ self._proc_gnusparse_00(next, raw_headers)
130+
131+
elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
132+
# GNU extended sparse format version 1.0.
133+
@@ -1488,15 +1508,24 @@ class TarInfo(object):
134+
135+
return next
136+
137+
- def _proc_gnusparse_00(self, next, pax_headers, buf):
138+
+ def _proc_gnusparse_00(self, next, raw_headers):
139+
"""Process a GNU tar extended sparse header, version 0.0.
140+
"""
141+
offsets = []
142+
- for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
143+
- offsets.append(int(match.group(1)))
144+
numbytes = []
145+
- for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
146+
- numbytes.append(int(match.group(1)))
147+
+ for _, keyword, value in raw_headers:
148+
+ if keyword == b"GNU.sparse.offset":
149+
+ try:
150+
+ offsets.append(int(value.decode()))
151+
+ except ValueError:
152+
+ raise InvalidHeaderError("invalid header")
153+
+
154+
+ elif keyword == b"GNU.sparse.numbytes":
155+
+ try:
156+
+ numbytes.append(int(value.decode()))
157+
+ except ValueError:
158+
+ raise InvalidHeaderError("invalid header")
159+
+
160+
next.sparse = list(zip(offsets, numbytes))
161+
162+
def _proc_gnusparse_01(self, next, pax_headers):
163+
@@ -2875,4 +2904,4 @@ def main():
164+
print('{!r} file created.'.format(tar_name))
165+
166+
if __name__ == '__main__':
167+
- main()
168+
+ main()
169+
\ No newline at end of file
170+
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
171+
index 3df64c78032..cadd3f35808 100644
172+
--- a/Lib/test/test_tarfile.py
173+
+++ b/Lib/test/test_tarfile.py
174+
@@ -1113,6 +1113,47 @@ class PaxReadTest(LongnameTest, ReadTest, unittest.TestCase):
175+
finally:
176+
tar.close()
177+
178+
+ def test_pax_header_bad_formats(self):
179+
+ # Each replacement below is a malformed pax record; opening the
180+
+ # rewritten archive must fail with ReadError.
181+
+ pax_header_replacements = (
182+
+ b" foo=bar\n",
183+
+ b"0 \n",
184+
+ b"1 \n",
185+
+ b"2 \n",
186+
+ b"3 =\n",
187+
+ b"4 =a\n",
188+
+ b"1000000 foo=bar\n",
189+
+ b"0 foo=bar\n",
190+
+ b"-12 foo=bar\n",
191+
+ b"000000000000000000000000036 foo=bar\n",
192+
+ )
193+
+ pax_headers = {"foo": "bar"}
194+
+
195+
+ for replacement in pax_header_replacements:
196+
+ with self.subTest(header=replacement):
197+
+ tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT,
198+
+ encoding="iso8859-1")
199+
+ try:
200+
+ t = tarfile.TarInfo()
201+
+ t.name = "pax" # plain ASCII name
202+
+ t.uid = 1
203+
+ t.pax_headers = pax_headers
204+
+ tar.addfile(t)
205+
+ finally:
206+
+ tar.close()
207+
+
208+
+ with open(tmpname, "rb") as f:
209+
+ data = f.read()
210+
+ self.assertIn(b"11 foo=bar\n", data)
211+
+ data = data.replace(b"11 foo=bar\n", replacement)
212+
+
213+
+ with open(tmpname, "wb") as f:
214+
+ f.truncate()
215+
+ f.write(data)
216+
+
217+
+ with self.assertRaisesRegex(tarfile.ReadError, r"file could not be opened successfully"):
218+
+ tarfile.open(tmpname, encoding="iso8859-1")
219+
220+
class WriteTestBase(TarTest):
221+
# Put all write tests in here that are supposed to be tested

SPECS/python3/CVE-2024-8088.patch

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
2+
index 17e95eb8623..31e9fef4355 100644
3+
--- a/Lib/test/test_zipfile.py
4+
+++ b/Lib/test/test_zipfile.py
5+
@@ -3054,6 +3054,83 @@ class TestPath(unittest.TestCase):
6+
data = ['/'.join(string.ascii_lowercase + str(n)) for n in range(10000)]
7+
zipfile.CompleteDirs._implied_dirs(data)
8+
9+
+ def test_malformed_paths(self):
10+
+ """
11+
+ Path should handle malformed paths gracefully.
12+
+
13+
+ Paths with leading slashes are not visible.
14+
+
15+
+ Paths with dots are treated like regular files.
16+
+ """
17+
+ data = io.BytesIO()
18+
+ zf = zipfile.ZipFile(data, "w")
19+
+ zf.writestr("/one-slash.txt", b"content")
20+
+ zf.writestr("//two-slash.txt", b"content")
21+
+ zf.writestr("../parent.txt", b"content")
22+
+ zf.filename = ''
23+
+ root = zipfile.Path(zf)
24+
+ assert list(map(str, root.iterdir())) == ['../']
25+
+ assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content'
26+
+
27+
+ def test_unsupported_names(self):
28+
+ """
29+
+ Path segments with special characters are readable.
30+
+
31+
+ On some platforms or file systems, characters like
32+
+ ``:`` and ``?`` are not allowed, but they are valid
33+
+ in the zip file.
34+
+ """
35+
+ data = io.BytesIO()
36+
+ zf = zipfile.ZipFile(data, "w")
37+
+ zf.writestr("path?", b"content")
38+
+ zf.writestr("V: NMS.flac", b"fLaC...")
39+
+ zf.filename = ''
40+
+ root = zipfile.Path(zf)
41+
+ contents = root.iterdir()
42+
+ assert next(contents).name == 'path?'
43+
+ assert next(contents).name == 'V: NMS.flac'
44+
+ assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..."
45+
+
46+
+ def test_backslash_not_separator(self):
47+
+ """
48+
+ In a zip file, backslashes are not separators.
49+
+ """
50+
+ data = io.BytesIO()
51+
+ zf = zipfile.ZipFile(data, "w")
52+
+ zf.writestr(DirtyZipInfo.for_name("foo\\bar", zf), b"content")
53+
+ zf.filename = ''
54+
+ root = zipfile.Path(zf)
55+
+ (first,) = root.iterdir()
56+
+ assert not first.is_dir()
57+
+ assert first.name == 'foo\\bar'
58+
+
59+
+
60+
+class DirtyZipInfo(zipfile.ZipInfo):
61+
+ """
62+
+ Bypass name sanitization.
63+
+ """
64+
+
65+
+ def __init__(self, filename, *args, **kwargs):
66+
+ super().__init__(filename, *args, **kwargs)
67+
+ self.filename = filename
68+
+
69+
+ @classmethod
70+
+ def for_name(cls, name, archive):
71+
+ """
72+
+ Construct the same way that ZipFile.writestr does.
73+
+
74+
+ TODO: extract this functionality and re-use
75+
+ """
76+
+ self = cls(filename=name, date_time=time.localtime(time.time())[:6])
77+
+ self.compress_type = archive.compression
78+
+ self.compress_level = archive.compresslevel
79+
+ if self.filename.endswith('/'): # pragma: no cover
80+
+ self.external_attr = 0o40775 << 16 # drwxrwxr-x
81+
+ self.external_attr |= 0x10 # MS-DOS directory flag
82+
+ else:
83+
+ self.external_attr = 0o600 << 16 # ?rw-------
84+
+ return self
85+
+
86+
87+
if __name__ == "__main__":
88+
- unittest.main()
89+
+ unittest.main()
90+
\ No newline at end of file
91+
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
92+
index 95f95ee1126..dd48a6a87ba 100644
93+
--- a/Lib/zipfile.py
94+
+++ b/Lib/zipfile.py
95+
@@ -2146,7 +2146,7 @@ def _parents(path):
96+
def _ancestry(path):
97+
"""
98+
Given a path with elements separated by
99+
- posixpath.sep, generate all elements of that path
100+
+ posixpath.sep, generate all elements of that path.
101+
102+
>>> list(_ancestry('b/d'))
103+
['b/d', 'b']
104+
@@ -2158,9 +2158,14 @@ def _ancestry(path):
105+
['b']
106+
>>> list(_ancestry(''))
107+
[]
108+
+
109+
+ Multiple separators are treated like a single separator.
110+
+
111+
+ >>> list(_ancestry('//b//d///f//'))
112+
+ ['//b//d///f', '//b//d', '//b']
113+
"""
114+
path = path.rstrip(posixpath.sep)
115+
- while path and path != posixpath.sep:
116+
+ while path.rstrip(posixpath.sep):
117+
yield path
118+
path, tail = posixpath.split(path)
119+
120+
@@ -2446,4 +2451,4 @@ def main(args=None):
121+
122+
123+
if __name__ == "__main__":
124+
- main()
125+
+ main()
126+
\ No newline at end of file

SPECS/python3/python3.spec

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
Summary: A high-level scripting language
1313
Name: python3
1414
Version: 3.9.19
15-
Release: 4%{?dist}
15+
Release: 5%{?dist}
1616
License: PSF
1717
Vendor: Microsoft Corporation
1818
Distribution: Mariner
@@ -24,6 +24,8 @@ Patch0: cgi3.patch
2424
Patch1: 0001-gh-95231-Disable-md5-crypt-modules-if-FIPS-is-enable.patch
2525
Patch2: CVE-2024-0397.patch
2626
Patch3: CVE-2024-7592.patch
27+
Patch4: CVE-2024-6232.patch
28+
Patch5: CVE-2024-8088.patch
2729
# Patch for setuptools, resolved in 65.5.1
2830
Patch1000: CVE-2022-40897.patch
2931
Patch1001: CVE-2024-6345.patch
@@ -165,6 +167,8 @@ The test package contains all regression tests for Python as well as the modules
165167
%patch1 -p1
166168
%patch2 -p1
167169
%patch3 -p1
170+
%patch4 -p1
171+
%patch5 -p1
168172

169173
%build
170174
# Remove GCC specs and build environment linker scripts
@@ -320,6 +324,9 @@ rm -rf %{buildroot}%{_bindir}/__pycache__
320324
%{_libdir}/python%{majmin}/test/*
321325

322326
%changelog
327+
* Fri Sep 20 2024 Himaja Kesari <himajakesari@microsoft.com> - 3.9.19-5
328+
- Patch CVE-2024-6232 and CVE-2024-8088
329+
323330
* Wed Aug 21 2024 Brian Fjeldstad <bfjelds@microsoft.com> - 3.9.19-4
324331
- Patch for CVE-2024-7592
325332

0 commit comments

Comments
 (0)