|
| 1 | +diff --git a/Lib/tarfile.py b/Lib/tarfile.py |
| 2 | +index 7a6158c2eb9..bc5ec8bd582 100755 |
| 3 | +--- a/Lib/tarfile.py |
| 4 | ++++ b/Lib/tarfile.py |
| 5 | +@@ -840,6 +840,9 @@ _NAMED_FILTERS = { |
| 6 | + # Sentinel for replace() defaults, meaning "don't change the attribute" |
| 7 | + _KEEP = object() |
| 8 | + |
| 9 | ++# Header length is digits followed by a space. |
| 10 | ++_header_length_prefix_re = re.compile(br"([0-9]{1,20}) ") |
| 11 | ++ |
| 12 | + class TarInfo(object): |
| 13 | + """Informational class which holds the details about an |
| 14 | + archive member given by a tar header block. |
| 15 | +@@ -1399,41 +1402,59 @@ class TarInfo(object): |
| 16 | + else: |
| 17 | + pax_headers = tarfile.pax_headers.copy() |
| 18 | + |
| 19 | +- # Check if the pax header contains a hdrcharset field. This tells us |
| 20 | +- # the encoding of the path, linkpath, uname and gname fields. Normally, |
| 21 | +- # these fields are UTF-8 encoded but since POSIX.1-2008 tar |
| 22 | +- # implementations are allowed to store them as raw binary strings if |
| 23 | +- # the translation to UTF-8 fails. |
| 24 | +- match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf) |
| 25 | +- if match is not None: |
| 26 | +- pax_headers["hdrcharset"] = match.group(1).decode("utf-8") |
| 27 | +- |
| 28 | +- # For the time being, we don't care about anything other than "BINARY". |
| 29 | +- # The only other value that is currently allowed by the standard is |
| 30 | +- # "ISO-IR 10646 2000 UTF-8" in other words UTF-8. |
| 31 | +- hdrcharset = pax_headers.get("hdrcharset") |
| 32 | +- if hdrcharset == "BINARY": |
| 33 | +- encoding = tarfile.encoding |
| 34 | +- else: |
| 35 | +- encoding = "utf-8" |
| 36 | +- |
| 37 | + # Parse pax header information. A record looks like that: |
| 38 | + # "%d %s=%s\n" % (length, keyword, value). length is the size |
| 39 | + # of the complete record including the length field itself and |
| 40 | +- # the newline. keyword and value are both UTF-8 encoded strings. |
| 41 | +- regex = re.compile(br"(\d+) ([^=]+)=") |
| 42 | ++ # the newline. |
| 43 | + pos = 0 |
| 44 | +- while True: |
| 45 | +- match = regex.match(buf, pos) |
| 46 | +- if not match: |
| 47 | +- break |
| 48 | ++ encoding = None |
| 49 | ++ raw_headers = [] |
| 50 | ++ while len(buf) > pos and buf[pos] != 0x00: |
| 51 | ++ if not (match := _header_length_prefix_re.match(buf, pos)): |
| 52 | ++ raise InvalidHeaderError("invalid header") |
| 53 | ++ try: |
| 54 | ++ length = int(match.group(1)) |
| 55 | ++ except ValueError: |
| 56 | ++ raise InvalidHeaderError("invalid header") |
| 57 | ++ # Headers must be at least 5 bytes, shortest being '5 x=\n'. |
| 58 | ++ # Value is allowed to be empty. |
| 59 | ++ if length < 5: |
| 60 | ++ raise InvalidHeaderError("invalid header") |
| 61 | ++ if pos + length > len(buf): |
| 62 | ++ raise InvalidHeaderError("invalid header") |
| 63 | + |
| 64 | +- length, keyword = match.groups() |
| 65 | +- length = int(length) |
| 66 | +- if length == 0: |
| 67 | ++ header_value_end_offset = match.start(1) + length - 1 # Last byte of the header |
| 68 | ++ keyword_and_value = buf[match.end(1) + 1:header_value_end_offset] |
| 69 | ++ raw_keyword, equals, raw_value = keyword_and_value.partition(b"=") |
| 70 | ++ |
| 71 | ++ # Check the framing of the header. The last character must be '\n' (0x0A) |
| 72 | ++ if not raw_keyword or equals != b"=" or buf[header_value_end_offset] != 0x0A: |
| 73 | + raise InvalidHeaderError("invalid header") |
| 74 | +- value = buf[match.end(2) + 1:match.start(1) + length - 1] |
| 75 | ++ raw_headers.append((length, raw_keyword, raw_value)) |
| 76 | ++ |
| 77 | ++ # Check if the pax header contains a hdrcharset field. This tells us |
| 78 | ++ # the encoding of the path, linkpath, uname and gname fields. Normally, |
| 79 | ++ # these fields are UTF-8 encoded but since POSIX.1-2008 tar |
| 80 | ++ # implementations are allowed to store them as raw binary strings if |
| 81 | ++ # the translation to UTF-8 fails. For the time being, we don't care about |
| 82 | ++ # anything other than "BINARY". The only other value that is currently |
| 83 | ++ # allowed by the standard is "ISO-IR 10646 2000 UTF-8" in other words UTF-8. |
| 84 | ++ # Note that we only follow the initial 'hdrcharset' setting to preserve |
| 85 | ++ # the initial behavior of the 'tarfile' module. |
| 86 | ++ if raw_keyword == b"hdrcharset" and encoding is None: |
| 87 | ++ if raw_value == b"BINARY": |
| 88 | ++ encoding = tarfile.encoding |
| 89 | ++ else: # This branch ensures only the first 'hdrcharset' header is used. |
| 90 | ++ encoding = "utf-8" |
| 91 | ++ |
| 92 | ++ pos += length |
| 93 | + |
| 94 | ++ # If no explicit hdrcharset is set, we use UTF-8 as a default. |
| 95 | ++ if encoding is None: |
| 96 | ++ encoding = "utf-8" |
| 97 | ++ |
| 98 | ++ # After parsing the raw headers we can decode them to text. |
| 99 | ++ for length, raw_keyword, raw_value in raw_headers: |
| 100 | + # Normally, we could just use "utf-8" as the encoding and "strict" |
| 101 | + # as the error handler, but we better not take the risk. For |
| 102 | + # example, GNU tar <= 1.23 is known to store filenames it cannot |
| 103 | +@@ -1441,17 +1462,16 @@ class TarInfo(object): |
| 104 | + # hdrcharset=BINARY header). |
| 105 | + # We first try the strict standard encoding, and if that fails we |
| 106 | + # fall back on the user's encoding and error handler. |
| 107 | +- keyword = self._decode_pax_field(keyword, "utf-8", "utf-8", |
| 108 | ++ keyword = self._decode_pax_field(raw_keyword, "utf-8", "utf-8", |
| 109 | + tarfile.errors) |
| 110 | + if keyword in PAX_NAME_FIELDS: |
| 111 | +- value = self._decode_pax_field(value, encoding, tarfile.encoding, |
| 112 | ++ value = self._decode_pax_field(raw_value, encoding, tarfile.encoding, |
| 113 | + tarfile.errors) |
| 114 | + else: |
| 115 | +- value = self._decode_pax_field(value, "utf-8", "utf-8", |
| 116 | ++ value = self._decode_pax_field(raw_value, "utf-8", "utf-8", |
| 117 | + tarfile.errors) |
| 118 | + |
| 119 | + pax_headers[keyword] = value |
| 120 | +- pos += length |
| 121 | + |
| 122 | + # Fetch the next header. |
| 123 | + try: |
| 124 | +@@ -1466,7 +1486,7 @@ class TarInfo(object): |
| 125 | + |
| 126 | + elif "GNU.sparse.size" in pax_headers: |
| 127 | + # GNU extended sparse format version 0.0. |
| 128 | +- self._proc_gnusparse_00(next, pax_headers, buf) |
| 129 | ++ self._proc_gnusparse_00(next, raw_headers) |
| 130 | + |
| 131 | + elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0": |
| 132 | + # GNU extended sparse format version 1.0. |
| 133 | +@@ -1488,15 +1508,24 @@ class TarInfo(object): |
| 134 | + |
| 135 | + return next |
| 136 | + |
| 137 | +- def _proc_gnusparse_00(self, next, pax_headers, buf): |
| 138 | ++ def _proc_gnusparse_00(self, next, raw_headers): |
| 139 | + """Process a GNU tar extended sparse header, version 0.0. |
| 140 | + """ |
| 141 | + offsets = [] |
| 142 | +- for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf): |
| 143 | +- offsets.append(int(match.group(1))) |
| 144 | + numbytes = [] |
| 145 | +- for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf): |
| 146 | +- numbytes.append(int(match.group(1))) |
| 147 | ++ for _, keyword, value in raw_headers: |
| 148 | ++ if keyword == b"GNU.sparse.offset": |
| 149 | ++ try: |
| 150 | ++ offsets.append(int(value.decode())) |
| 151 | ++ except ValueError: |
| 152 | ++ raise InvalidHeaderError("invalid header") |
| 153 | ++ |
| 154 | ++ elif keyword == b"GNU.sparse.numbytes": |
| 155 | ++ try: |
| 156 | ++ numbytes.append(int(value.decode())) |
| 157 | ++ except ValueError: |
| 158 | ++ raise InvalidHeaderError("invalid header") |
| 159 | ++ |
| 160 | + next.sparse = list(zip(offsets, numbytes)) |
| 161 | + |
| 162 | + def _proc_gnusparse_01(self, next, pax_headers): |
| 170 | +diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py |
| 171 | +index 3df64c78032..cadd3f35808 100644 |
| 172 | +--- a/Lib/test/test_tarfile.py |
| 173 | ++++ b/Lib/test/test_tarfile.py |
| 174 | +@@ -1113,6 +1113,47 @@ class PaxReadTest(LongnameTest, ReadTest, unittest.TestCase): |
| 175 | + finally: |
| 176 | + tar.close() |
| 177 | + |
| 178 | ++ def test_pax_header_bad_formats(self): |
| 179 | ++ # Test that malformed pax extended header records are rejected |
| 180 | ++ # and make the whole archive fail to open with ReadError. |
| 181 | ++ pax_header_replacements = ( |
| 182 | ++ b" foo=bar\n", |
| 183 | ++ b"0 \n", |
| 184 | ++ b"1 \n", |
| 185 | ++ b"2 \n", |
| 186 | ++ b"3 =\n", |
| 187 | ++ b"4 =a\n", |
| 188 | ++ b"1000000 foo=bar\n", |
| 189 | ++ b"0 foo=bar\n", |
| 190 | ++ b"-12 foo=bar\n", |
| 191 | ++ b"000000000000000000000000036 foo=bar\n", |
| 192 | ++ ) |
| 193 | ++ pax_headers = {"foo": "bar"} |
| 194 | ++ |
| 195 | ++ for replacement in pax_header_replacements: |
| 196 | ++ with self.subTest(header=replacement): |
| 197 | ++ tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, |
| 198 | ++ encoding="iso8859-1") |
| 199 | ++ try: |
| 200 | ++ t = tarfile.TarInfo() |
| 201 | ++ t.name = "pax" |
| 202 | ++ t.uid = 1 |
| 203 | ++ t.pax_headers = pax_headers |
| 204 | ++ tar.addfile(t) |
| 205 | ++ finally: |
| 206 | ++ tar.close() |
| 207 | ++ |
| 208 | ++ with open(tmpname, "rb") as f: |
| 209 | ++ data = f.read() |
| 210 | ++ self.assertIn(b"11 foo=bar\n", data) |
| 211 | ++ data = data.replace(b"11 foo=bar\n", replacement) |
| 212 | ++ |
| 213 | ++ with open(tmpname, "wb") as f: |
| 214 | ++ f.truncate() |
| 215 | ++ f.write(data) |
| 216 | ++ |
| 217 | ++ with self.assertRaisesRegex(tarfile.ReadError, r"file could not be opened successfully"): |
| 218 | ++ tarfile.open(tmpname, encoding="iso8859-1") |
| 219 | + |
| 220 | + class WriteTestBase(TarTest): |
| 221 | + # Put all write tests in here that are supposed to be tested |