diff --git a/CVE-2023-27043.patch b/CVE-2023-27043.patch new file mode 100644 index 0000000000000000000000000000000000000000..4eff95c4c4eeb385e04bfd66cb7ed7c1a668ed36 --- /dev/null +++ b/CVE-2023-27043.patch @@ -0,0 +1,554 @@ +From 067fc2ea57ecf8897806580794db93b94796d43f Mon Sep 17 00:00:00 2001 +From: Victor Stinner +Date: Fri, 15 Dec 2023 16:10:40 +0100 +Subject: [PATCH] [3.11] [CVE-2023-27043] gh-102988: Reject malformed addresses + in email.parseaddr() (GH-111116) + +Detect email address parsing errors and return empty tuple to +indicate the parsing error (old API). Add an optional 'strict' +parameter to getaddresses() and parseaddr() functions. Patch by +Thomas Dwyer. + +(cherry picked from commit 4a153a1d3b18803a684cd1bcc2cdf3ede3dbae19) + +Co-authored-by: Victor Stinner +Co-Authored-By: Thomas Dwyer +--- + Doc/library/email.utils.rst | 19 +- + Doc/whatsnew/3.11.rst | 10 + + Lib/email/utils.py | 151 ++++++++++++- + Lib/test/test_email/test_email.py | 204 +++++++++++++++++- + ...-10-20-15-28-08.gh-issue-102988.dStNO7.rst | 8 + + 5 files changed, 371 insertions(+), 21 deletions(-) + create mode 100644 Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst + +diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst +index 0e266b6a45782a..6723dc4f13890d 100644 +--- a/Doc/library/email.utils.rst ++++ b/Doc/library/email.utils.rst +@@ -60,13 +60,18 @@ of the new API. + begins with angle brackets, they are stripped off. + + +-.. function:: parseaddr(address) ++.. function:: parseaddr(address, *, strict=True) + + Parse address -- which should be the value of some address-containing field such + as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and + *email address* parts. Returns a tuple of that information, unless the parse + fails, in which case a 2-tuple of ``('', '')`` is returned. + ++ If *strict* is true, use a strict parser which rejects malformed inputs. ++ ++ .. 
versionchanged:: 3.13 ++ Add *strict* optional parameter and reject malformed inputs by default. ++ + + .. function:: formataddr(pair, charset='utf-8') + +@@ -84,12 +89,15 @@ of the new API. + Added the *charset* option. + + +-.. function:: getaddresses(fieldvalues) ++.. function:: getaddresses(fieldvalues, *, strict=True) + + This method returns a list of 2-tuples of the form returned by ``parseaddr()``. + *fieldvalues* is a sequence of header field values as might be returned by +- :meth:`Message.get_all `. Here's a simple +- example that gets all the recipients of a message:: ++ :meth:`Message.get_all `. ++ ++ If *strict* is true, use a strict parser which rejects malformed inputs. ++ ++ Here's a simple example that gets all the recipients of a message:: + + from email.utils import getaddresses + +@@ -99,6 +107,9 @@ of the new API. + resent_ccs = msg.get_all('resent-cc', []) + all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs) + ++ .. versionchanged:: 3.13 ++ Add *strict* optional parameter and reject malformed inputs by default. ++ + + .. function:: parsedate(date) + +diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst +index 06c7632ff7c5b1..583a408cace94c 100644 +--- a/Doc/whatsnew/3.11.rst ++++ b/Doc/whatsnew/3.11.rst +@@ -2776,3 +2776,13 @@ email + If you need to turn this safety feature off, + set :attr:`~email.policy.Policy.verify_generated_headers`. + (Contributed by Bas Bloemsaat and Petr Viktorin in :gh:`121650`.) ++ ++* :func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now return ++ ``('', '')`` 2-tuples in more situations where invalid email addresses are ++ encountered, instead of potentially inaccurate values. ++ An optional *strict* parameter was added to these two functions: ++ use ``strict=False`` to get the old behavior, accepting malformed inputs. ++ ``getattr(email.utils, 'supports_strict_parsing', False)`` can be used to ++ check if the *strict* paramater is available. 
++ (Contributed by Thomas Dwyer and Victor Stinner for :gh:`102988` to improve ++ the CVE-2023-27043 fix.) +diff --git a/Lib/email/utils.py b/Lib/email/utils.py +index 8993858ab4853f..94ead0e91fa80d 100644 +--- a/Lib/email/utils.py ++++ b/Lib/email/utils.py +@@ -48,6 +48,7 @@ + specialsre = re.compile(r'[][\\()<>@,:;".]') + escapesre = re.compile(r'[\\"]') + ++ + def _has_surrogates(s): + """Return True if s contains surrogate-escaped binary data.""" + # This check is based on the fact that unless there are surrogates, utf8 +@@ -106,12 +107,127 @@ def formataddr(pair, charset='utf-8'): + return address + + ++def _iter_escaped_chars(addr): ++ pos = 0 ++ escape = False ++ for pos, ch in enumerate(addr): ++ if escape: ++ yield (pos, '\\' + ch) ++ escape = False ++ elif ch == '\\': ++ escape = True ++ else: ++ yield (pos, ch) ++ if escape: ++ yield (pos, '\\') ++ ++ ++def _strip_quoted_realnames(addr): ++ """Strip real names between quotes.""" ++ if '"' not in addr: ++ # Fast path ++ return addr ++ ++ start = 0 ++ open_pos = None ++ result = [] ++ for pos, ch in _iter_escaped_chars(addr): ++ if ch == '"': ++ if open_pos is None: ++ open_pos = pos ++ else: ++ if start != open_pos: ++ result.append(addr[start:open_pos]) ++ start = pos + 1 ++ open_pos = None ++ ++ if start < len(addr): ++ result.append(addr[start:]) ++ ++ return ''.join(result) + +-def getaddresses(fieldvalues): +- """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" +- all = COMMASPACE.join(str(v) for v in fieldvalues) +- a = _AddressList(all) +- return a.addresslist ++ ++supports_strict_parsing = True ++ ++def getaddresses(fieldvalues, *, strict=True): ++ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. ++ ++ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in ++ its place. ++ ++ If strict is true, use a strict parser which rejects malformed inputs. 
++ """ ++ ++ # If strict is true, if the resulting list of parsed addresses is greater ++ # than the number of fieldvalues in the input list, a parsing error has ++ # occurred and consequently a list containing a single empty 2-tuple [('', ++ # '')] is returned in its place. This is done to avoid invalid output. ++ # ++ # Malformed input: getaddresses(['alice@example.com ']) ++ # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')] ++ # Safe output: [('', '')] ++ ++ if not strict: ++ all = COMMASPACE.join(str(v) for v in fieldvalues) ++ a = _AddressList(all) ++ return a.addresslist ++ ++ fieldvalues = [str(v) for v in fieldvalues] ++ fieldvalues = _pre_parse_validation(fieldvalues) ++ addr = COMMASPACE.join(fieldvalues) ++ a = _AddressList(addr) ++ result = _post_parse_validation(a.addresslist) ++ ++ # Treat output as invalid if the number of addresses is not equal to the ++ # expected number of addresses. ++ n = 0 ++ for v in fieldvalues: ++ # When a comma is used in the Real Name part it is not a deliminator. ++ # So strip those out before counting the commas. ++ v = _strip_quoted_realnames(v) ++ # Expected number of addresses: 1 + number of commas ++ n += 1 + v.count(',') ++ if len(result) != n: ++ return [('', '')] ++ ++ return result ++ ++ ++def _check_parenthesis(addr): ++ # Ignore parenthesis in quoted real names. 
++ addr = _strip_quoted_realnames(addr) ++ ++ opens = 0 ++ for pos, ch in _iter_escaped_chars(addr): ++ if ch == '(': ++ opens += 1 ++ elif ch == ')': ++ opens -= 1 ++ if opens < 0: ++ return False ++ return (opens == 0) ++ ++ ++def _pre_parse_validation(email_header_fields): ++ accepted_values = [] ++ for v in email_header_fields: ++ if not _check_parenthesis(v): ++ v = "('', '')" ++ accepted_values.append(v) ++ ++ return accepted_values ++ ++ ++def _post_parse_validation(parsed_email_header_tuples): ++ accepted_values = [] ++ # The parser would have parsed a correctly formatted domain-literal ++ # The existence of an [ after parsing indicates a parsing failure ++ for v in parsed_email_header_tuples: ++ if '[' in v[1]: ++ v = ('', '') ++ accepted_values.append(v) ++ ++ return accepted_values + + + def _format_timetuple_and_zone(timetuple, zone): +@@ -205,16 +321,33 @@ def parsedate_to_datetime(data): + tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) + + +-def parseaddr(addr): ++def parseaddr(addr, *, strict=True): + """ + Parse addr into its constituent realname and email address parts. + + Return a tuple of realname and email address, unless the parse fails, in + which case return a 2-tuple of ('', ''). ++ ++ If strict is True, use a strict parser which rejects malformed inputs. 
+ """ +- addrs = _AddressList(addr).addresslist +- if not addrs: +- return '', '' ++ if not strict: ++ addrs = _AddressList(addr).addresslist ++ if not addrs: ++ return ('', '') ++ return addrs[0] ++ ++ if isinstance(addr, list): ++ addr = addr[0] ++ ++ if not isinstance(addr, str): ++ return ('', '') ++ ++ addr = _pre_parse_validation([addr])[0] ++ addrs = _post_parse_validation(_AddressList(addr).addresslist) ++ ++ if not addrs or len(addrs) > 1: ++ return ('', '') ++ + return addrs[0] + + +diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py +index 785696e5c541fb..ad60ed3a7591c0 100644 +--- a/Lib/test/test_email/test_email.py ++++ b/Lib/test/test_email/test_email.py +@@ -17,6 +17,7 @@ + + import email + import email.policy ++import email.utils + + from email.charset import Charset + from email.generator import Generator, DecodedGenerator, BytesGenerator +@@ -3336,15 +3337,154 @@ def test_getaddresses(self): + [('Al Person', 'aperson@dom.ain'), + ('Bud Person', 'bperson@dom.ain')]) + ++ def test_getaddresses_comma_in_name(self): ++ """GH-106669 regression test.""" ++ self.assertEqual( ++ utils.getaddresses( ++ [ ++ '"Bud, Person" ', ++ 'aperson@dom.ain (Al Person)', ++ '"Mariusz Felisiak" ', ++ ] ++ ), ++ [ ++ ('Bud, Person', 'bperson@dom.ain'), ++ ('Al Person', 'aperson@dom.ain'), ++ ('Mariusz Felisiak', 'to@example.com'), ++ ], ++ ) ++ ++ def test_parsing_errors(self): ++ """Test for parsing errors from CVE-2023-27043 and CVE-2019-16056""" ++ alice = 'alice@example.org' ++ bob = 'bob@example.com' ++ empty = ('', '') ++ ++ # Test utils.getaddresses() and utils.parseaddr() on malformed email ++ # addresses: default behavior (strict=True) rejects malformed address, ++ # and strict=False which tolerates malformed address. 
++ for invalid_separator, expected_non_strict in ( ++ ('(', [(f'<{bob}>', alice)]), ++ (')', [('', alice), empty, ('', bob)]), ++ ('<', [('', alice), empty, ('', bob), empty]), ++ ('>', [('', alice), empty, ('', bob)]), ++ ('[', [('', f'{alice}[<{bob}>]')]), ++ (']', [('', alice), empty, ('', bob)]), ++ ('@', [empty, empty, ('', bob)]), ++ (';', [('', alice), empty, ('', bob)]), ++ (':', [('', alice), ('', bob)]), ++ ('.', [('', alice + '.'), ('', bob)]), ++ ('"', [('', alice), ('', f'<{bob}>')]), ++ ): ++ address = f'{alice}{invalid_separator}<{bob}>' ++ with self.subTest(address=address): ++ self.assertEqual(utils.getaddresses([address]), ++ [empty]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ expected_non_strict) ++ ++ self.assertEqual(utils.parseaddr([address]), ++ empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Comma (',') is treated differently depending on strict parameter. ++ # Comma without quotes. ++ address = f'{alice},<{bob}>' ++ self.assertEqual(utils.getaddresses([address]), ++ [('', alice), ('', bob)]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ [('', alice), ('', bob)]) ++ self.assertEqual(utils.parseaddr([address]), ++ empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Real name between quotes containing comma. ++ address = '"Alice, alice@example.org" ' ++ expected_strict = ('Alice, alice@example.org', 'bob@example.com') ++ self.assertEqual(utils.getaddresses([address]), [expected_strict]) ++ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict]) ++ self.assertEqual(utils.parseaddr([address]), expected_strict) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Valid parenthesis in comments. 
++ address = 'alice@example.org (Alice)' ++ expected_strict = ('Alice', 'alice@example.org') ++ self.assertEqual(utils.getaddresses([address]), [expected_strict]) ++ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict]) ++ self.assertEqual(utils.parseaddr([address]), expected_strict) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Invalid parenthesis in comments. ++ address = 'alice@example.org )Alice(' ++ self.assertEqual(utils.getaddresses([address]), [empty]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ [('', 'alice@example.org'), ('', ''), ('', 'Alice')]) ++ self.assertEqual(utils.parseaddr([address]), empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Two addresses with quotes separated by comma. ++ address = '"Jane Doe" , "John Doe" ' ++ self.assertEqual(utils.getaddresses([address]), ++ [('Jane Doe', 'jane@example.net'), ++ ('John Doe', 'john@example.net')]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ [('Jane Doe', 'jane@example.net'), ++ ('John Doe', 'john@example.net')]) ++ self.assertEqual(utils.parseaddr([address]), empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Test email.utils.supports_strict_parsing attribute ++ self.assertEqual(email.utils.supports_strict_parsing, True) ++ + def test_getaddresses_nasty(self): +- eq = self.assertEqual +- eq(utils.getaddresses(['foo: ;']), [('', '')]) +- eq(utils.getaddresses( +- ['[]*-- =~$']), +- [('', ''), ('', ''), ('', '*--')]) +- eq(utils.getaddresses( +- ['foo: ;', '"Jason R. Mastaler" ']), +- [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) ++ for addresses, expected in ( ++ (['"Sürname, Firstname" '], ++ [('Sürname, Firstname', 'to@example.com')]), ++ ++ (['foo: ;'], ++ [('', '')]), ++ ++ (['foo: ;', '"Jason R. Mastaler" '], ++ [('', ''), ('Jason R. 
Mastaler', 'jason@dom.ain')]), ++ ++ ([r'Pete(A nice \) chap) '], ++ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]), ++ ++ (['(Empty list)(start)Undisclosed recipients :(nobody(I know))'], ++ [('', '')]), ++ ++ (['Mary <@machine.tld:mary@example.net>, , jdoe@test . example'], ++ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]), ++ ++ (['John Doe '], ++ [('John Doe (comment)', 'jdoe@machine.example')]), ++ ++ (['"Mary Smith: Personal Account" '], ++ [('Mary Smith: Personal Account', 'smith@home.example')]), ++ ++ (['Undisclosed recipients:;'], ++ [('', '')]), ++ ++ ([r', "Giant; \"Big\" Box" '], ++ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]), ++ ): ++ with self.subTest(addresses=addresses): ++ self.assertEqual(utils.getaddresses(addresses), ++ expected) ++ self.assertEqual(utils.getaddresses(addresses, strict=False), ++ expected) ++ ++ addresses = ['[]*-- =~$'] ++ self.assertEqual(utils.getaddresses(addresses), ++ [('', '')]) ++ self.assertEqual(utils.getaddresses(addresses, strict=False), ++ [('', ''), ('', ''), ('', '*--')]) + + def test_getaddresses_embedded_comment(self): + """Test proper handling of a nested comment""" +@@ -3535,6 +3675,54 @@ def test_mime_classes_policy_argument(self): + m = cls(*constructor, policy=email.policy.default) + self.assertIs(m.policy, email.policy.default) + ++ def test_iter_escaped_chars(self): ++ self.assertEqual(list(utils._iter_escaped_chars(r'a\\b\"c\\"d')), ++ [(0, 'a'), ++ (2, '\\\\'), ++ (3, 'b'), ++ (5, '\\"'), ++ (6, 'c'), ++ (8, '\\\\'), ++ (9, '"'), ++ (10, 'd')]) ++ self.assertEqual(list(utils._iter_escaped_chars('a\\')), ++ [(0, 'a'), (1, '\\')]) ++ ++ def test_strip_quoted_realnames(self): ++ def check(addr, expected): ++ self.assertEqual(utils._strip_quoted_realnames(addr), expected) ++ ++ check('"Jane Doe" , "John Doe" ', ++ ' , ') ++ check(r'"Jane \"Doe\"." 
', ++ ' ') ++ ++ # special cases ++ check(r'before"name"after', 'beforeafter') ++ check(r'before"name"', 'before') ++ check(r'b"name"', 'b') # single char ++ check(r'"name"after', 'after') ++ check(r'"name"a', 'a') # single char ++ check(r'"name"', '') ++ ++ # no change ++ for addr in ( ++ 'Jane Doe , John Doe ', ++ 'lone " quote', ++ ): ++ self.assertEqual(utils._strip_quoted_realnames(addr), addr) ++ ++ ++ def test_check_parenthesis(self): ++ addr = 'alice@example.net' ++ self.assertTrue(utils._check_parenthesis(f'{addr} (Alice)')) ++ self.assertFalse(utils._check_parenthesis(f'{addr} )Alice(')) ++ self.assertFalse(utils._check_parenthesis(f'{addr} (Alice))')) ++ self.assertFalse(utils._check_parenthesis(f'{addr} ((Alice)')) ++ ++ # Ignore real name between quotes ++ self.assertTrue(utils._check_parenthesis(f'")Alice((" {addr}')) ++ + + # Test the iterator/generators + class TestIterators(TestEmailBase): +diff --git a/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst +new file mode 100644 +index 00000000000000..3d0e9e4078c934 +--- /dev/null ++++ b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst +@@ -0,0 +1,8 @@ ++:func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now ++return ``('', '')`` 2-tuples in more situations where invalid email ++addresses are encountered instead of potentially inaccurate values. Add ++optional *strict* parameter to these two functions: use ``strict=False`` to ++get the old behavior, accept malformed inputs. ++``getattr(email.utils, 'supports_strict_parsing', False)`` can be used to check ++if the *strict* parameter is available. Patch by Thomas Dwyer and Victor ++Stinner to improve the CVE-2023-27043 fix.
+From d3fec3d6aa1461244aacee78f1f23c4302a9473e Mon Sep 17 00:00:00 2001 +From: Petr Viktorin +Date: Fri, 6 Sep 2024 12:59:35 +0200 +Subject: [PATCH] [3.11] gh-102988: Adjust version numbers in versionadded + directives + +--- + Doc/library/email.utils.rst | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst +index 6723dc4f13890d..97ddf49261790a 100644 +--- a/Doc/library/email.utils.rst ++++ b/Doc/library/email.utils.rst +@@ -69,7 +69,7 @@ of the new API. + + If *strict* is true, use a strict parser which rejects malformed inputs. + +- .. versionchanged:: 3.13 ++ .. versionchanged:: 3.11.10 + Add *strict* optional parameter and reject malformed inputs by default. + + +@@ -107,7 +107,7 @@ of the new API. + resent_ccs = msg.get_all('resent-cc', []) + all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs) + +- .. versionchanged:: 3.13 ++ .. versionchanged:: 3.11.10 + Add *strict* optional parameter and reject malformed inputs by default. + + diff --git a/CVE-2024-0450.patch b/CVE-2024-0450.patch new file mode 100644 index 0000000000000000000000000000000000000000..8ed4cf0e3eb054acaf3f3048e0cf748082000816 --- /dev/null +++ b/CVE-2024-0450.patch @@ -0,0 +1,143 @@ +From a956e510f6336d5ae111ba429a61c3ade30a7549 Mon Sep 17 00:00:00 2001 +From: "Miss Islington (bot)" + <31488909+miss-islington@users.noreply.github.com> +Date: Thu, 11 Jan 2024 10:24:47 +0100 +Subject: [PATCH] [3.11] gh-109858: Protect zipfile from "quoted-overlap" + zipbomb (GH-110016) (GH-113913) + +Raise BadZipFile when try to read an entry that overlaps with other entry or +central directory. 
+(cherry picked from commit 66363b9a7b9fe7c99eba3a185b74c5fdbf842eba) + +Co-authored-by: Serhiy Storchaka +--- + Lib/test/test_zipfile.py | 60 +++++++++++++++++++ + Lib/zipfile.py | 12 ++++ + ...-09-28-13-15-51.gh-issue-109858.43e2dg.rst | 3 + + 3 files changed, 75 insertions(+) + create mode 100644 Misc/NEWS.d/next/Library/2023-09-28-13-15-51.gh-issue-109858.43e2dg.rst + +diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py +index c8e0159765ec2c..9354ab74faa94a 100644 +--- a/Lib/test/test_zipfile.py ++++ b/Lib/test/test_zipfile.py +@@ -2216,6 +2216,66 @@ def test_decompress_without_3rd_party_library(self): + with zipfile.ZipFile(zip_file) as zf: + self.assertRaises(RuntimeError, zf.extract, 'a.txt') + ++ @requires_zlib() ++ def test_full_overlap(self): ++ data = ( ++ b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' ++ b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed' ++ b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P' ++ b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2' ++ b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00' ++ b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK' ++ b'\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' ++ b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00\x00' ++ b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00bPK\x05' ++ b'\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00\x00/\x00\x00' ++ b'\x00\x00\x00' ++ ) ++ with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf: ++ self.assertEqual(zipf.namelist(), ['a', 'b']) ++ zi = zipf.getinfo('a') ++ self.assertEqual(zi.header_offset, 0) ++ self.assertEqual(zi.compress_size, 16) ++ self.assertEqual(zi.file_size, 1033) ++ zi = zipf.getinfo('b') ++ self.assertEqual(zi.header_offset, 0) ++ self.assertEqual(zi.compress_size, 16) ++ self.assertEqual(zi.file_size, 1033) ++ self.assertEqual(len(zipf.read('a')), 1033) ++ with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'): ++ zipf.read('b') ++ ++ @requires_zlib() 
++ def test_quoted_overlap(self): ++ data = ( ++ b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05Y\xfc' ++ b'8\x044\x00\x00\x00(\x04\x00\x00\x01\x00\x00\x00a\x00' ++ b'\x1f\x00\xe0\xffPK\x03\x04\x14\x00\x00\x00\x08\x00\xa0l' ++ b'H\x05\xe2\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00' ++ b'\x00\x00b\xed\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\' ++ b'd\x0b`PK\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0' ++ b'lH\x05Y\xfc8\x044\x00\x00\x00(\x04\x00\x00\x01' ++ b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' ++ b'\x00aPK\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0l' ++ b'H\x05\xe2\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00' ++ b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00$\x00\x00\x00' ++ b'bPK\x05\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00' ++ b'\x00S\x00\x00\x00\x00\x00' ++ ) ++ with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf: ++ self.assertEqual(zipf.namelist(), ['a', 'b']) ++ zi = zipf.getinfo('a') ++ self.assertEqual(zi.header_offset, 0) ++ self.assertEqual(zi.compress_size, 52) ++ self.assertEqual(zi.file_size, 1064) ++ zi = zipf.getinfo('b') ++ self.assertEqual(zi.header_offset, 36) ++ self.assertEqual(zi.compress_size, 16) ++ self.assertEqual(zi.file_size, 1033) ++ with self.assertRaisesRegex(zipfile.BadZipFile, 'Overlapped entries'): ++ zipf.read('a') ++ self.assertEqual(len(zipf.read('b')), 1033) ++ + def tearDown(self): + unlink(TESTFN) + unlink(TESTFN2) +diff --git a/Lib/zipfile.py b/Lib/zipfile.py +index 6189db5e3e420d..058d7163ea17ac 100644 +--- a/Lib/zipfile.py ++++ b/Lib/zipfile.py +@@ -367,6 +367,7 @@ class ZipInfo (object): + 'compress_size', + 'file_size', + '_raw_time', ++ '_end_offset', + ) + + def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): +@@ -408,6 +409,7 @@ def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): + self.external_attr = 0 # External file attributes + self.compress_size = 0 # Size of the compressed file + self.file_size = 0 # Size of the uncompressed file ++ 
self._end_offset = None # Start of the next local header or central directory + # Other attributes are set by class ZipFile: + # header_offset Byte offset to the file header + # CRC CRC-32 of the uncompressed file +@@ -1437,6 +1439,12 @@ def _RealGetContents(self): + if self.debug > 2: + print("total", total) + ++ end_offset = self.start_dir ++ for zinfo in sorted(self.filelist, ++ key=lambda zinfo: zinfo.header_offset, ++ reverse=True): ++ zinfo._end_offset = end_offset ++ end_offset = zinfo.header_offset + + def namelist(self): + """Return a list of file names in the archive.""" +@@ -1590,6 +1598,10 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False): + 'File name in directory %r and header %r differ.' + % (zinfo.orig_filename, fname)) + ++ if (zinfo._end_offset is not None and ++ zef_file.tell() + zinfo.compress_size > zinfo._end_offset): ++ raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r} (possible zip bomb)") ++ + # check for encrypted flag & handle password + is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED + if is_encrypted: +diff --git a/Misc/NEWS.d/next/Library/2023-09-28-13-15-51.gh-issue-109858.43e2dg.rst b/Misc/NEWS.d/next/Library/2023-09-28-13-15-51.gh-issue-109858.43e2dg.rst +new file mode 100644 +index 00000000000000..be279caffc46ee +--- /dev/null ++++ b/Misc/NEWS.d/next/Library/2023-09-28-13-15-51.gh-issue-109858.43e2dg.rst +@@ -0,0 +1,3 @@ ++Protect :mod:`zipfile` from "quoted-overlap" zipbomb. It now raises ++BadZipFile when trying to read an entry that overlaps with another entry or ++the central directory.
diff --git a/CVE-2024-3219.patch b/CVE-2024-3219.patch new file mode 100644 index 0000000000000000000000000000000000000000..f90caaded81d0f6ed32ced42f175c02a3d099aff --- /dev/null +++ b/CVE-2024-3219.patch @@ -0,0 +1,215 @@ +From 5f90abaa786f994db3907fc31e2ee00ea2cf0929 Mon Sep 17 00:00:00 2001 +From: "Miss Islington (bot)" + <31488909+miss-islington@users.noreply.github.com> +Date: Tue, 30 Jul 2024 14:43:45 +0200 +Subject: [PATCH] [3.11] gh-122133: Authenticate socket connection for + `socket.socketpair()` fallback (GH-122134) (#122426) + +Authenticate socket connection for `socket.socketpair()` fallback when the platform does not have a native `socketpair` C API. We authenticate in-process using `getsockname` and `getpeername` (thanks to Nathaniel J Smith for that suggestion). + +(cherry picked from commit 78df1043dbdce5c989600616f9f87b4ee72944e5) + +Co-authored-by: Seth Michael Larson +Co-authored-by: Gregory P. Smith +--- + Lib/socket.py | 17 +++ + Lib/test/test_socket.py | 128 +++++++++++++++++- + ...-07-22-13-11-28.gh-issue-122133.0mPeta.rst | 5 + + 3 files changed, 147 insertions(+), 3 deletions(-) + create mode 100644 Misc/NEWS.d/next/Security/2024-07-22-13-11-28.gh-issue-122133.0mPeta.rst + +diff --git a/Lib/socket.py b/Lib/socket.py +index a0567b76bcfe2b..591d4739a64a91 100644 +--- a/Lib/socket.py ++++ b/Lib/socket.py +@@ -648,6 +648,23 @@ def socketpair(family=AF_INET, type=SOCK_STREAM, proto=0): + raise + finally: + lsock.close() ++ ++ # Authenticating avoids using a connection from something else ++ # able to connect to {host}:{port} instead of us. ++ # We expect only AF_INET and AF_INET6 families. ++ try: ++ if ( ++ ssock.getsockname() != csock.getpeername() ++ or csock.getsockname() != ssock.getpeername() ++ ): ++ raise ConnectionError("Unexpected peer connection") ++ except: ++ # getsockname() and getpeername() can fail ++ # if either socket isn't connected.
++ ssock.close() ++ csock.close() ++ raise ++ + return (ssock, csock) + __all__.append("socketpair") + +diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py +index 42adc573ecc2ea..a60eb436c7b5e0 100644 +--- a/Lib/test/test_socket.py ++++ b/Lib/test/test_socket.py +@@ -542,19 +542,27 @@ class SocketPairTest(unittest.TestCase, ThreadableTest): + def __init__(self, methodName='runTest'): + unittest.TestCase.__init__(self, methodName=methodName) + ThreadableTest.__init__(self) ++ self.cli = None ++ self.serv = None ++ ++ def socketpair(self): ++ # To be overridden by some child classes. ++ return socket.socketpair() + + def setUp(self): +- self.serv, self.cli = socket.socketpair() ++ self.serv, self.cli = self.socketpair() + + def tearDown(self): +- self.serv.close() ++ if self.serv: ++ self.serv.close() + self.serv = None + + def clientSetUp(self): + pass + + def clientTearDown(self): +- self.cli.close() ++ if self.cli: ++ self.cli.close() + self.cli = None + ThreadableTest.clientTearDown(self) + +@@ -4667,6 +4675,120 @@ def _testSend(self): + self.assertEqual(msg, MSG) + + ++class PurePythonSocketPairTest(SocketPairTest): ++ ++ # Explicitly use socketpair AF_INET or AF_INET6 to ensure that is the ++ # code path we're using regardless platform is the pure python one where ++ # `_socket.socketpair` does not exist. (AF_INET does not work with ++ # _socket.socketpair on many platforms). ++ def socketpair(self): ++ # called by super().setUp(). ++ try: ++ return socket.socketpair(socket.AF_INET6) ++ except OSError: ++ return socket.socketpair(socket.AF_INET) ++ ++ # Local imports in this class make for easy security fix backporting. ++ ++ def setUp(self): ++ import _socket ++ self._orig_sp = getattr(_socket, 'socketpair', None) ++ if self._orig_sp is not None: ++ # This forces the version using the non-OS provided socketpair ++ # emulation via an AF_INET socket in Lib/socket.py. 
++ del _socket.socketpair ++ import importlib ++ global socket ++ socket = importlib.reload(socket) ++ else: ++ pass # This platform already uses the non-OS provided version. ++ super().setUp() ++ ++ def tearDown(self): ++ super().tearDown() ++ import _socket ++ if self._orig_sp is not None: ++ # Restore the default socket.socketpair definition. ++ _socket.socketpair = self._orig_sp ++ import importlib ++ global socket ++ socket = importlib.reload(socket) ++ ++ def test_recv(self): ++ msg = self.serv.recv(1024) ++ self.assertEqual(msg, MSG) ++ ++ def _test_recv(self): ++ self.cli.send(MSG) ++ ++ def test_send(self): ++ self.serv.send(MSG) ++ ++ def _test_send(self): ++ msg = self.cli.recv(1024) ++ self.assertEqual(msg, MSG) ++ ++ def test_ipv4(self): ++ cli, srv = socket.socketpair(socket.AF_INET) ++ cli.close() ++ srv.close() ++ ++ def _test_ipv4(self): ++ pass ++ ++ @unittest.skipIf(not hasattr(_socket, 'IPPROTO_IPV6') or ++ not hasattr(_socket, 'IPV6_V6ONLY'), ++ "IPV6_V6ONLY option not supported") ++ @unittest.skipUnless(socket_helper.IPV6_ENABLED, 'IPv6 required for this test') ++ def test_ipv6(self): ++ cli, srv = socket.socketpair(socket.AF_INET6) ++ cli.close() ++ srv.close() ++ ++ def _test_ipv6(self): ++ pass ++ ++ def test_injected_authentication_failure(self): ++ orig_getsockname = socket.socket.getsockname ++ inject_sock = None ++ ++ def inject_getsocketname(self): ++ nonlocal inject_sock ++ sockname = orig_getsockname(self) ++ # Connect to the listening socket ahead of the ++ # client socket. 
++ if inject_sock is None: ++ inject_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) ++ inject_sock.setblocking(False) ++ try: ++ inject_sock.connect(sockname[:2]) ++ except (BlockingIOError, InterruptedError): ++ pass ++ inject_sock.setblocking(True) ++ return sockname ++ ++ sock1 = sock2 = None ++ try: ++ socket.socket.getsockname = inject_getsocketname ++ with self.assertRaises(OSError): ++ sock1, sock2 = socket.socketpair() ++ finally: ++ socket.socket.getsockname = orig_getsockname ++ if inject_sock: ++ inject_sock.close() ++ if sock1: # This cleanup isn't needed on a successful test. ++ sock1.close() ++ if sock2: ++ sock2.close() ++ ++ def _test_injected_authentication_failure(self): ++ # No-op. Exists for base class threading infrastructure to call. ++ # We could refactor this test into its own lesser class along with the ++ # setUp and tearDown code to construct an ideal; it is simpler to keep ++ # it here and live with extra overhead one this _one_ failure test. ++ pass ++ ++ + class NonBlockingTCPTests(ThreadedTCPSocketTest): + + def __init__(self, methodName='runTest'): +diff --git a/Misc/NEWS.d/next/Security/2024-07-22-13-11-28.gh-issue-122133.0mPeta.rst b/Misc/NEWS.d/next/Security/2024-07-22-13-11-28.gh-issue-122133.0mPeta.rst +new file mode 100644 +index 00000000000000..3544eb3824d0da +--- /dev/null ++++ b/Misc/NEWS.d/next/Security/2024-07-22-13-11-28.gh-issue-122133.0mPeta.rst +@@ -0,0 +1,5 @@ ++Authenticate the socket connection for the ``socket.socketpair()`` fallback ++on platforms where ``AF_UNIX`` is not available like Windows. ++ ++Patch by Gregory P. Smith and Seth Larson . 
Reported by Ellie ++ diff --git a/CVE-2024-4032.patch b/CVE-2024-4032.patch new file mode 100644 index 0000000000000000000000000000000000000000..181ed150ac3b12c8be1bc97d8e14c10d8a0b2a63 --- /dev/null +++ b/CVE-2024-4032.patch @@ -0,0 +1,343 @@ +From ba431579efdcbaed7a96f2ac4ea0775879a332fb Mon Sep 17 00:00:00 2001 +From: Petr Viktorin +Date: Thu, 25 Apr 2024 14:45:48 +0200 +Subject: [PATCH] [3.11] gh-113171: gh-65056: Fix "private" (non-global) IP + address ranges (GH-113179) (GH-113186) (GH-118177) (#118227) + +--- + Doc/library/ipaddress.rst | 43 +++++++- + Doc/whatsnew/3.11.rst | 9 ++ + Lib/ipaddress.py | 99 +++++++++++++++---- + Lib/test/test_ipaddress.py | 21 +++- + ...-03-14-01-38-44.gh-issue-113171.VFnObz.rst | 9 ++ + 5 files changed, 157 insertions(+), 24 deletions(-) + create mode 100644 Misc/NEWS.d/next/Library/2024-03-14-01-38-44.gh-issue-113171.VFnObz.rst + +diff --git a/Doc/library/ipaddress.rst b/Doc/library/ipaddress.rst +index 03dc956cd1352a..f57fa15aa5b930 100644 +--- a/Doc/library/ipaddress.rst ++++ b/Doc/library/ipaddress.rst +@@ -178,18 +178,53 @@ write code that handles both IP versions correctly. Address objects are + + .. attribute:: is_private + +- ``True`` if the address is allocated for private networks. See ++ ``True`` if the address is defined as not globally reachable by + iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ +- (for IPv6). ++ (for IPv6) with the following exceptions: ++ ++ * ``is_private`` is ``False`` for the shared address space (``100.64.0.0/10``) ++ * For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the ++ semantics of the underlying IPv4 addresses and the following condition holds ++ (see :attr:`IPv6Address.ipv4_mapped`):: ++ ++ address.is_private == address.ipv4_mapped.is_private ++ ++ ``is_private`` has value opposite to :attr:`is_global`, except for the shared address space ++ (``100.64.0.0/10`` range) where they are both ``False``. ++ ++ .. 
versionchanged:: 3.11.10 ++ ++ Fixed some false positives and false negatives. ++ ++ * ``192.0.0.0/24`` is considered private with the exception of ``192.0.0.9/32`` and ++ ``192.0.0.10/32`` (previously: only the ``192.0.0.0/29`` sub-range was considered private). ++ * ``64:ff9b:1::/48`` is considered private. ++ * ``2002::/16`` is considered private. ++ * There are exceptions within ``2001::/23`` (otherwise considered private): ``2001:1::1/128``, ++ ``2001:1::2/128``, ``2001:3::/32``, ``2001:4:112::/48``, ``2001:20::/28``, ``2001:30::/28``. ++ The exceptions are not considered private. + + .. attribute:: is_global + +- ``True`` if the address is allocated for public networks. See ++ ``True`` if the address is defined as globally reachable by + iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ +- (for IPv6). ++ (for IPv6) with the following exception: ++ ++ For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the ++ semantics of the underlying IPv4 addresses and the following condition holds ++ (see :attr:`IPv6Address.ipv4_mapped`):: ++ ++ address.is_global == address.ipv4_mapped.is_global ++ ++ ``is_global`` has value opposite to :attr:`is_private`, except for the shared address space ++ (``100.64.0.0/10`` range) where they are both ``False``. + + .. versionadded:: 3.4 + ++ .. versionchanged:: 3.11.10 ++ ++ Fixed some false positives and false negatives, see :attr:`is_private` for details. ++ + .. attribute:: is_unspecified + + ``True`` if the address is unspecified. See :RFC:`5735` (for IPv4) +diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst +index f670fa1f097aa1..42b61c75c7e621 100644 +--- a/Doc/whatsnew/3.11.rst ++++ b/Doc/whatsnew/3.11.rst +@@ -2727,3 +2727,12 @@ OpenSSL + * Windows builds and macOS installers from python.org now use OpenSSL 3.0. + + .. 
_libb2: https://www.blake2.net/ ++ ++Notable changes in 3.11.10 ++========================== ++ ++ipaddress ++--------- ++ ++* Fixed ``is_global`` and ``is_private`` behavior in ``IPv4Address``, ++ ``IPv6Address``, ``IPv4Network`` and ``IPv6Network``. +diff --git a/Lib/ipaddress.py b/Lib/ipaddress.py +index 16ba16cd7de49a..567beb37e06318 100644 +--- a/Lib/ipaddress.py ++++ b/Lib/ipaddress.py +@@ -1086,7 +1086,11 @@ def is_private(self): + """ + return any(self.network_address in priv_network and + self.broadcast_address in priv_network +- for priv_network in self._constants._private_networks) ++ for priv_network in self._constants._private_networks) and all( ++ self.network_address not in network and ++ self.broadcast_address not in network ++ for network in self._constants._private_networks_exceptions ++ ) + + @property + def is_global(self): +@@ -1333,18 +1337,41 @@ def is_reserved(self): + @property + @functools.lru_cache() + def is_private(self): +- """Test if this address is allocated for private networks. ++ """``True`` if the address is defined as not globally reachable by ++ iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ ++ (for IPv6) with the following exceptions: + +- Returns: +- A boolean, True if the address is reserved per +- iana-ipv4-special-registry. ++ * ``is_private`` is ``False`` for ``100.64.0.0/10`` ++ * For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the ++ semantics of the underlying IPv4 addresses and the following condition holds ++ (see :attr:`IPv6Address.ipv4_mapped`):: + ++ address.is_private == address.ipv4_mapped.is_private ++ ++ ``is_private`` has value opposite to :attr:`is_global`, except for the ``100.64.0.0/10`` ++ IPv4 range where they are both ``False``. 
+ """ +- return any(self in net for net in self._constants._private_networks) ++ return ( ++ any(self in net for net in self._constants._private_networks) ++ and all(self not in net for net in self._constants._private_networks_exceptions) ++ ) + + @property + @functools.lru_cache() + def is_global(self): ++ """``True`` if the address is defined as globally reachable by ++ iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ ++ (for IPv6) with the following exception: ++ ++ For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the ++ semantics of the underlying IPv4 addresses and the following condition holds ++ (see :attr:`IPv6Address.ipv4_mapped`):: ++ ++ address.is_global == address.ipv4_mapped.is_global ++ ++ ``is_global`` has value opposite to :attr:`is_private`, except for the ``100.64.0.0/10`` ++ IPv4 range where they are both ``False``. ++ """ + return self not in self._constants._public_network and not self.is_private + + @property +@@ -1548,13 +1575,15 @@ class _IPv4Constants: + + _public_network = IPv4Network('100.64.0.0/10') + ++ # Not globally reachable address blocks listed on ++ # https://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml + _private_networks = [ + IPv4Network('0.0.0.0/8'), + IPv4Network('10.0.0.0/8'), + IPv4Network('127.0.0.0/8'), + IPv4Network('169.254.0.0/16'), + IPv4Network('172.16.0.0/12'), +- IPv4Network('192.0.0.0/29'), ++ IPv4Network('192.0.0.0/24'), + IPv4Network('192.0.0.170/31'), + IPv4Network('192.0.2.0/24'), + IPv4Network('192.168.0.0/16'), +@@ -1565,6 +1594,11 @@ class _IPv4Constants: + IPv4Network('255.255.255.255/32'), + ] + ++ _private_networks_exceptions = [ ++ IPv4Network('192.0.0.9/32'), ++ IPv4Network('192.0.0.10/32'), ++ ] ++ + _reserved_network = IPv4Network('240.0.0.0/4') + + _unspecified_address = IPv4Address('0.0.0.0') +@@ -2010,27 +2044,42 @@ def is_site_local(self): + @property + @functools.lru_cache() + def is_private(self): +- """Test if 
this address is allocated for private networks. ++ """``True`` if the address is defined as not globally reachable by ++ iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ ++ (for IPv6) with the following exceptions: + +- Returns: +- A boolean, True if the address is reserved per +- iana-ipv6-special-registry, or is ipv4_mapped and is +- reserved in the iana-ipv4-special-registry. ++ * ``is_private`` is ``False`` for ``100.64.0.0/10`` ++ * For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the ++ semantics of the underlying IPv4 addresses and the following condition holds ++ (see :attr:`IPv6Address.ipv4_mapped`):: + ++ address.is_private == address.ipv4_mapped.is_private ++ ++ ``is_private`` has value opposite to :attr:`is_global`, except for the ``100.64.0.0/10`` ++ IPv4 range where they are both ``False``. + """ + ipv4_mapped = self.ipv4_mapped + if ipv4_mapped is not None: + return ipv4_mapped.is_private +- return any(self in net for net in self._constants._private_networks) ++ return ( ++ any(self in net for net in self._constants._private_networks) ++ and all(self not in net for net in self._constants._private_networks_exceptions) ++ ) + + @property + def is_global(self): +- """Test if this address is allocated for public networks. ++ """``True`` if the address is defined as globally reachable by ++ iana-ipv4-special-registry_ (for IPv4) or iana-ipv6-special-registry_ ++ (for IPv6) with the following exception: + +- Returns: +- A boolean, true if the address is not reserved per +- iana-ipv6-special-registry. ++ For IPv4-mapped IPv6-addresses the ``is_private`` value is determined by the ++ semantics of the underlying IPv4 addresses and the following condition holds ++ (see :attr:`IPv6Address.ipv4_mapped`):: ++ ++ address.is_global == address.ipv4_mapped.is_global + ++ ``is_global`` has value opposite to :attr:`is_private`, except for the ``100.64.0.0/10`` ++ IPv4 range where they are both ``False``. 
+ """ + return not self.is_private + +@@ -2271,19 +2320,31 @@ class _IPv6Constants: + + _multicast_network = IPv6Network('ff00::/8') + ++ # Not globally reachable address blocks listed on ++ # https://www.iana.org/assignments/iana-ipv6-special-registry/iana-ipv6-special-registry.xhtml + _private_networks = [ + IPv6Network('::1/128'), + IPv6Network('::/128'), + IPv6Network('::ffff:0:0/96'), ++ IPv6Network('64:ff9b:1::/48'), + IPv6Network('100::/64'), + IPv6Network('2001::/23'), +- IPv6Network('2001:2::/48'), + IPv6Network('2001:db8::/32'), +- IPv6Network('2001:10::/28'), ++ # IANA says N/A, let's consider it not globally reachable to be safe ++ IPv6Network('2002::/16'), + IPv6Network('fc00::/7'), + IPv6Network('fe80::/10'), + ] + ++ _private_networks_exceptions = [ ++ IPv6Network('2001:1::1/128'), ++ IPv6Network('2001:1::2/128'), ++ IPv6Network('2001:3::/32'), ++ IPv6Network('2001:4:112::/48'), ++ IPv6Network('2001:20::/28'), ++ IPv6Network('2001:30::/28'), ++ ] ++ + _reserved_networks = [ + IPv6Network('::/8'), IPv6Network('100::/8'), + IPv6Network('200::/7'), IPv6Network('400::/6'), +diff --git a/Lib/test/test_ipaddress.py b/Lib/test/test_ipaddress.py +index fc27628af17f8d..16c34163a007a2 100644 +--- a/Lib/test/test_ipaddress.py ++++ b/Lib/test/test_ipaddress.py +@@ -2269,6 +2269,10 @@ def testReservedIpv4(self): + self.assertEqual(True, ipaddress.ip_address( + '172.31.255.255').is_private) + self.assertEqual(False, ipaddress.ip_address('172.32.0.0').is_private) ++ self.assertFalse(ipaddress.ip_address('192.0.0.0').is_global) ++ self.assertTrue(ipaddress.ip_address('192.0.0.9').is_global) ++ self.assertTrue(ipaddress.ip_address('192.0.0.10').is_global) ++ self.assertFalse(ipaddress.ip_address('192.0.0.255').is_global) + + self.assertEqual(True, + ipaddress.ip_address('169.254.100.200').is_link_local) +@@ -2294,6 +2298,7 @@ def testPrivateNetworks(self): + self.assertEqual(True, ipaddress.ip_network("169.254.0.0/16").is_private) + self.assertEqual(True, 
ipaddress.ip_network("172.16.0.0/12").is_private) + self.assertEqual(True, ipaddress.ip_network("192.0.0.0/29").is_private) ++ self.assertEqual(False, ipaddress.ip_network("192.0.0.9/32").is_private) + self.assertEqual(True, ipaddress.ip_network("192.0.0.170/31").is_private) + self.assertEqual(True, ipaddress.ip_network("192.0.2.0/24").is_private) + self.assertEqual(True, ipaddress.ip_network("192.168.0.0/16").is_private) +@@ -2310,8 +2315,8 @@ def testPrivateNetworks(self): + self.assertEqual(True, ipaddress.ip_network("::/128").is_private) + self.assertEqual(True, ipaddress.ip_network("::ffff:0:0/96").is_private) + self.assertEqual(True, ipaddress.ip_network("100::/64").is_private) +- self.assertEqual(True, ipaddress.ip_network("2001::/23").is_private) + self.assertEqual(True, ipaddress.ip_network("2001:2::/48").is_private) ++ self.assertEqual(False, ipaddress.ip_network("2001:3::/48").is_private) + self.assertEqual(True, ipaddress.ip_network("2001:db8::/32").is_private) + self.assertEqual(True, ipaddress.ip_network("2001:10::/28").is_private) + self.assertEqual(True, ipaddress.ip_network("fc00::/7").is_private) +@@ -2390,6 +2395,20 @@ def testReservedIpv6(self): + self.assertEqual(True, ipaddress.ip_address('0::0').is_unspecified) + self.assertEqual(False, ipaddress.ip_address('::1').is_unspecified) + ++ self.assertFalse(ipaddress.ip_address('64:ff9b:1::').is_global) ++ self.assertFalse(ipaddress.ip_address('2001::').is_global) ++ self.assertTrue(ipaddress.ip_address('2001:1::1').is_global) ++ self.assertTrue(ipaddress.ip_address('2001:1::2').is_global) ++ self.assertFalse(ipaddress.ip_address('2001:2::').is_global) ++ self.assertTrue(ipaddress.ip_address('2001:3::').is_global) ++ self.assertFalse(ipaddress.ip_address('2001:4::').is_global) ++ self.assertTrue(ipaddress.ip_address('2001:4:112::').is_global) ++ self.assertFalse(ipaddress.ip_address('2001:10::').is_global) ++ self.assertTrue(ipaddress.ip_address('2001:20::').is_global) ++ 
self.assertTrue(ipaddress.ip_address('2001:30::').is_global) ++ self.assertFalse(ipaddress.ip_address('2001:40::').is_global) ++ self.assertFalse(ipaddress.ip_address('2002::').is_global) ++ + # some generic IETF reserved addresses + self.assertEqual(True, ipaddress.ip_address('100::').is_reserved) + self.assertEqual(True, ipaddress.ip_network('4000::1/128').is_reserved) +diff --git a/Misc/NEWS.d/next/Library/2024-03-14-01-38-44.gh-issue-113171.VFnObz.rst b/Misc/NEWS.d/next/Library/2024-03-14-01-38-44.gh-issue-113171.VFnObz.rst +new file mode 100644 +index 00000000000000..f9a72473be4e2c +--- /dev/null ++++ b/Misc/NEWS.d/next/Library/2024-03-14-01-38-44.gh-issue-113171.VFnObz.rst +@@ -0,0 +1,9 @@ ++Fixed various false positives and false negatives in ++ ++* :attr:`ipaddress.IPv4Address.is_private` (see these docs for details) ++* :attr:`ipaddress.IPv4Address.is_global` ++* :attr:`ipaddress.IPv6Address.is_private` ++* :attr:`ipaddress.IPv6Address.is_global` ++ ++Also in the corresponding :class:`ipaddress.IPv4Network` and :class:`ipaddress.IPv6Network` ++attributes. diff --git a/CVE-2024-6232.patch b/CVE-2024-6232.patch new file mode 100644 index 0000000000000000000000000000000000000000..b51e4b5f2d427e55aa0ab89c6b42bf7ebacef763 --- /dev/null +++ b/CVE-2024-6232.patch @@ -0,0 +1,245 @@ +From b7431133441a92670132600e5af78b64dd25539b Mon Sep 17 00:00:00 2001 +From: Seth Michael Larson +Date: Sat, 31 Aug 2024 17:17:05 -0500 +Subject: [PATCH] [3.11] gh-121285: Remove backtracking when parsing tarfile + headers (GH-121286) + +* Remove backtracking when parsing tarfile headers +* Rewrite PAX header parsing to be stricter +* Optimize parsing of GNU extended sparse headers v0.0 + +(cherry picked from commit 34ddb64d088dd7ccc321f6103d23153256caa5d4) + +Co-authored-by: Seth Michael Larson +Co-authored-by: Kirill Podoprigora +Co-authored-by: Gregory P. 
Smith +--- + Lib/tarfile.py | 105 +++++++++++------- + Lib/test/test_tarfile.py | 42 +++++++ + ...-07-02-13-39-20.gh-issue-121285.hrl-yI.rst | 2 + + 3 files changed, 111 insertions(+), 38 deletions(-) + create mode 100644 Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst + +diff --git a/Lib/tarfile.py b/Lib/tarfile.py +index 612217b1ad05b3..0d6b925533b63d 100755 +--- a/Lib/tarfile.py ++++ b/Lib/tarfile.py +@@ -842,6 +842,9 @@ def data_filter(member, dest_path): + # Sentinel for replace() defaults, meaning "don't change the attribute" + _KEEP = object() + ++# Header length is digits followed by a space. ++_header_length_prefix_re = re.compile(br"([0-9]{1,20}) ") ++ + class TarInfo(object): + """Informational class which holds the details about an + archive member given by a tar header block. +@@ -1411,41 +1414,59 @@ def _proc_pax(self, tarfile): + else: + pax_headers = tarfile.pax_headers.copy() + +- # Check if the pax header contains a hdrcharset field. This tells us +- # the encoding of the path, linkpath, uname and gname fields. Normally, +- # these fields are UTF-8 encoded but since POSIX.1-2008 tar +- # implementations are allowed to store them as raw binary strings if +- # the translation to UTF-8 fails. +- match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf) +- if match is not None: +- pax_headers["hdrcharset"] = match.group(1).decode("utf-8") +- +- # For the time being, we don't care about anything other than "BINARY". +- # The only other value that is currently allowed by the standard is +- # "ISO-IR 10646 2000 UTF-8" in other words UTF-8. +- hdrcharset = pax_headers.get("hdrcharset") +- if hdrcharset == "BINARY": +- encoding = tarfile.encoding +- else: +- encoding = "utf-8" +- + # Parse pax header information. A record looks like that: + # "%d %s=%s\n" % (length, keyword, value). length is the size + # of the complete record including the length field itself and +- # the newline. keyword and value are both UTF-8 encoded strings. 
+- regex = re.compile(br"(\d+) ([^=]+)=") ++ # the newline. + pos = 0 +- while True: +- match = regex.match(buf, pos) +- if not match: +- break ++ encoding = None ++ raw_headers = [] ++ while len(buf) > pos and buf[pos] != 0x00: ++ if not (match := _header_length_prefix_re.match(buf, pos)): ++ raise InvalidHeaderError("invalid header") ++ try: ++ length = int(match.group(1)) ++ except ValueError: ++ raise InvalidHeaderError("invalid header") ++ # Headers must be at least 5 bytes, shortest being '5 x=\n'. ++ # Value is allowed to be empty. ++ if length < 5: ++ raise InvalidHeaderError("invalid header") ++ if pos + length > len(buf): ++ raise InvalidHeaderError("invalid header") + +- length, keyword = match.groups() +- length = int(length) +- if length == 0: ++ header_value_end_offset = match.start(1) + length - 1 # Last byte of the header ++ keyword_and_value = buf[match.end(1) + 1:header_value_end_offset] ++ raw_keyword, equals, raw_value = keyword_and_value.partition(b"=") ++ ++ # Check the framing of the header. The last character must be '\n' (0x0A) ++ if not raw_keyword or equals != b"=" or buf[header_value_end_offset] != 0x0A: + raise InvalidHeaderError("invalid header") +- value = buf[match.end(2) + 1:match.start(1) + length - 1] ++ raw_headers.append((length, raw_keyword, raw_value)) ++ ++ # Check if the pax header contains a hdrcharset field. This tells us ++ # the encoding of the path, linkpath, uname and gname fields. Normally, ++ # these fields are UTF-8 encoded but since POSIX.1-2008 tar ++ # implementations are allowed to store them as raw binary strings if ++ # the translation to UTF-8 fails. For the time being, we don't care about ++ # anything other than "BINARY". The only other value that is currently ++ # allowed by the standard is "ISO-IR 10646 2000 UTF-8" in other words UTF-8. ++ # Note that we only follow the initial 'hdrcharset' setting to preserve ++ # the initial behavior of the 'tarfile' module. 
++ if raw_keyword == b"hdrcharset" and encoding is None: ++ if raw_value == b"BINARY": ++ encoding = tarfile.encoding ++ else: # This branch ensures only the first 'hdrcharset' header is used. ++ encoding = "utf-8" ++ ++ pos += length + ++ # If no explicit hdrcharset is set, we use UTF-8 as a default. ++ if encoding is None: ++ encoding = "utf-8" ++ ++ # After parsing the raw headers we can decode them to text. ++ for length, raw_keyword, raw_value in raw_headers: + # Normally, we could just use "utf-8" as the encoding and "strict" + # as the error handler, but we better not take the risk. For + # example, GNU tar <= 1.23 is known to store filenames it cannot +@@ -1453,17 +1474,16 @@ def _proc_pax(self, tarfile): + # hdrcharset=BINARY header). + # We first try the strict standard encoding, and if that fails we + # fall back on the user's encoding and error handler. +- keyword = self._decode_pax_field(keyword, "utf-8", "utf-8", ++ keyword = self._decode_pax_field(raw_keyword, "utf-8", "utf-8", + tarfile.errors) + if keyword in PAX_NAME_FIELDS: +- value = self._decode_pax_field(value, encoding, tarfile.encoding, ++ value = self._decode_pax_field(raw_value, encoding, tarfile.encoding, + tarfile.errors) + else: +- value = self._decode_pax_field(value, "utf-8", "utf-8", ++ value = self._decode_pax_field(raw_value, "utf-8", "utf-8", + tarfile.errors) + + pax_headers[keyword] = value +- pos += length + + # Fetch the next header. + try: +@@ -1478,7 +1498,7 @@ def _proc_pax(self, tarfile): + + elif "GNU.sparse.size" in pax_headers: + # GNU extended sparse format version 0.0. +- self._proc_gnusparse_00(next, pax_headers, buf) ++ self._proc_gnusparse_00(next, raw_headers) + + elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0": + # GNU extended sparse format version 1.0. 
+@@ -1500,15 +1520,24 @@ def _proc_pax(self, tarfile): + + return next + +- def _proc_gnusparse_00(self, next, pax_headers, buf): ++ def _proc_gnusparse_00(self, next, raw_headers): + """Process a GNU tar extended sparse header, version 0.0. + """ + offsets = [] +- for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf): +- offsets.append(int(match.group(1))) + numbytes = [] +- for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf): +- numbytes.append(int(match.group(1))) ++ for _, keyword, value in raw_headers: ++ if keyword == b"GNU.sparse.offset": ++ try: ++ offsets.append(int(value.decode())) ++ except ValueError: ++ raise InvalidHeaderError("invalid header") ++ ++ elif keyword == b"GNU.sparse.numbytes": ++ try: ++ numbytes.append(int(value.decode())) ++ except ValueError: ++ raise InvalidHeaderError("invalid header") ++ + next.sparse = list(zip(offsets, numbytes)) + + def _proc_gnusparse_01(self, next, pax_headers): +diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py +index 389da7be3a3247..c99c88ce93af02 100644 +--- a/Lib/test/test_tarfile.py ++++ b/Lib/test/test_tarfile.py +@@ -1208,6 +1208,48 @@ def test_pax_number_fields(self): + finally: + tar.close() + ++ def test_pax_header_bad_formats(self): ++ # The fields from the pax header have priority over the ++ # TarInfo. 
++ pax_header_replacements = ( ++ b" foo=bar\n", ++ b"0 \n", ++ b"1 \n", ++ b"2 \n", ++ b"3 =\n", ++ b"4 =a\n", ++ b"1000000 foo=bar\n", ++ b"0 foo=bar\n", ++ b"-12 foo=bar\n", ++ b"000000000000000000000000036 foo=bar\n", ++ ) ++ pax_headers = {"foo": "bar"} ++ ++ for replacement in pax_header_replacements: ++ with self.subTest(header=replacement): ++ tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, ++ encoding="iso8859-1") ++ try: ++ t = tarfile.TarInfo() ++ t.name = "pax" # non-ASCII ++ t.uid = 1 ++ t.pax_headers = pax_headers ++ tar.addfile(t) ++ finally: ++ tar.close() ++ ++ with open(tmpname, "rb") as f: ++ data = f.read() ++ self.assertIn(b"11 foo=bar\n", data) ++ data = data.replace(b"11 foo=bar\n", replacement) ++ ++ with open(tmpname, "wb") as f: ++ f.truncate() ++ f.write(data) ++ ++ with self.assertRaisesRegex(tarfile.ReadError, r"method tar: ReadError\('invalid header'\)"): ++ tarfile.open(tmpname, encoding="iso8859-1") ++ + + class WriteTestBase(TarTest): + # Put all write tests in here that are supposed to be tested +diff --git a/Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst b/Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst +new file mode 100644 +index 00000000000000..81f918bfe2b255 +--- /dev/null ++++ b/Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst +@@ -0,0 +1,2 @@ ++Remove backtracking from tarfile header parsing for ``hdrcharset``, PAX, and ++GNU sparse headers. 
diff --git a/CVE-2024-6923.patch b/CVE-2024-6923.patch new file mode 100644 index 0000000000000000000000000000000000000000..77c684258f28c131bf5ba4051537bab45091fe12 --- /dev/null +++ b/CVE-2024-6923.patch @@ -0,0 +1,366 @@ +From f7c0f09e69e950cf3c5ada9dbde93898eb975533 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=C5=81ukasz=20Langa?= +Date: Wed, 4 Sep 2024 17:37:28 +0200 +Subject: [PATCH] [3.11] gh-121650: Encode newlines in headers, and verify + headers are sound (GH-122233) (#122608) + +Per RFC 2047: + +> [...] these encoding schemes allow the +> encoding of arbitrary octet values, mail readers that implement this +> decoding should also ensure that display of the decoded data on the +> recipient's terminal will not cause unwanted side-effects + +It seems that the "quoted-word" scheme is a valid way to include +a newline character in a header value, just like we already allow +undecodable bytes or control characters. +They do need to be properly quoted when serialized to text, though. + +Verify that email headers are well-formed. + +This should fail for custom fold() implementations that aren't careful +about newlines. 
+ +(cherry picked from commit 097633981879b3c9de9a1dd120d3aa585ecc2384) + +Co-authored-by: Petr Viktorin +Co-authored-by: Bas Bloemsaat +Co-authored-by: Serhiy Storchaka +--- + Doc/library/email.errors.rst | 7 +++ + Doc/library/email.policy.rst | 18 ++++++ + Doc/whatsnew/3.11.rst | 13 ++++ + Lib/email/_header_value_parser.py | 12 +++- + Lib/email/_policybase.py | 8 +++ + Lib/email/errors.py | 4 ++ + Lib/email/generator.py | 13 +++- + Lib/test/test_email/test_generator.py | 62 +++++++++++++++++++ + Lib/test/test_email/test_policy.py | 26 ++++++++ + ...-07-27-16-10-41.gh-issue-121650.nf6oc9.rst | 5 ++ + 10 files changed, 164 insertions(+), 4 deletions(-) + create mode 100644 Misc/NEWS.d/next/Library/2024-07-27-16-10-41.gh-issue-121650.nf6oc9.rst + +diff --git a/Doc/library/email.errors.rst b/Doc/library/email.errors.rst +index 194a986..f737f02 100644 +--- a/Doc/library/email.errors.rst ++++ b/Doc/library/email.errors.rst +@@ -59,6 +59,12 @@ The following exception classes are defined in the :mod:`email.errors` module: + :class:`~email.mime.image.MIMEImage`). + + ++.. exception:: HeaderWriteError() ++ ++ Raised when an error occurs when the :mod:`~email.generator` outputs ++ headers. ++ ++ + Here is the list of the defects that the :class:`~email.parser.FeedParser` + can find while parsing messages. Note that the defects are added to the message + where the problem was found, so for example, if a message nested inside a +diff --git a/Doc/library/email.policy.rst b/Doc/library/email.policy.rst +index bb406c5a56ced2..3edba4028b106f 100644 +--- a/Doc/library/email.policy.rst ++++ b/Doc/library/email.policy.rst +@@ -228,6 +228,24 @@ added matters. To illustrate:: + + .. versionadded:: 3.6 + ++ ++ .. 
attribute:: verify_generated_headers ++ ++ If ``True`` (the default), the generator will raise ++ :exc:`~email.errors.HeaderWriteError` instead of writing a header ++ that is improperly folded or delimited, such that it would ++ be parsed as multiple headers or joined with adjacent data. ++ Such headers can be generated by custom header classes or bugs ++ in the ``email`` module. ++ ++ As it's a security feature, this defaults to ``True`` even in the ++ :class:`~email.policy.Compat32` policy. ++ For backwards compatible, but unsafe, behavior, it must be set to ++ ``False`` explicitly. ++ ++ .. versionadded:: 3.11.10 ++ ++ + The following :class:`Policy` method is intended to be called by code using + the email library to create policy instances with custom settings: + +diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst +index 37757212349e06..06c7632ff7c5b1 100644 +--- a/Doc/whatsnew/3.11.rst ++++ b/Doc/whatsnew/3.11.rst +@@ -2755,6 +2755,7 @@ OpenSSL + + .. _libb2: https://www.blake2.net/ + ++ + Notable changes in 3.11.10 + ========================== + +@@ -2763,3 +2764,15 @@ ipaddress + + * Fixed ``is_global`` and ``is_private`` behavior in ``IPv4Address``, + ``IPv6Address``, ``IPv4Network`` and ``IPv6Network``. ++ ++email ++----- ++ ++* Headers with embedded newlines are now quoted on output. ++ ++ The :mod:`~email.generator` will now refuse to serialize (write) headers ++ that are improperly folded or delimited, such that they would be parsed as ++ multiple headers or joined with adjacent data. ++ If you need to turn this safety feature off, ++ set :attr:`~email.policy.Policy.verify_generated_headers`. ++ (Contributed by Bas Bloemsaat and Petr Viktorin in :gh:`121650`.) 
+diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py +index 67e1fcb48ebc08..992394ea9fff95 100644 +--- a/Lib/email/_header_value_parser.py ++++ b/Lib/email/_header_value_parser.py +@@ -92,6 +92,8 @@ + ASPECIALS = TSPECIALS | set("*'%") + ATTRIBUTE_ENDS = ASPECIALS | WSP + EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%') ++NLSET = {'\n', '\r'} ++SPECIALSNL = SPECIALS | NLSET + + def quote_string(value): + return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"' +@@ -2781,9 +2783,13 @@ def _refold_parse_tree(parse_tree, *, policy): + wrap_as_ew_blocked -= 1 + continue + tstr = str(part) +- if part.token_type == 'ptext' and set(tstr) & SPECIALS: +- # Encode if tstr contains special characters. +- want_encoding = True ++ if not want_encoding: ++ if part.token_type == 'ptext': ++ # Encode if tstr contains special characters. ++ want_encoding = not SPECIALSNL.isdisjoint(tstr) ++ else: ++ # Encode if tstr contains newlines. ++ want_encoding = not NLSET.isdisjoint(tstr) + try: + tstr.encode(encoding) + charset = encoding +diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py +index c9cbadd2a80c48..d1f48211f90970 100644 +--- a/Lib/email/_policybase.py ++++ b/Lib/email/_policybase.py +@@ -157,6 +157,13 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta): + message_factory -- the class to use to create new message objects. + If the value is None, the default is Message. + ++ verify_generated_headers ++ -- if true, the generator verifies that each header ++ they are properly folded, so that a parser won't ++ treat it as multiple headers, start-of-body, or ++ part of another header. ++ This is a check against custom Header & fold() ++ implementations. 
+ """ + + raise_on_defect = False +@@ -165,6 +172,7 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta): + max_line_length = 78 + mangle_from_ = False + message_factory = None ++ verify_generated_headers = True + + def handle_defect(self, obj, defect): + """Based on policy, either raise defect or call register_defect. +diff --git a/Lib/email/errors.py b/Lib/email/errors.py +index 3ad00565549968..02aa5eced6ae46 100644 +--- a/Lib/email/errors.py ++++ b/Lib/email/errors.py +@@ -29,6 +29,10 @@ class CharsetError(MessageError): + """An illegal charset was given.""" + + ++class HeaderWriteError(MessageError): ++ """Error while writing headers.""" ++ ++ + # These are parsing defects which the parser was able to work around. + class MessageDefect(ValueError): + """Base class for a message defect.""" +diff --git a/Lib/email/generator.py b/Lib/email/generator.py +index eb597de76d42ef..563ca170726943 100644 +--- a/Lib/email/generator.py ++++ b/Lib/email/generator.py +@@ -14,12 +14,14 @@ + from copy import deepcopy + from io import StringIO, BytesIO + from email.utils import _has_surrogates ++from email.errors import HeaderWriteError + + UNDERSCORE = '_' + NL = '\n' # XXX: no longer used by the code below. 
+ + NLCRE = re.compile(r'\r\n|\r|\n') + fcre = re.compile(r'^From ', re.MULTILINE) ++NEWLINE_WITHOUT_FWSP = re.compile(r'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]') + + + class Generator: +@@ -222,7 +224,16 @@ def _dispatch(self, msg): + + def _write_headers(self, msg): + for h, v in msg.raw_items(): +- self.write(self.policy.fold(h, v)) ++ folded = self.policy.fold(h, v) ++ if self.policy.verify_generated_headers: ++ linesep = self.policy.linesep ++ if not folded.endswith(self.policy.linesep): ++ raise HeaderWriteError( ++ f'folded header does not end with {linesep!r}: {folded!r}') ++ if NEWLINE_WITHOUT_FWSP.search(folded.removesuffix(linesep)): ++ raise HeaderWriteError( ++ f'folded header contains newline: {folded!r}') ++ self.write(folded) + # A blank line always separates headers from body + self.write(self._NL) + +diff --git a/Lib/test/test_email/test_generator.py b/Lib/test/test_email/test_generator.py +index 89e7edeb63a892..d29400f0ed1dbb 100644 +--- a/Lib/test/test_email/test_generator.py ++++ b/Lib/test/test_email/test_generator.py +@@ -6,6 +6,7 @@ + from email.generator import Generator, BytesGenerator + from email.headerregistry import Address + from email import policy ++import email.errors + from test.test_email import TestEmailBase, parameterize + + +@@ -216,6 +217,44 @@ def test_rfc2231_wrapping_switches_to_default_len_if_too_narrow(self): + g.flatten(msg) + self.assertEqual(s.getvalue(), self.typ(expected)) + ++ def test_keep_encoded_newlines(self): ++ msg = self.msgmaker(self.typ(textwrap.dedent("""\ ++ To: nobody ++ Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: injection@example.com ++ ++ None ++ """))) ++ expected = textwrap.dedent("""\ ++ To: nobody ++ Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: injection@example.com ++ ++ None ++ """) ++ s = self.ioclass() ++ g = self.genclass(s, policy=self.policy.clone(max_line_length=80)) ++ g.flatten(msg) ++ self.assertEqual(s.getvalue(), self.typ(expected)) ++ ++ def test_keep_long_encoded_newlines(self): ++ msg = 
self.msgmaker(self.typ(textwrap.dedent("""\ ++ To: nobody ++ Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: injection@example.com ++ ++ None ++ """))) ++ expected = textwrap.dedent("""\ ++ To: nobody ++ Subject: Bad subject ++ =?utf-8?q?=0A?=Bcc: ++ injection@example.com ++ ++ None ++ """) ++ s = self.ioclass() ++ g = self.genclass(s, policy=self.policy.clone(max_line_length=30)) ++ g.flatten(msg) ++ self.assertEqual(s.getvalue(), self.typ(expected)) ++ + + class TestGenerator(TestGeneratorBase, TestEmailBase): + +@@ -224,6 +263,29 @@ class TestGenerator(TestGeneratorBase, TestEmailBase): + ioclass = io.StringIO + typ = str + ++ def test_verify_generated_headers(self): ++ """gh-121650: by default the generator prevents header injection""" ++ class LiteralHeader(str): ++ name = 'Header' ++ def fold(self, **kwargs): ++ return self ++ ++ for text in ( ++ 'Value\r\nBad Injection\r\n', ++ 'NoNewLine' ++ ): ++ with self.subTest(text=text): ++ message = message_from_string( ++ "Header: Value\r\n\r\nBody", ++ policy=self.policy, ++ ) ++ ++ del message['Header'] ++ message['Header'] = LiteralHeader(text) ++ ++ with self.assertRaises(email.errors.HeaderWriteError): ++ message.as_string() ++ + + class TestBytesGenerator(TestGeneratorBase, TestEmailBase): + +diff --git a/Lib/test/test_email/test_policy.py b/Lib/test/test_email/test_policy.py +index c6b9c80efe1b54..baa35fd68e49c5 100644 +--- a/Lib/test/test_email/test_policy.py ++++ b/Lib/test/test_email/test_policy.py +@@ -26,6 +26,7 @@ class PolicyAPITests(unittest.TestCase): + 'raise_on_defect': False, + 'mangle_from_': True, + 'message_factory': None, ++ 'verify_generated_headers': True, + } + # These default values are the ones set on email.policy.default. + # If any of these defaults change, the docs must be updated. 
+@@ -294,6 +295,31 @@ def test_short_maxlen_error(self): + with self.assertRaises(email.errors.HeaderParseError): + policy.fold("Subject", subject) + ++ def test_verify_generated_headers(self): ++ """Turning protection off allows header injection""" ++ policy = email.policy.default.clone(verify_generated_headers=False) ++ for text in ( ++ 'Header: Value\r\nBad: Injection\r\n', ++ 'Header: NoNewLine' ++ ): ++ with self.subTest(text=text): ++ message = email.message_from_string( ++ "Header: Value\r\n\r\nBody", ++ policy=policy, ++ ) ++ class LiteralHeader(str): ++ name = 'Header' ++ def fold(self, **kwargs): ++ return self ++ ++ del message['Header'] ++ message['Header'] = LiteralHeader(text) ++ ++ self.assertEqual( ++ message.as_string(), ++ f"{text}\nBody", ++ ) ++ + # XXX: Need subclassing tests. + # For adding subclassed objects, make sure the usual rules apply (subclass + # wins), but that the order still works (right overrides left). +diff --git a/Misc/NEWS.d/next/Library/2024-07-27-16-10-41.gh-issue-121650.nf6oc9.rst b/Misc/NEWS.d/next/Library/2024-07-27-16-10-41.gh-issue-121650.nf6oc9.rst +new file mode 100644 +index 00000000000000..83dd28d4ac575b +--- /dev/null ++++ b/Misc/NEWS.d/next/Library/2024-07-27-16-10-41.gh-issue-121650.nf6oc9.rst +@@ -0,0 +1,5 @@ ++:mod:`email` headers with embedded newlines are now quoted on output. The ++:mod:`~email.generator` will now refuse to serialize (write) headers that ++are unsafely folded or delimited; see ++:attr:`~email.policy.Policy.verify_generated_headers`. (Contributed by Bas ++Bloemsaat and Petr Viktorin in :gh:`121650`.) 
diff --git a/CVE-2024-7592.patch b/CVE-2024-7592.patch new file mode 100644 index 0000000000000000000000000000000000000000..b9e9b34e891e31bbd02593b47d3215260f7ae105 --- /dev/null +++ b/CVE-2024-7592.patch @@ -0,0 +1,134 @@ +From d4ac921a4b081f7f996a5d2b101684b67ba0ed7f Mon Sep 17 00:00:00 2001 +From: "Miss Islington (bot)" + <31488909+miss-islington@users.noreply.github.com> +Date: Wed, 4 Sep 2024 17:50:00 +0200 +Subject: [PATCH] [3.11] gh-123067: Fix quadratic complexity in parsing + "-quoted cookie values with backslashes (GH-123075) (#123105) + +This fixes CVE-2024-7592. +(cherry picked from commit 44e458357fca05ca0ae2658d62c8c595b048b5ef) + +Co-authored-by: Serhiy Storchaka +--- + Lib/http/cookies.py | 34 ++++------------- + Lib/test/test_http_cookies.py | 38 +++++++++++++++++++ + ...-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst | 1 + + 3 files changed, 47 insertions(+), 26 deletions(-) + create mode 100644 Misc/NEWS.d/next/Library/2024-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst + +diff --git a/Lib/http/cookies.py b/Lib/http/cookies.py +index 35ac2dc6ae280c..2c1f021d0abede 100644 +--- a/Lib/http/cookies.py ++++ b/Lib/http/cookies.py +@@ -184,8 +184,13 @@ def _quote(str): + return '"' + str.translate(_Translator) + '"' + + +-_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]") +-_QuotePatt = re.compile(r"[\\].") ++_unquote_sub = re.compile(r'\\(?:([0-3][0-7][0-7])|(.))').sub ++ ++def _unquote_replace(m): ++ if m[1]: ++ return chr(int(m[1], 8)) ++ else: ++ return m[2] + + def _unquote(str): + # If there aren't any doublequotes, +@@ -205,30 +210,7 @@ def _unquote(str): + # \012 --> \n + # \" --> " + # +- i = 0 +- n = len(str) +- res = [] +- while 0 <= i < n: +- o_match = _OctalPatt.search(str, i) +- q_match = _QuotePatt.search(str, i) +- if not o_match and not q_match: # Neither matched +- res.append(str[i:]) +- break +- # else: +- j = k = -1 +- if o_match: +- j = o_match.start(0) +- if q_match: +- k = q_match.start(0) +- if q_match and (not o_match or k < j): # QuotePatt 
matched +- res.append(str[i:k]) +- res.append(str[k+1]) +- i = k + 2 +- else: # OctalPatt matched +- res.append(str[i:j]) +- res.append(chr(int(str[j+1:j+4], 8))) +- i = j + 4 +- return _nulljoin(res) ++ return _unquote_sub(_unquote_replace, str) + + # The _getdate() routine is used to set the expiration time in the cookie's HTTP + # header. By default, _getdate() returns the current time in the appropriate +diff --git a/Lib/test/test_http_cookies.py b/Lib/test/test_http_cookies.py +index 925c8697f60de6..8879902a6e2f41 100644 +--- a/Lib/test/test_http_cookies.py ++++ b/Lib/test/test_http_cookies.py +@@ -5,6 +5,7 @@ + import doctest + from http import cookies + import pickle ++from test import support + + + class CookieTests(unittest.TestCase): +@@ -58,6 +59,43 @@ def test_basic(self): + for k, v in sorted(case['dict'].items()): + self.assertEqual(C[k].value, v) + ++ def test_unquote(self): ++ cases = [ ++ (r'a="b=\""', 'b="'), ++ (r'a="b=\\"', 'b=\\'), ++ (r'a="b=\="', 'b=='), ++ (r'a="b=\n"', 'b=n'), ++ (r'a="b=\042"', 'b="'), ++ (r'a="b=\134"', 'b=\\'), ++ (r'a="b=\377"', 'b=\xff'), ++ (r'a="b=\400"', 'b=400'), ++ (r'a="b=\42"', 'b=42'), ++ (r'a="b=\\042"', 'b=\\042'), ++ (r'a="b=\\134"', 'b=\\134'), ++ (r'a="b=\\\""', 'b=\\"'), ++ (r'a="b=\\\042"', 'b=\\"'), ++ (r'a="b=\134\""', 'b=\\"'), ++ (r'a="b=\134\042"', 'b=\\"'), ++ ] ++ for encoded, decoded in cases: ++ with self.subTest(encoded): ++ C = cookies.SimpleCookie() ++ C.load(encoded) ++ self.assertEqual(C['a'].value, decoded) ++ ++ @support.requires_resource('cpu') ++ def test_unquote_large(self): ++ n = 10**6 ++ for encoded in r'\\', r'\134': ++ with self.subTest(encoded): ++ data = 'a="b=' + encoded*n + ';"' ++ C = cookies.SimpleCookie() ++ C.load(data) ++ value = C['a'].value ++ self.assertEqual(value[:3], 'b=\\') ++ self.assertEqual(value[-2:], '\\;') ++ self.assertEqual(len(value), n + 3) ++ + def test_load(self): + C = cookies.SimpleCookie() + C.load('Customer="WILE_E_COYOTE"; Version=1; Path=/acme') 
+diff --git a/Misc/NEWS.d/next/Library/2024-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst b/Misc/NEWS.d/next/Library/2024-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst +new file mode 100644 +index 00000000000000..6a234561fe31a3 +--- /dev/null ++++ b/Misc/NEWS.d/next/Library/2024-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst +@@ -0,0 +1 @@ ++Fix quadratic complexity in parsing ``"``-quoted cookie values with backslashes by :mod:`http.cookies`. + diff --git a/CVE-2024-8088.patch b/CVE-2024-8088.patch new file mode 100644 index 0000000000000000000000000000000000000000..fa9c257b03e95a002bd4aedbef9aa72f64198615 --- /dev/null +++ b/CVE-2024-8088.patch @@ -0,0 +1,147 @@ +From a012256c25a773475c9389cfb8c109d7b7317eb3 Mon Sep 17 00:00:00 2001 +From: "Jason R. Coombs" +Date: Thu, 10 Jul 2025 01:10:41 -0400 +Subject: [PATCH 1/1] fix CVE-2024-8088 + +--- + Lib/test/test_zipfile.py | 76 ++++++++++++++++++++++++++++++++++++++++ + Lib/zipfile.py | 11 ++++-- + 2 files changed, 84 insertions(+), 3 deletions(-) + +diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py +index c8e0159..d0025e2 100644 +--- a/Lib/test/test_zipfile.py ++++ b/Lib/test/test_zipfile.py +@@ -3512,6 +3512,82 @@ with zipfile.ZipFile(io.BytesIO(), "w") as zf: + zipfile.Path(zf) + zf.extractall(source_path.parent) + ++ def test_malformed_paths(self): ++ """ ++ Path should handle malformed paths gracefully. ++ ++ Paths with leading slashes are not visible. ++ ++ Paths with dots are treated like regular files. ++ """ ++ data = io.BytesIO() ++ zf = zipfile.ZipFile(data, "w") ++ zf.writestr("/one-slash.txt", b"content") ++ zf.writestr("//two-slash.txt", b"content") ++ zf.writestr("../parent.txt", b"content") ++ zf.filename = '' ++ root = zipfile.Path(zf) ++ assert list(map(str, root.iterdir())) == ['../'] ++ assert root.joinpath('..').joinpath('parent.txt').read_bytes() == b'content' ++ ++ def test_unsupported_names(self): ++ """ ++ Path segments with special characters are readable. 
++ ++ On some platforms or file systems, characters like ++ ``:`` and ``?`` are not allowed, but they are valid ++ in the zip file. ++ """ ++ data = io.BytesIO() ++ zf = zipfile.ZipFile(data, "w") ++ zf.writestr("path?", b"content") ++ zf.writestr("V: NMS.flac", b"fLaC...") ++ zf.filename = '' ++ root = zipfile.Path(zf) ++ contents = root.iterdir() ++ assert next(contents).name == 'path?' ++ assert next(contents).name == 'V: NMS.flac' ++ assert root.joinpath('V: NMS.flac').read_bytes() == b"fLaC..." ++ ++ def test_backslash_not_separator(self): ++ """ ++ In a zip file, backslashes are not separators. ++ """ ++ data = io.BytesIO() ++ zf = zipfile.ZipFile(data, "w") ++ zf.writestr(DirtyZipInfo.for_name("foo\\bar", zf), b"content") ++ zf.filename = '' ++ root = zipfile.Path(zf) ++ (first,) = root.iterdir() ++ assert not first.is_dir() ++ assert first.name == 'foo\\bar' ++ ++ ++class DirtyZipInfo(zipfile.ZipInfo): ++ """ ++ Bypass name sanitization. ++ """ ++ ++ def __init__(self, filename, *args, **kwargs): ++ super().__init__(filename, *args, **kwargs) ++ self.filename = filename ++ ++ @classmethod ++ def for_name(cls, name, archive): ++ """ ++ Construct the same way that ZipFile.writestr does. 
++ ++ TODO: extract this functionality and re-use ++ """ ++ self = cls(filename=name, date_time=time.localtime(time.time())[:6]) ++ self.compress_type = archive.compression ++ self.compress_level = archive.compresslevel ++ if self.filename.endswith('/'): # pragma: no cover ++ self.external_attr = 0o40775 << 16 # drwxrwxr-x ++ self.external_attr |= 0x10 # MS-DOS directory flag ++ else: ++ self.external_attr = 0o600 << 16 # ?rw------- ++ return self + + class EncodedMetadataTests(unittest.TestCase): + file_names = ['\u4e00', '\u4e8c', '\u4e09'] # Han 'one', 'two', 'three' +diff --git a/Lib/zipfile.py b/Lib/zipfile.py +index 6189db5..49d0ad3 100644 +--- a/Lib/zipfile.py ++++ b/Lib/zipfile.py +@@ -9,6 +9,7 @@ import io + import itertools + import os + import posixpath ++import re + import shutil + import stat + import struct +@@ -2192,7 +2193,7 @@ def _parents(path): + def _ancestry(path): + """ + Given a path with elements separated by +- posixpath.sep, generate all elements of that path ++ posixpath.sep, generate all elements of that path. + + >>> list(_ancestry('b/d')) + ['b/d', 'b'] +@@ -2204,9 +2205,14 @@ def _ancestry(path): + ['b'] + >>> list(_ancestry('')) + [] ++ ++ Multiple separators are treated like a single. 
++ ++ >>> list(_ancestry('//b//d///f//')) ++ ['//b//d///f', '//b//d', '//b'] + """ + path = path.rstrip(posixpath.sep) +- while path and path != posixpath.sep: ++ while path.rstrip(posixpath.sep): + yield path + path, tail = posixpath.split(path) + +@@ -2222,7 +2228,6 @@ def _difference(minuend, subtrahend): + """ + return itertools.filterfalse(set(subtrahend).__contains__, minuend) + +- + class CompleteDirs(ZipFile): + """ + A ZipFile subclass that ensures that implied directories +-- +2.41.0 + + + diff --git a/CVE-2024-9287.patch b/CVE-2024-9287.patch new file mode 100644 index 0000000000000000000000000000000000000000..4ea7b34fee222e532bebfbba366e9035241ae9a4 --- /dev/null +++ b/CVE-2024-9287.patch @@ -0,0 +1,288 @@ +diff --git a/Lib/test/test_venv.py b/Lib/test/test_venv.py +index eb9227a..7ecae7f 100644 +--- a/Lib/test/test_venv.py ++++ b/Lib/test/test_venv.py +@@ -17,7 +17,8 @@ import subprocess + import sys + import sysconfig + import tempfile +-from test.support import (captured_stdout, captured_stderr, requires_zlib, ++import shlex ++from test.support import (captured_stdout, captured_stderr, + skip_if_broken_multiprocessing_synchronize, verbose, + requires_subprocess, is_emscripten, is_wasi, + requires_venv_with_pip, TEST_HOME_DIR, +@@ -96,6 +97,10 @@ class BaseTest(unittest.TestCase): + result = f.read() + return result + ++ def assertEndsWith(self, string, tail): ++ if not string.endswith(tail): ++ self.fail(f"String {string!r} does not end with {tail!r}") ++ + class BasicTest(BaseTest): + """Test venv module functionality.""" + +@@ -445,6 +450,82 @@ class BasicTest(BaseTest): + 'import sys; print(sys.executable)']) + self.assertEqual(out.strip(), envpy.encode()) + ++ # gh-124651: test quoted strings ++ @unittest.skipIf(os.name == 'nt', 'contains invalid characters on Windows') ++ def test_special_chars_bash(self): ++ """ ++ Test that the template strings are quoted properly (bash) ++ """ ++ rmtree(self.env_dir) ++ bash = shutil.which('bash') ++ if bash 
is None: ++ self.skipTest('bash required for this test') ++ env_name = '"\';&&$e|\'"' ++ env_dir = os.path.join(os.path.realpath(self.env_dir), env_name) ++ builder = venv.EnvBuilder(clear=True) ++ builder.create(env_dir) ++ activate = os.path.join(env_dir, self.bindir, 'activate') ++ test_script = os.path.join(self.env_dir, 'test_special_chars.sh') ++ with open(test_script, "w") as f: ++ f.write(f'source {shlex.quote(activate)}\n' ++ 'python -c \'import sys; print(sys.executable)\'\n' ++ 'python -c \'import os; print(os.environ["VIRTUAL_ENV"])\'\n' ++ 'deactivate\n') ++ out, err = check_output([bash, test_script]) ++ lines = out.splitlines() ++ self.assertTrue(env_name.encode() in lines[0]) ++ self.assertEndsWith(lines[1], env_name.encode()) ++ ++ # gh-124651: test quoted strings ++ @unittest.skipIf(os.name == 'nt', 'contains invalid characters on Windows') ++ def test_special_chars_csh(self): ++ """ ++ Test that the template strings are quoted properly (csh) ++ """ ++ rmtree(self.env_dir) ++ csh = shutil.which('tcsh') or shutil.which('csh') ++ if csh is None: ++ self.skipTest('csh required for this test') ++ env_name = '"\';&&$e|\'"' ++ env_dir = os.path.join(os.path.realpath(self.env_dir), env_name) ++ builder = venv.EnvBuilder(clear=True) ++ builder.create(env_dir) ++ activate = os.path.join(env_dir, self.bindir, 'activate.csh') ++ test_script = os.path.join(self.env_dir, 'test_special_chars.csh') ++ with open(test_script, "w") as f: ++ f.write(f'source {shlex.quote(activate)}\n' ++ 'python -c \'import sys; print(sys.executable)\'\n' ++ 'python -c \'import os; print(os.environ["VIRTUAL_ENV"])\'\n' ++ 'deactivate\n') ++ out, err = check_output([csh, test_script]) ++ lines = out.splitlines() ++ self.assertTrue(env_name.encode() in lines[0]) ++ self.assertEndsWith(lines[1], env_name.encode()) ++ ++ # gh-124651: test quoted strings on Windows ++ @unittest.skipUnless(os.name == 'nt', 'only relevant on Windows') ++ def test_special_chars_windows(self): ++ """ ++ Test 
that the template strings are quoted properly on Windows ++ """ ++ rmtree(self.env_dir) ++ env_name = "'&&^$e" ++ env_dir = os.path.join(os.path.realpath(self.env_dir), env_name) ++ builder = venv.EnvBuilder(clear=True) ++ builder.create(env_dir) ++ activate = os.path.join(env_dir, self.bindir, 'activate.bat') ++ test_batch = os.path.join(self.env_dir, 'test_special_chars.bat') ++ with open(test_batch, "w") as f: ++ f.write('@echo off\n' ++ f'"{activate}" & ' ++ f'{self.exe} -c "import sys; print(sys.executable)" & ' ++ f'{self.exe} -c "import os; print(os.environ[\'VIRTUAL_ENV\'])" & ' ++ 'deactivate') ++ out, err = check_output([test_batch]) ++ lines = out.splitlines() ++ self.assertTrue(env_name.encode() in lines[0]) ++ self.assertEndsWith(lines[1], env_name.encode()) ++ + @unittest.skipUnless(os.name == 'nt', 'only relevant on Windows') + def test_unicode_in_batch_file(self): + """ +diff --git a/Lib/venv/__init__.py b/Lib/venv/__init__.py +index 6bce308..4403f2b 100644 +--- a/Lib/venv/__init__.py ++++ b/Lib/venv/__init__.py +@@ -11,6 +11,7 @@ import subprocess + import sys + import sysconfig + import types ++import shlex + + + CORE_VENV_DEPS = ('pip', 'setuptools') +@@ -394,11 +395,41 @@ class EnvBuilder: + :param context: The information for the environment creation request + being processed. + """ +- text = text.replace('__VENV_DIR__', context.env_dir) +- text = text.replace('__VENV_NAME__', context.env_name) +- text = text.replace('__VENV_PROMPT__', context.prompt) +- text = text.replace('__VENV_BIN_NAME__', context.bin_name) +- text = text.replace('__VENV_PYTHON__', context.env_exe) ++ replacements = { ++ '__VENV_DIR__': context.env_dir, ++ '__VENV_NAME__': context.env_name, ++ '__VENV_PROMPT__': context.prompt, ++ '__VENV_BIN_NAME__': context.bin_name, ++ '__VENV_PYTHON__': context.env_exe, ++ } ++ ++ def quote_ps1(s): ++ """ ++ This should satisfy PowerShell quoting rules [1], unless the quoted ++ string is passed directly to Windows native commands [2]. 
++ [1]: https://learn.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_quoting_rules ++ [2]: https://learn.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_parsing#passing-arguments-that-contain-quote-characters ++ """ ++ s = s.replace("'", "''") ++ return f"'{s}'" ++ ++ def quote_bat(s): ++ return s ++ ++ # gh-124651: need to quote the template strings properly ++ quote = shlex.quote ++ script_path = context.script_path ++ if script_path.endswith('.ps1'): ++ quote = quote_ps1 ++ elif script_path.endswith('.bat'): ++ quote = quote_bat ++ else: ++ # fallbacks to POSIX shell compliant quote ++ quote = shlex.quote ++ ++ replacements = {key: quote(s) for key, s in replacements.items()} ++ for key, quoted in replacements.items(): ++ text = text.replace(key, quoted) + return text + + def install_scripts(self, context, path): +@@ -438,6 +469,7 @@ class EnvBuilder: + with open(srcfile, 'rb') as f: + data = f.read() + if not srcfile.endswith(('.exe', '.pdb')): ++ context.script_path = srcfile + try: + data = data.decode('utf-8') + data = self.replace_variables(data, context) +diff --git a/Lib/venv/scripts/common/activate b/Lib/venv/scripts/common/activate +index 6fbc2b8..104399d 100644 +--- a/Lib/venv/scripts/common/activate ++++ b/Lib/venv/scripts/common/activate +@@ -38,11 +38,11 @@ deactivate () { + # unset irrelevant variables + deactivate nondestructive + +-VIRTUAL_ENV="__VENV_DIR__" ++VIRTUAL_ENV=__VENV_DIR__ + export VIRTUAL_ENV + + _OLD_VIRTUAL_PATH="$PATH" +-PATH="$VIRTUAL_ENV/__VENV_BIN_NAME__:$PATH" ++PATH="$VIRTUAL_ENV/"__VENV_BIN_NAME__":$PATH" + export PATH + + # unset PYTHONHOME if set +@@ -55,9 +55,9 @@ fi + + if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then + _OLD_VIRTUAL_PS1="${PS1:-}" +- PS1="__VENV_PROMPT__${PS1:-}" ++ PS1=__VENV_PROMPT__"${PS1:-}" + export PS1 +- VIRTUAL_ENV_PROMPT="__VENV_PROMPT__" ++ VIRTUAL_ENV_PROMPT=__VENV_PROMPT__ + export VIRTUAL_ENV_PROMPT + fi + +diff --git 
a/Lib/venv/scripts/nt/activate.bat b/Lib/venv/scripts/nt/activate.bat +index 5daa45a..c2c6dd2 100644 +--- a/Lib/venv/scripts/nt/activate.bat ++++ b/Lib/venv/scripts/nt/activate.bat +@@ -8,7 +8,7 @@ if defined _OLD_CODEPAGE ( + "%SystemRoot%\System32\chcp.com" 65001 > nul + ) + +-set VIRTUAL_ENV=__VENV_DIR__ ++set "VIRTUAL_ENV=__VENV_DIR__" + + if not defined PROMPT set PROMPT=$P$G + +@@ -24,8 +24,8 @@ set PYTHONHOME= + if defined _OLD_VIRTUAL_PATH set PATH=%_OLD_VIRTUAL_PATH% + if not defined _OLD_VIRTUAL_PATH set _OLD_VIRTUAL_PATH=%PATH% + +-set PATH=%VIRTUAL_ENV%\__VENV_BIN_NAME__;%PATH% +-set VIRTUAL_ENV_PROMPT=__VENV_PROMPT__ ++set "PATH=%VIRTUAL_ENV%\__VENV_BIN_NAME__;%PATH%" ++set "VIRTUAL_ENV_PROMPT=__VENV_PROMPT__" + + :END + if defined _OLD_CODEPAGE ( +diff --git a/Lib/venv/scripts/posix/activate.csh b/Lib/venv/scripts/posix/activate.csh +index d6f697c..c477021 100644 +--- a/Lib/venv/scripts/posix/activate.csh ++++ b/Lib/venv/scripts/posix/activate.csh +@@ -8,17 +8,17 @@ alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PA + # Unset irrelevant variables. + deactivate nondestructive + +-setenv VIRTUAL_ENV "__VENV_DIR__" ++setenv VIRTUAL_ENV __VENV_DIR__ + + set _OLD_VIRTUAL_PATH="$PATH" +-setenv PATH "$VIRTUAL_ENV/__VENV_BIN_NAME__:$PATH" ++setenv PATH "$VIRTUAL_ENV/"__VENV_BIN_NAME__":$PATH" + + + set _OLD_VIRTUAL_PROMPT="$prompt" + + if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then +- set prompt = "__VENV_PROMPT__$prompt" +- setenv VIRTUAL_ENV_PROMPT "__VENV_PROMPT__" ++ set prompt = __VENV_PROMPT__"$prompt" ++ setenv VIRTUAL_ENV_PROMPT __VENV_PROMPT__ + endif + + alias pydoc python -m pydoc +diff --git a/Lib/venv/scripts/posix/activate.fish b/Lib/venv/scripts/posix/activate.fish +index 9aa4446..dc3a6c8 100644 +--- a/Lib/venv/scripts/posix/activate.fish ++++ b/Lib/venv/scripts/posix/activate.fish +@@ -33,10 +33,10 @@ end + # Unset irrelevant variables. 
+ deactivate nondestructive + +-set -gx VIRTUAL_ENV "__VENV_DIR__" ++set -gx VIRTUAL_ENV __VENV_DIR__ + + set -gx _OLD_VIRTUAL_PATH $PATH +-set -gx PATH "$VIRTUAL_ENV/__VENV_BIN_NAME__" $PATH ++set -gx PATH "$VIRTUAL_ENV/"__VENV_BIN_NAME__ $PATH + + # Unset PYTHONHOME if set. + if set -q PYTHONHOME +@@ -56,7 +56,7 @@ if test -z "$VIRTUAL_ENV_DISABLE_PROMPT" + set -l old_status $status + + # Output the venv prompt; color taken from the blue of the Python logo. +- printf "%s%s%s" (set_color 4B8BBE) "__VENV_PROMPT__" (set_color normal) ++ printf "%s%s%s" (set_color 4B8BBE) __VENV_PROMPT__ (set_color normal) + + # Restore the return status of the previous command. + echo "exit $old_status" | . +@@ -65,5 +65,5 @@ if test -z "$VIRTUAL_ENV_DISABLE_PROMPT" + end + + set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV" +- set -gx VIRTUAL_ENV_PROMPT "__VENV_PROMPT__" ++ set -gx VIRTUAL_ENV_PROMPT __VENV_PROMPT__ + end diff --git a/CVE-2025-4517.patch b/CVE-2025-4517.patch new file mode 100644 index 0000000000000000000000000000000000000000..bcf6c7a4ca9427e135c20447f488992224b9bd5e --- /dev/null +++ b/CVE-2025-4517.patch @@ -0,0 +1,1855 @@ +From 4633f3f497b1ff70e4a35b6fe2c907cbe2d4cb2e Mon Sep 17 00:00:00 2001 +From: "T. Wouters" +Date: Tue, 3 Jun 2025 16:58:39 +0200 +Subject: [PATCH] [3.11] gh-135034: Normalize link targets in tarfile, add + `os.path.realpath(strict='allow_missing')` (GH-135037) (GH-135068) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Addresses CVEs 2024-12718, 2025-4138, 2025-4330, and 2025-4517. 
+(cherry picked from commit 3612d8f51741b11f36f8fb0494d79086bac9390a) +(cherry picked from commit c358142cab7ce621a2745262a90df967b357f61c) + +Co-authored-by: Łukasz Langa +Signed-off-by: Łukasz Langa +Co-authored-by: Petr Viktorin +Co-authored-by: Seth Michael Larson +Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> +Co-authored-by: Serhiy Storchaka +--- + Doc/library/os.path.rst | 33 +- + Doc/library/tarfile.rst | 20 ++ + Lib/genericpath.py | 11 +- + Lib/ntpath.py | 37 ++- + Lib/posixpath.py | 15 +- + Lib/tarfile.py | 161 +++++++-- + Lib/test/test_ntpath.py | 237 +++++++++++-- + Lib/test/test_posixpath.py | 289 +++++++++++++--- + Lib/test/test_tarfile.py | 312 ++++++++++++++++-- + ...-06-02-11-32-23.gh-issue-135034.RLGjbp.rst | 6 + + 11 files changed, 1017 insertions(+), 138 deletions(-) + create mode 100644 Misc/NEWS.d/next/Security/2025-06-02-11-32-23.gh-issue-135034.RLGjbp.rst + +diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst +index 8f6dffd04fa54a..f08c1fe1aff38a 100644 +--- a/Doc/library/os.path.rst ++++ b/Doc/library/os.path.rst +@@ -352,10 +352,26 @@ the :mod:`glob` module.) + links encountered in the path (if they are supported by the operating + system). + +- If a path doesn't exist or a symlink loop is encountered, and *strict* is +- ``True``, :exc:`OSError` is raised. If *strict* is ``False``, the path is +- resolved as far as possible and any remainder is appended without checking +- whether it exists. ++ By default, the path is evaluated up to the first component that does not ++ exist, is a symlink loop, or whose evaluation raises :exc:`OSError`. ++ All such components are appended unchanged to the existing part of the path. ++ ++ Some errors that are handled this way include "access denied", "not a ++ directory", or "bad argument to internal function". Thus, the ++ resulting path may be missing or inaccessible, may still contain ++ links or loops, and may traverse non-directories. 
++ ++ This behavior can be modified by keyword arguments: ++ ++ If *strict* is ``True``, the first error encountered when evaluating the path is ++ re-raised. ++ In particular, :exc:`FileNotFoundError` is raised if *path* does not exist, ++ or another :exc:`OSError` if it is otherwise inaccessible. ++ ++ If *strict* is :py:data:`os.path.ALLOW_MISSING`, errors other than ++ :exc:`FileNotFoundError` are re-raised (as with ``strict=True``). ++ Thus, the returned path will not contain any symbolic links, but the named ++ file and some of its parent directories may be missing. + + .. note:: + This function emulates the operating system's procedure for making a path +@@ -374,6 +390,15 @@ the :mod:`glob` module.) + .. versionchanged:: 3.10 + The *strict* parameter was added. + ++ .. versionchanged:: next ++ The :py:data:`~os.path.ALLOW_MISSING` value for the *strict* parameter ++ was added. ++ ++.. data:: ALLOW_MISSING ++ ++ Special value used for the *strict* argument in :func:`realpath`. ++ ++ .. versionadded:: next + + .. function:: relpath(path, start=os.curdir) + +diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst +index 32d77d5d87bf82..f3907cc69f2692 100644 +--- a/Doc/library/tarfile.rst ++++ b/Doc/library/tarfile.rst +@@ -239,6 +239,15 @@ The :mod:`tarfile` module defines the following exceptions: + Raised to refuse extracting a symbolic link pointing outside the destination + directory. + ++.. exception:: LinkFallbackError ++ ++ Raised to refuse emulating a link (hard or symbolic) by extracting another ++ archive member, when that member would be rejected by the filter location. ++ The exception that was raised to reject the replacement member is available ++ as :attr:`!BaseException.__context__`. ++ ++ .. versionadded:: next ++ + + The following constants are available at the module level: + +@@ -1037,6 +1046,12 @@ reused in custom filters: + Implements the ``'data'`` filter. 
+ In addition to what ``tar_filter`` does: + ++ - Normalize link targets (:attr:`TarInfo.linkname`) using ++ :func:`os.path.normpath`. ++ Note that this removes internal ``..`` components, which may change the ++ meaning of the link if the path in :attr:`!TarInfo.linkname` traverses ++ symbolic links. ++ + - :ref:`Refuse ` to extract links (hard or soft) + that link to absolute paths, or ones that link outside the destination. + +@@ -1065,6 +1080,10 @@ reused in custom filters: + + Return the modified ``TarInfo`` member. + ++ .. versionchanged:: next ++ ++ Link targets are now normalized. ++ + + .. _tarfile-extraction-refuse: + +@@ -1091,6 +1110,7 @@ Here is an incomplete list of things to consider: + * Extract to a :func:`new temporary directory ` + to prevent e.g. exploiting pre-existing links, and to make it easier to + clean up after a failed extraction. ++* Disallow symbolic links if you do not need the functionality. + * When working with untrusted data, use external (e.g. OS-level) limits on + disk, memory and CPU usage. + * Check filenames against an allow-list of characters +diff --git a/Lib/genericpath.py b/Lib/genericpath.py +index ce36451a3af01c..ad8d47b41d4ce6 100644 +--- a/Lib/genericpath.py ++++ b/Lib/genericpath.py +@@ -8,7 +8,7 @@ + + __all__ = ['commonprefix', 'exists', 'getatime', 'getctime', 'getmtime', + 'getsize', 'isdir', 'isfile', 'samefile', 'sameopenfile', +- 'samestat'] ++ 'samestat', 'ALLOW_MISSING'] + + + # Does a path exist? +@@ -153,3 +153,12 @@ def _check_arg_types(funcname, *args): + f'os.PathLike object, not {s.__class__.__name__!r}') from None + if hasstr and hasbytes: + raise TypeError("Can't mix strings and bytes in path components") from None ++ ++# A singleton with a true boolean value. 
++@object.__new__ ++class ALLOW_MISSING: ++ """Special value for use in realpath().""" ++ def __repr__(self): ++ return 'os.path.ALLOW_MISSING' ++ def __reduce__(self): ++ return self.__class__.__name__ +diff --git a/Lib/ntpath.py b/Lib/ntpath.py +index 0246419485da0d..ebc55eb891082e 100644 +--- a/Lib/ntpath.py ++++ b/Lib/ntpath.py +@@ -30,7 +30,8 @@ + "ismount", "expanduser","expandvars","normpath","abspath", + "curdir","pardir","sep","pathsep","defpath","altsep", + "extsep","devnull","realpath","supports_unicode_filenames","relpath", +- "samefile", "sameopenfile", "samestat", "commonpath"] ++ "samefile", "sameopenfile", "samestat", "commonpath", ++ "ALLOW_MISSING"] + + def _get_bothseps(path): + if isinstance(path, bytes): +@@ -578,9 +579,10 @@ def abspath(path): + from nt import _getfinalpathname, readlink as _nt_readlink + except ImportError: + # realpath is a no-op on systems without _getfinalpathname support. +- realpath = abspath ++ def realpath(path, *, strict=False): ++ return abspath(path) + else: +- def _readlink_deep(path): ++ def _readlink_deep(path, ignored_error=OSError): + # These error codes indicate that we should stop reading links and + # return the path we currently have. + # 1: ERROR_INVALID_FUNCTION +@@ -613,7 +615,7 @@ def _readlink_deep(path): + path = old_path + break + path = normpath(join(dirname(old_path), path)) +- except OSError as ex: ++ except ignored_error as ex: + if ex.winerror in allowed_winerror: + break + raise +@@ -622,7 +624,7 @@ def _readlink_deep(path): + break + return path + +- def _getfinalpathname_nonstrict(path): ++ def _getfinalpathname_nonstrict(path, ignored_error=OSError): + # These error codes indicate that we should stop resolving the path + # and return the value we currently have. 
+ # 1: ERROR_INVALID_FUNCTION +@@ -649,17 +651,18 @@ def _getfinalpathname_nonstrict(path): + try: + path = _getfinalpathname(path) + return join(path, tail) if tail else path +- except OSError as ex: ++ except ignored_error as ex: + if ex.winerror not in allowed_winerror: + raise + try: + # The OS could not resolve this path fully, so we attempt + # to follow the link ourselves. If we succeed, join the tail + # and return. +- new_path = _readlink_deep(path) ++ new_path = _readlink_deep(path, ++ ignored_error=ignored_error) + if new_path != path: + return join(new_path, tail) if tail else new_path +- except OSError: ++ except ignored_error: + # If we fail to readlink(), let's keep traversing + pass + path, name = split(path) +@@ -690,6 +693,15 @@ def realpath(path, *, strict=False): + if normcase(path) == normcase(devnull): + return '\\\\.\\NUL' + had_prefix = path.startswith(prefix) ++ ++ if strict is ALLOW_MISSING: ++ ignored_error = FileNotFoundError ++ strict = True ++ elif strict: ++ ignored_error = () ++ else: ++ ignored_error = OSError ++ + if not had_prefix and not isabs(path): + path = join(cwd, path) + try: +@@ -697,17 +709,16 @@ def realpath(path, *, strict=False): + initial_winerror = 0 + except ValueError as ex: + # gh-106242: Raised for embedded null characters +- # In strict mode, we convert into an OSError. ++ # In strict modes, we convert into an OSError. + # Non-strict mode returns the path as-is, since we've already + # made it absolute. + if strict: + raise OSError(str(ex)) from None + path = normpath(path) +- except OSError as ex: +- if strict: +- raise ++ except ignored_error as ex: + initial_winerror = ex.winerror +- path = _getfinalpathname_nonstrict(path) ++ path = _getfinalpathname_nonstrict(path, ++ ignored_error=ignored_error) + # The path returned by _getfinalpathname will always start with \\?\ - + # strip off that prefix unless it was already provided on the original + # path. 
+diff --git a/Lib/posixpath.py b/Lib/posixpath.py +index 5b4d78bca06132..ce71a477b21928 100644 +--- a/Lib/posixpath.py ++++ b/Lib/posixpath.py +@@ -35,7 +35,7 @@ + "samefile","sameopenfile","samestat", + "curdir","pardir","sep","pathsep","defpath","altsep","extsep", + "devnull","realpath","supports_unicode_filenames","relpath", +- "commonpath"] ++ "commonpath", "ALLOW_MISSING"] + + + def _get_sep(path): +@@ -427,6 +427,15 @@ def _joinrealpath(path, rest, strict, seen): + sep = '/' + curdir = '.' + pardir = '..' ++ getcwd = os.getcwd ++ if strict is ALLOW_MISSING: ++ ignored_error = FileNotFoundError ++ elif strict: ++ ignored_error = () ++ else: ++ ignored_error = OSError ++ ++ maxlinks = None + + if isabs(rest): + rest = rest[1:] +@@ -449,9 +458,7 @@ def _joinrealpath(path, rest, strict, seen): + newpath = join(path, name) + try: + st = os.lstat(newpath) +- except OSError: +- if strict: +- raise ++ except ignored_error: + is_link = False + else: + is_link = stat.S_ISLNK(st.st_mode) +diff --git a/Lib/tarfile.py b/Lib/tarfile.py +index 0d6b925533b63d..2423e14bc540d8 100755 +--- a/Lib/tarfile.py ++++ b/Lib/tarfile.py +@@ -751,10 +751,22 @@ def __init__(self, tarinfo, path): + super().__init__(f'{tarinfo.name!r} would link to {path!r}, ' + + 'which is outside the destination') + ++class LinkFallbackError(FilterError): ++ def __init__(self, tarinfo, path): ++ self.tarinfo = tarinfo ++ self._path = path ++ super().__init__(f'link {tarinfo.name!r} would be extracted as a ' ++ + f'copy of {path!r}, which was rejected') ++ ++# Errors caused by filters -- both "fatal" and "non-fatal" -- that ++# we consider to be issues with the argument, rather than a bug in the ++# filter function ++_FILTER_ERRORS = (FilterError, OSError, ExtractError) ++ + def _get_filtered_attrs(member, dest_path, for_data=True): + new_attrs = {} + name = member.name +- dest_path = os.path.realpath(dest_path) ++ dest_path = os.path.realpath(dest_path, strict=os.path.ALLOW_MISSING) + # Strip leading / 
(tar's directory separator) from filenames. + # Include os.sep (target OS directory separator) as well. + if name.startswith(('/', os.sep)): +@@ -764,7 +776,8 @@ def _get_filtered_attrs(member, dest_path, for_data=True): + # For example, 'C:/foo' on Windows. + raise AbsolutePathError(member) + # Ensure we stay in the destination +- target_path = os.path.realpath(os.path.join(dest_path, name)) ++ target_path = os.path.realpath(os.path.join(dest_path, name), ++ strict=os.path.ALLOW_MISSING) + if os.path.commonpath([target_path, dest_path]) != dest_path: + raise OutsideDestinationError(member, target_path) + # Limit permissions (no high bits, and go-w) +@@ -802,6 +815,9 @@ def _get_filtered_attrs(member, dest_path, for_data=True): + if member.islnk() or member.issym(): + if os.path.isabs(member.linkname): + raise AbsoluteLinkError(member) ++ normalized = os.path.normpath(member.linkname) ++ if normalized != member.linkname: ++ new_attrs['linkname'] = normalized + if member.issym(): + target_path = os.path.join(dest_path, + os.path.dirname(name), +@@ -809,7 +825,8 @@ def _get_filtered_attrs(member, dest_path, for_data=True): + else: + target_path = os.path.join(dest_path, + member.linkname) +- target_path = os.path.realpath(target_path) ++ target_path = os.path.realpath(target_path, ++ strict=os.path.ALLOW_MISSING) + if os.path.commonpath([target_path, dest_path]) != dest_path: + raise LinkOutsideDestinationError(member, target_path) + return new_attrs +@@ -2283,30 +2300,58 @@ def extractall(self, path=".", members=None, *, numeric_owner=False, + members = self + + for member in members: +- tarinfo = self._get_extract_tarinfo(member, filter_function, path) ++ tarinfo, unfiltered = self._get_extract_tarinfo( ++ member, filter_function, path) + if tarinfo is None: + continue + if tarinfo.isdir(): + # For directories, delay setting attributes until later, + # since permissions can interfere with extraction and + # extracting contents can reset mtime. 
+- directories.append(tarinfo) ++ directories.append(unfiltered) + self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(), +- numeric_owner=numeric_owner) ++ numeric_owner=numeric_owner, ++ filter_function=filter_function) + + # Reverse sort directories. + directories.sort(key=lambda a: a.name, reverse=True) + ++ + # Set correct owner, mtime and filemode on directories. +- for tarinfo in directories: +- dirpath = os.path.join(path, tarinfo.name) ++ for unfiltered in directories: + try: ++ # Need to re-apply any filter, to take the *current* filesystem ++ # state into account. ++ try: ++ tarinfo = filter_function(unfiltered, path) ++ except _FILTER_ERRORS as exc: ++ self._log_no_directory_fixup(unfiltered, repr(exc)) ++ continue ++ if tarinfo is None: ++ self._log_no_directory_fixup(unfiltered, ++ 'excluded by filter') ++ continue ++ dirpath = os.path.join(path, tarinfo.name) ++ try: ++ lstat = os.lstat(dirpath) ++ except FileNotFoundError: ++ self._log_no_directory_fixup(tarinfo, 'missing') ++ continue ++ if not stat.S_ISDIR(lstat.st_mode): ++ # This is no longer a directory; presumably a later ++ # member overwrote the entry. ++ self._log_no_directory_fixup(tarinfo, 'not a directory') ++ continue + self.chown(tarinfo, dirpath, numeric_owner=numeric_owner) + self.utime(tarinfo, dirpath) + self.chmod(tarinfo, dirpath) + except ExtractError as e: + self._handle_nonfatal_error(e) + ++ def _log_no_directory_fixup(self, member, reason): ++ self._dbg(2, "tarfile: Not fixing up directory %r (%s)" % ++ (member.name, reason)) ++ + def extract(self, member, path="", set_attrs=True, *, numeric_owner=False, + filter=None): + """Extract a member from the archive to the current working directory, +@@ -2322,41 +2367,56 @@ def extract(self, member, path="", set_attrs=True, *, numeric_owner=False, + String names of common filters are accepted. 
+ """ + filter_function = self._get_filter_function(filter) +- tarinfo = self._get_extract_tarinfo(member, filter_function, path) ++ tarinfo, unfiltered = self._get_extract_tarinfo( ++ member, filter_function, path) + if tarinfo is not None: + self._extract_one(tarinfo, path, set_attrs, numeric_owner) + + def _get_extract_tarinfo(self, member, filter_function, path): +- """Get filtered TarInfo (or None) from member, which might be a str""" ++ """Get (filtered, unfiltered) TarInfos from *member* ++ ++ *member* might be a string. ++ ++ Return (None, None) if not found. ++ """ ++ + if isinstance(member, str): +- tarinfo = self.getmember(member) ++ unfiltered = self.getmember(member) + else: +- tarinfo = member ++ unfiltered = member + +- unfiltered = tarinfo ++ filtered = None + try: +- tarinfo = filter_function(tarinfo, path) ++ filtered = filter_function(unfiltered, path) + except (OSError, FilterError) as e: + self._handle_fatal_error(e) + except ExtractError as e: + self._handle_nonfatal_error(e) +- if tarinfo is None: ++ if filtered is None: + self._dbg(2, "tarfile: Excluded %r" % unfiltered.name) +- return None ++ return None, None ++ + # Prepare the link target for makelink(). +- if tarinfo.islnk(): +- tarinfo = copy.copy(tarinfo) +- tarinfo._link_target = os.path.join(path, tarinfo.linkname) +- return tarinfo ++ if filtered.islnk(): ++ filtered = copy.copy(filtered) ++ filtered._link_target = os.path.join(path, filtered.linkname) ++ return filtered, unfiltered + +- def _extract_one(self, tarinfo, path, set_attrs, numeric_owner): +- """Extract from filtered tarinfo to disk""" ++ def _extract_one(self, tarinfo, path, set_attrs, numeric_owner, ++ filter_function=None): ++ """Extract from filtered tarinfo to disk. ++ ++ filter_function is only used when extracting a *different* ++ member (e.g. 
as fallback to creating a symlink) ++ """ + self._check("r") + + try: + self._extract_member(tarinfo, os.path.join(path, tarinfo.name), + set_attrs=set_attrs, +- numeric_owner=numeric_owner) ++ numeric_owner=numeric_owner, ++ filter_function=filter_function, ++ extraction_root=path) + except OSError as e: + self._handle_fatal_error(e) + except ExtractError as e: +@@ -2414,9 +2474,13 @@ def extractfile(self, member): + return None + + def _extract_member(self, tarinfo, targetpath, set_attrs=True, +- numeric_owner=False): +- """Extract the TarInfo object tarinfo to a physical ++ numeric_owner=False, *, filter_function=None, ++ extraction_root=None): ++ """Extract the filtered TarInfo object tarinfo to a physical + file called targetpath. ++ ++ filter_function is only used when extracting a *different* ++ member (e.g. as fallback to creating a symlink) + """ + # Fetch the TarInfo object for the given name + # and build the destination pathname, replacing +@@ -2445,7 +2509,10 @@ def _extract_member(self, tarinfo, targetpath, set_attrs=True, + elif tarinfo.ischr() or tarinfo.isblk(): + self.makedev(tarinfo, targetpath) + elif tarinfo.islnk() or tarinfo.issym(): +- self.makelink(tarinfo, targetpath) ++ self.makelink_with_filter( ++ tarinfo, targetpath, ++ filter_function=filter_function, ++ extraction_root=extraction_root) + elif tarinfo.type not in SUPPORTED_TYPES: + self.makeunknown(tarinfo, targetpath) + else: +@@ -2528,10 +2595,18 @@ def makedev(self, tarinfo, targetpath): + os.makedev(tarinfo.devmajor, tarinfo.devminor)) + + def makelink(self, tarinfo, targetpath): ++ return self.makelink_with_filter(tarinfo, targetpath, None, None) ++ ++ def makelink_with_filter(self, tarinfo, targetpath, ++ filter_function, extraction_root): + """Make a (symbolic) link called targetpath. If it cannot be created + (platform limitation), we try to make a copy of the referenced file + instead of a link. ++ ++ filter_function is only used when extracting a *different* ++ member (e.g. 
as fallback to creating a link). + """ ++ keyerror_to_extracterror = False + try: + # For systems that support symbolic and hard links. + if tarinfo.issym(): +@@ -2539,18 +2614,38 @@ def makelink(self, tarinfo, targetpath): + # Avoid FileExistsError on following os.symlink. + os.unlink(targetpath) + os.symlink(tarinfo.linkname, targetpath) ++ return + else: + if os.path.exists(tarinfo._link_target): + os.link(tarinfo._link_target, targetpath) +- else: +- self._extract_member(self._find_link_target(tarinfo), +- targetpath) ++ return + except symlink_exception: ++ keyerror_to_extracterror = True ++ ++ try: ++ unfiltered = self._find_link_target(tarinfo) ++ except KeyError: ++ if keyerror_to_extracterror: ++ raise ExtractError( ++ "unable to resolve link inside archive") from None ++ else: ++ raise ++ ++ if filter_function is None: ++ filtered = unfiltered ++ else: ++ if extraction_root is None: ++ raise ExtractError( ++ "makelink_with_filter: if filter_function is not None, " ++ + "extraction_root must also not be None") + try: +- self._extract_member(self._find_link_target(tarinfo), +- targetpath) +- except KeyError: +- raise ExtractError("unable to resolve link inside archive") from None ++ filtered = filter_function(unfiltered, extraction_root) ++ except _FILTER_ERRORS as cause: ++ raise LinkFallbackError(tarinfo, unfiltered.name) from cause ++ if filtered is not None: ++ self._extract_member(filtered, targetpath, ++ filter_function=filter_function, ++ extraction_root=extraction_root) + + def chown(self, tarinfo, targetpath, numeric_owner): + """Set owner of targetpath according to tarinfo. 
If numeric_owner +diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py +index 88660fc05a1b10..7d0c0a095bc50a 100644 +--- a/Lib/test/test_ntpath.py ++++ b/Lib/test/test_ntpath.py +@@ -1,9 +1,11 @@ + import ntpath + import os + import string ++import subprocess + import sys + import unittest + import warnings ++from ntpath import ALLOW_MISSING + from test.support import os_helper + from test.support import TestFailed, is_emscripten + from test.support.os_helper import FakePath +@@ -75,6 +77,27 @@ def tester(fn, wantResult): + %(str(fn), str(wantResult), repr(gotResult))) + + ++def _parameterize(*parameters): ++ """Simplistic decorator to parametrize a test ++ ++ Runs the decorated test multiple times in subTest, with a value from ++ 'parameters' passed as an extra positional argument. ++ Calls doCleanups() after each run. ++ ++ Not for general use. Intended to avoid indenting for easier backports. ++ ++ See https://discuss.python.org/t/91827 for discussing generalizations. ++ """ ++ def _parametrize_decorator(func): ++ def _parameterized(self, *args, **kwargs): ++ for parameter in parameters: ++ with self.subTest(parameter): ++ func(self, *args, parameter, **kwargs) ++ self.doCleanups() ++ return _parameterized ++ return _parametrize_decorator ++ ++ + class NtpathTestCase(unittest.TestCase): + def assertPathEqual(self, path1, path2): + if path1 == path2 or _norm(path1) == _norm(path2): +@@ -297,6 +320,27 @@ def test_realpath_curdir(self): + tester("ntpath.realpath('.\\.')", expected) + tester("ntpath.realpath('\\'.join(['.'] * 100))", expected) + ++ def test_realpath_curdir_strict(self): ++ expected = ntpath.normpath(os.getcwd()) ++ tester("ntpath.realpath('.', strict=True)", expected) ++ tester("ntpath.realpath('./.', strict=True)", expected) ++ tester("ntpath.realpath('/'.join(['.'] * 100), strict=True)", expected) ++ tester("ntpath.realpath('.\\.', strict=True)", expected) ++ tester("ntpath.realpath('\\'.join(['.'] * 100), strict=True)", expected) ++ ++ 
def test_realpath_curdir_missing_ok(self): ++ expected = ntpath.normpath(os.getcwd()) ++ tester("ntpath.realpath('.', strict=ALLOW_MISSING)", ++ expected) ++ tester("ntpath.realpath('./.', strict=ALLOW_MISSING)", ++ expected) ++ tester("ntpath.realpath('/'.join(['.'] * 100), strict=ALLOW_MISSING)", ++ expected) ++ tester("ntpath.realpath('.\\.', strict=ALLOW_MISSING)", ++ expected) ++ tester("ntpath.realpath('\\'.join(['.'] * 100), strict=ALLOW_MISSING)", ++ expected) ++ + def test_realpath_pardir(self): + expected = ntpath.normpath(os.getcwd()) + tester("ntpath.realpath('..')", ntpath.dirname(expected)) +@@ -309,17 +353,43 @@ def test_realpath_pardir(self): + tester("ntpath.realpath('\\'.join(['..'] * 50))", + ntpath.splitdrive(expected)[0] + '\\') + ++ def test_realpath_pardir_strict(self): ++ expected = ntpath.normpath(os.getcwd()) ++ tester("ntpath.realpath('..', strict=True)", ntpath.dirname(expected)) ++ tester("ntpath.realpath('../..', strict=True)", ++ ntpath.dirname(ntpath.dirname(expected))) ++ tester("ntpath.realpath('/'.join(['..'] * 50), strict=True)", ++ ntpath.splitdrive(expected)[0] + '\\') ++ tester("ntpath.realpath('..\\..', strict=True)", ++ ntpath.dirname(ntpath.dirname(expected))) ++ tester("ntpath.realpath('\\'.join(['..'] * 50), strict=True)", ++ ntpath.splitdrive(expected)[0] + '\\') ++ ++ def test_realpath_pardir_missing_ok(self): ++ expected = ntpath.normpath(os.getcwd()) ++ tester("ntpath.realpath('..', strict=ALLOW_MISSING)", ++ ntpath.dirname(expected)) ++ tester("ntpath.realpath('../..', strict=ALLOW_MISSING)", ++ ntpath.dirname(ntpath.dirname(expected))) ++ tester("ntpath.realpath('/'.join(['..'] * 50), strict=ALLOW_MISSING)", ++ ntpath.splitdrive(expected)[0] + '\\') ++ tester("ntpath.realpath('..\\..', strict=ALLOW_MISSING)", ++ ntpath.dirname(ntpath.dirname(expected))) ++ tester("ntpath.realpath('\\'.join(['..'] * 50), strict=ALLOW_MISSING)", ++ ntpath.splitdrive(expected)[0] + '\\') ++ + @os_helper.skip_unless_symlink + 
@unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') +- def test_realpath_basic(self): ++ @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) ++ def test_realpath_basic(self, kwargs): + ABSTFN = ntpath.abspath(os_helper.TESTFN) + open(ABSTFN, "wb").close() + self.addCleanup(os_helper.unlink, ABSTFN) + self.addCleanup(os_helper.unlink, ABSTFN + "1") + + os.symlink(ABSTFN, ABSTFN + "1") +- self.assertPathEqual(ntpath.realpath(ABSTFN + "1"), ABSTFN) +- self.assertPathEqual(ntpath.realpath(os.fsencode(ABSTFN + "1")), ++ self.assertPathEqual(ntpath.realpath(ABSTFN + "1", **kwargs), ABSTFN) ++ self.assertPathEqual(ntpath.realpath(os.fsencode(ABSTFN + "1"), **kwargs), + os.fsencode(ABSTFN)) + + # gh-88013: call ntpath.realpath with binary drive name may raise a +@@ -333,8 +403,13 @@ def test_realpath_basic(self): + self.assertEqual(ntpath.realpath(d), d) + + # gh-106242: Embedded nulls and non-strict fallback to abspath +- self.assertEqual(ABSTFN + "\0spam", +- ntpath.realpath(os_helper.TESTFN + "\0spam", strict=False)) ++ if kwargs: ++ with self.assertRaises(OSError): ++ ntpath.realpath(os_helper.TESTFN + "\0spam", ++ **kwargs) ++ else: ++ self.assertEqual(ABSTFN + "\0spam", ++ ntpath.realpath(os_helper.TESTFN + "\0spam", **kwargs)) + + @os_helper.skip_unless_symlink + @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') +@@ -346,19 +421,77 @@ def test_realpath_strict(self): + self.addCleanup(os_helper.unlink, ABSTFN) + self.assertRaises(FileNotFoundError, ntpath.realpath, ABSTFN, strict=True) + self.assertRaises(FileNotFoundError, ntpath.realpath, ABSTFN + "2", strict=True) ++ ++ @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') ++ def test_realpath_invalid_paths(self): ++ realpath = ntpath.realpath ++ ABSTFN = ntpath.abspath(os_helper.TESTFN) ++ ABSTFNb = os.fsencode(ABSTFN) ++ path = ABSTFN + '\x00' ++ # gh-106242: Embedded nulls and non-strict fallback to abspath ++ self.assertEqual(realpath(path, 
strict=False), path) + # gh-106242: Embedded nulls should raise OSError (not ValueError) +- self.assertRaises(OSError, ntpath.realpath, ABSTFN + "\0spam", strict=True) ++ self.assertRaises(OSError, ntpath.realpath, path, strict=True) ++ self.assertRaises(OSError, ntpath.realpath, path, strict=ALLOW_MISSING) ++ path = ABSTFNb + b'\x00' ++ self.assertEqual(realpath(path, strict=False), path) ++ self.assertRaises(OSError, ntpath.realpath, path, strict=True) ++ self.assertRaises(OSError, ntpath.realpath, path, strict=ALLOW_MISSING) ++ path = ABSTFN + '\\nonexistent\\x\x00' ++ self.assertEqual(realpath(path, strict=False), path) ++ self.assertRaises(OSError, ntpath.realpath, path, strict=True) ++ self.assertRaises(OSError, ntpath.realpath, path, strict=ALLOW_MISSING) ++ path = ABSTFNb + b'\\nonexistent\\x\x00' ++ self.assertEqual(realpath(path, strict=False), path) ++ self.assertRaises(OSError, ntpath.realpath, path, strict=True) ++ self.assertRaises(OSError, ntpath.realpath, path, strict=ALLOW_MISSING) ++ path = ABSTFN + '\x00\\..' ++ self.assertEqual(realpath(path, strict=False), os.getcwd()) ++ self.assertEqual(realpath(path, strict=True), os.getcwd()) ++ self.assertEqual(realpath(path, strict=ALLOW_MISSING), os.getcwd()) ++ path = ABSTFNb + b'\x00\\..' ++ self.assertEqual(realpath(path, strict=False), os.getcwdb()) ++ self.assertEqual(realpath(path, strict=True), os.getcwdb()) ++ self.assertEqual(realpath(path, strict=ALLOW_MISSING), os.getcwdb()) ++ path = ABSTFN + '\\nonexistent\\x\x00\\..' ++ self.assertEqual(realpath(path, strict=False), ABSTFN + '\\nonexistent') ++ self.assertRaises(OSError, ntpath.realpath, path, strict=True) ++ self.assertEqual(realpath(path, strict=ALLOW_MISSING), ABSTFN + '\\nonexistent') ++ path = ABSTFNb + b'\\nonexistent\\x\x00\\..' 
++ self.assertEqual(realpath(path, strict=False), ABSTFNb + b'\\nonexistent') ++ self.assertRaises(OSError, ntpath.realpath, path, strict=True) ++ self.assertEqual(realpath(path, strict=ALLOW_MISSING), ABSTFNb + b'\\nonexistent') ++ ++ @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') ++ @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) ++ def test_realpath_invalid_unicode_paths(self, kwargs): ++ realpath = ntpath.realpath ++ ABSTFN = ntpath.abspath(os_helper.TESTFN) ++ ABSTFNb = os.fsencode(ABSTFN) ++ path = ABSTFNb + b'\xff' ++ self.assertRaises(UnicodeDecodeError, ntpath.realpath, path, **kwargs) ++ self.assertRaises(UnicodeDecodeError, ntpath.realpath, path, **kwargs) ++ path = ABSTFNb + b'\\nonexistent\\\xff' ++ self.assertRaises(UnicodeDecodeError, ntpath.realpath, path, **kwargs) ++ self.assertRaises(UnicodeDecodeError, ntpath.realpath, path, **kwargs) ++ path = ABSTFNb + b'\xff\\..' ++ self.assertRaises(UnicodeDecodeError, ntpath.realpath, path, **kwargs) ++ self.assertRaises(UnicodeDecodeError, ntpath.realpath, path, **kwargs) ++ path = ABSTFNb + b'\\nonexistent\\\xff\\..' 
++ self.assertRaises(UnicodeDecodeError, ntpath.realpath, path, **kwargs) ++ self.assertRaises(UnicodeDecodeError, ntpath.realpath, path, **kwargs) + + @os_helper.skip_unless_symlink + @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') +- def test_realpath_relative(self): ++ @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) ++ def test_realpath_relative(self, kwargs): + ABSTFN = ntpath.abspath(os_helper.TESTFN) + open(ABSTFN, "wb").close() + self.addCleanup(os_helper.unlink, ABSTFN) + self.addCleanup(os_helper.unlink, ABSTFN + "1") + + os.symlink(ABSTFN, ntpath.relpath(ABSTFN + "1")) +- self.assertPathEqual(ntpath.realpath(ABSTFN + "1"), ABSTFN) ++ self.assertPathEqual(ntpath.realpath(ABSTFN + "1", **kwargs), ABSTFN) + + @os_helper.skip_unless_symlink + @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') +@@ -510,7 +643,62 @@ def test_realpath_symlink_loops_strict(self): + + @os_helper.skip_unless_symlink + @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') +- def test_realpath_symlink_prefix(self): ++ def test_realpath_symlink_loops_raise(self): ++ # Symlink loops raise OSError in ALLOW_MISSING mode ++ ABSTFN = ntpath.abspath(os_helper.TESTFN) ++ self.addCleanup(os_helper.unlink, ABSTFN) ++ self.addCleanup(os_helper.unlink, ABSTFN + "1") ++ self.addCleanup(os_helper.unlink, ABSTFN + "2") ++ self.addCleanup(os_helper.unlink, ABSTFN + "y") ++ self.addCleanup(os_helper.unlink, ABSTFN + "c") ++ self.addCleanup(os_helper.unlink, ABSTFN + "a") ++ self.addCleanup(os_helper.unlink, ABSTFN + "x") ++ ++ os.symlink(ABSTFN, ABSTFN) ++ self.assertRaises(OSError, ntpath.realpath, ABSTFN, strict=ALLOW_MISSING) ++ ++ os.symlink(ABSTFN + "1", ABSTFN + "2") ++ os.symlink(ABSTFN + "2", ABSTFN + "1") ++ self.assertRaises(OSError, ntpath.realpath, ABSTFN + "1", ++ strict=ALLOW_MISSING) ++ self.assertRaises(OSError, ntpath.realpath, ABSTFN + "2", ++ strict=ALLOW_MISSING) ++ self.assertRaises(OSError, ntpath.realpath, 
ABSTFN + "1\\x", ++ strict=ALLOW_MISSING) ++ ++ # Windows eliminates '..' components before resolving links; ++ # realpath is not expected to raise if this removes the loop. ++ self.assertPathEqual(ntpath.realpath(ABSTFN + "1\\.."), ++ ntpath.dirname(ABSTFN)) ++ self.assertPathEqual(ntpath.realpath(ABSTFN + "1\\..\\x"), ++ ntpath.dirname(ABSTFN) + "\\x") ++ ++ os.symlink(ABSTFN + "x", ABSTFN + "y") ++ self.assertPathEqual(ntpath.realpath(ABSTFN + "1\\..\\" ++ + ntpath.basename(ABSTFN) + "y"), ++ ABSTFN + "x") ++ self.assertRaises( ++ OSError, ntpath.realpath, ++ ABSTFN + "1\\..\\" + ntpath.basename(ABSTFN) + "1", ++ strict=ALLOW_MISSING) ++ ++ os.symlink(ntpath.basename(ABSTFN) + "a\\b", ABSTFN + "a") ++ self.assertRaises(OSError, ntpath.realpath, ABSTFN + "a", ++ strict=ALLOW_MISSING) ++ ++ os.symlink("..\\" + ntpath.basename(ntpath.dirname(ABSTFN)) ++ + "\\" + ntpath.basename(ABSTFN) + "c", ABSTFN + "c") ++ self.assertRaises(OSError, ntpath.realpath, ABSTFN + "c", ++ strict=ALLOW_MISSING) ++ ++ # Test using relative path as well. 
++ self.assertRaises(OSError, ntpath.realpath, ntpath.basename(ABSTFN), ++ strict=ALLOW_MISSING) ++ ++ @os_helper.skip_unless_symlink ++ @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') ++ @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) ++ def test_realpath_symlink_prefix(self, kwargs): + ABSTFN = ntpath.abspath(os_helper.TESTFN) + self.addCleanup(os_helper.unlink, ABSTFN + "3") + self.addCleanup(os_helper.unlink, "\\\\?\\" + ABSTFN + "3.") +@@ -525,9 +713,9 @@ def test_realpath_symlink_prefix(self): + f.write(b'1') + os.symlink("\\\\?\\" + ABSTFN + "3.", ABSTFN + "3.link") + +- self.assertPathEqual(ntpath.realpath(ABSTFN + "3link"), ++ self.assertPathEqual(ntpath.realpath(ABSTFN + "3link", **kwargs), + ABSTFN + "3") +- self.assertPathEqual(ntpath.realpath(ABSTFN + "3.link"), ++ self.assertPathEqual(ntpath.realpath(ABSTFN + "3.link", **kwargs), + "\\\\?\\" + ABSTFN + "3.") + + # Resolved paths should be usable to open target files +@@ -537,14 +725,17 @@ def test_realpath_symlink_prefix(self): + self.assertEqual(f.read(), b'1') + + # When the prefix is included, it is not stripped +- self.assertPathEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3link"), ++ self.assertPathEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3link", **kwargs), + "\\\\?\\" + ABSTFN + "3") +- self.assertPathEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3.link"), ++ self.assertPathEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3.link", **kwargs), + "\\\\?\\" + ABSTFN + "3.") + + @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') + def test_realpath_nul(self): + tester("ntpath.realpath('NUL')", r'\\.\NUL') ++ tester("ntpath.realpath('NUL', strict=False)", r'\\.\NUL') ++ tester("ntpath.realpath('NUL', strict=True)", r'\\.\NUL') ++ tester("ntpath.realpath('NUL', strict=ALLOW_MISSING)", r'\\.\NUL') + + @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') + @unittest.skipUnless(HAVE_GETSHORTPATHNAME, 'need _getshortpathname') +@@ -568,12 
+759,20 @@ def test_realpath_cwd(self): + + self.assertPathEqual(test_file_long, ntpath.realpath(test_file_short)) + +- with os_helper.change_cwd(test_dir_long): +- self.assertPathEqual(test_file_long, ntpath.realpath("file.txt")) +- with os_helper.change_cwd(test_dir_long.lower()): +- self.assertPathEqual(test_file_long, ntpath.realpath("file.txt")) +- with os_helper.change_cwd(test_dir_short): +- self.assertPathEqual(test_file_long, ntpath.realpath("file.txt")) ++ for kwargs in {}, {'strict': True}, {'strict': ALLOW_MISSING}: ++ with self.subTest(**kwargs): ++ with os_helper.change_cwd(test_dir_long): ++ self.assertPathEqual( ++ test_file_long, ++ ntpath.realpath("file.txt", **kwargs)) ++ with os_helper.change_cwd(test_dir_long.lower()): ++ self.assertPathEqual( ++ test_file_long, ++ ntpath.realpath("file.txt", **kwargs)) ++ with os_helper.change_cwd(test_dir_short): ++ self.assertPathEqual( ++ test_file_long, ++ ntpath.realpath("file.txt", **kwargs)) + + def test_expandvars(self): + with os_helper.EnvironmentVarGuard() as env: +diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py +index 8a1dd131928cff..3774b7596f16a3 100644 +--- a/Lib/test/test_posixpath.py ++++ b/Lib/test/test_posixpath.py +@@ -2,7 +2,9 @@ + import posixpath + import sys + import unittest +-from posixpath import realpath, abspath, dirname, basename ++from functools import partial ++from posixpath import realpath, abspath, dirname, basename, ALLOW_MISSING ++from test import support + from test import test_genericpath + from test.support import import_helper + from test.support import os_helper +@@ -36,6 +38,26 @@ def safe_rmdir(dirname): + except OSError: + pass + ++def _parameterize(*parameters): ++ """Simplistic decorator to parametrize a test ++ ++ Runs the decorated test multiple times in subTest, with a value from ++ 'parameters' passed as an extra positional argument. ++ Does *not* call doCleanups() after each run. ++ ++ Not for general use. 
Intended to avoid indenting for easier backports. ++ ++ See https://discuss.python.org/t/91827 for discussing generalizations. ++ """ ++ def _parametrize_decorator(func): ++ def _parameterized(self, *args, **kwargs): ++ for parameter in parameters: ++ with self.subTest(parameter): ++ func(self, *args, parameter, **kwargs) ++ return _parameterized ++ return _parametrize_decorator ++ ++ + class PosixPathTest(unittest.TestCase): + + def setUp(self): +@@ -369,32 +391,35 @@ def test_normpath(self): + self.assertEqual(result, expected) + + @skip_if_ABSTFN_contains_backslash +- def test_realpath_curdir(self): +- self.assertEqual(realpath('.'), os.getcwd()) +- self.assertEqual(realpath('./.'), os.getcwd()) +- self.assertEqual(realpath('/'.join(['.'] * 100)), os.getcwd()) ++ @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) ++ def test_realpath_curdir(self, kwargs): ++ self.assertEqual(realpath('.', **kwargs), os.getcwd()) ++ self.assertEqual(realpath('./.', **kwargs), os.getcwd()) ++ self.assertEqual(realpath('/'.join(['.'] * 100), **kwargs), os.getcwd()) + +- self.assertEqual(realpath(b'.'), os.getcwdb()) +- self.assertEqual(realpath(b'./.'), os.getcwdb()) +- self.assertEqual(realpath(b'/'.join([b'.'] * 100)), os.getcwdb()) ++ self.assertEqual(realpath(b'.', **kwargs), os.getcwdb()) ++ self.assertEqual(realpath(b'./.', **kwargs), os.getcwdb()) ++ self.assertEqual(realpath(b'/'.join([b'.'] * 100), **kwargs), os.getcwdb()) + + @skip_if_ABSTFN_contains_backslash +- def test_realpath_pardir(self): +- self.assertEqual(realpath('..'), dirname(os.getcwd())) +- self.assertEqual(realpath('../..'), dirname(dirname(os.getcwd()))) +- self.assertEqual(realpath('/'.join(['..'] * 100)), '/') ++ @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) ++ def test_realpath_pardir(self, kwargs): ++ self.assertEqual(realpath('..', **kwargs), dirname(os.getcwd())) ++ self.assertEqual(realpath('../..', **kwargs), dirname(dirname(os.getcwd()))) ++ 
self.assertEqual(realpath('/'.join(['..'] * 100), **kwargs), '/') + +- self.assertEqual(realpath(b'..'), dirname(os.getcwdb())) +- self.assertEqual(realpath(b'../..'), dirname(dirname(os.getcwdb()))) +- self.assertEqual(realpath(b'/'.join([b'..'] * 100)), b'/') ++ self.assertEqual(realpath(b'..', **kwargs), dirname(os.getcwdb())) ++ self.assertEqual(realpath(b'../..', **kwargs), dirname(dirname(os.getcwdb()))) ++ self.assertEqual(realpath(b'/'.join([b'..'] * 100), **kwargs), b'/') + + @os_helper.skip_unless_symlink + @skip_if_ABSTFN_contains_backslash +- def test_realpath_basic(self): ++ @_parameterize({}, {'strict': ALLOW_MISSING}) ++ def test_realpath_basic(self, kwargs): + # Basic operation. + try: + os.symlink(ABSTFN+"1", ABSTFN) +- self.assertEqual(realpath(ABSTFN), ABSTFN+"1") ++ self.assertEqual(realpath(ABSTFN, **kwargs), ABSTFN+"1") + finally: + os_helper.unlink(ABSTFN) + +@@ -410,15 +435,122 @@ def test_realpath_strict(self): + finally: + os_helper.unlink(ABSTFN) + ++ def test_realpath_invalid_paths(self): ++ path = '/\x00' ++ self.assertRaises(ValueError, realpath, path, strict=False) ++ self.assertRaises(ValueError, realpath, path, strict=True) ++ self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING) ++ path = b'/\x00' ++ self.assertRaises(ValueError, realpath, path, strict=False) ++ self.assertRaises(ValueError, realpath, path, strict=True) ++ self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING) ++ path = '/nonexistent/x\x00' ++ self.assertRaises(ValueError, realpath, path, strict=False) ++ self.assertRaises(FileNotFoundError, realpath, path, strict=True) ++ self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING) ++ path = b'/nonexistent/x\x00' ++ self.assertRaises(ValueError, realpath, path, strict=False) ++ self.assertRaises(FileNotFoundError, realpath, path, strict=True) ++ self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING) ++ path = '/\x00/..' 
++ self.assertRaises(ValueError, realpath, path, strict=False) ++ self.assertRaises(ValueError, realpath, path, strict=True) ++ self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING) ++ path = b'/\x00/..' ++ self.assertRaises(ValueError, realpath, path, strict=False) ++ self.assertRaises(ValueError, realpath, path, strict=True) ++ self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING) ++ ++ path = '/nonexistent/x\x00/..' ++ self.assertRaises(ValueError, realpath, path, strict=False) ++ self.assertRaises(FileNotFoundError, realpath, path, strict=True) ++ self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING) ++ path = b'/nonexistent/x\x00/..' ++ self.assertRaises(ValueError, realpath, path, strict=False) ++ self.assertRaises(FileNotFoundError, realpath, path, strict=True) ++ self.assertRaises(ValueError, realpath, path, strict=ALLOW_MISSING) ++ ++ path = '/\udfff' ++ if sys.platform == 'win32': ++ self.assertEqual(realpath(path, strict=False), path) ++ self.assertRaises(FileNotFoundError, realpath, path, strict=True) ++ self.assertEqual(realpath(path, strict=ALLOW_MISSING), path) ++ else: ++ self.assertRaises(UnicodeEncodeError, realpath, path, strict=False) ++ self.assertRaises(UnicodeEncodeError, realpath, path, strict=True) ++ self.assertRaises(UnicodeEncodeError, realpath, path, strict=ALLOW_MISSING) ++ path = '/nonexistent/\udfff' ++ if sys.platform == 'win32': ++ self.assertEqual(realpath(path, strict=False), path) ++ self.assertEqual(realpath(path, strict=ALLOW_MISSING), path) ++ else: ++ self.assertRaises(UnicodeEncodeError, realpath, path, strict=False) ++ self.assertRaises(UnicodeEncodeError, realpath, path, strict=ALLOW_MISSING) ++ self.assertRaises(FileNotFoundError, realpath, path, strict=True) ++ path = '/\udfff/..' 
++ if sys.platform == 'win32': ++ self.assertEqual(realpath(path, strict=False), '/') ++ self.assertRaises(FileNotFoundError, realpath, path, strict=True) ++ self.assertEqual(realpath(path, strict=ALLOW_MISSING), '/') ++ else: ++ self.assertRaises(UnicodeEncodeError, realpath, path, strict=False) ++ self.assertRaises(UnicodeEncodeError, realpath, path, strict=True) ++ self.assertRaises(UnicodeEncodeError, realpath, path, strict=ALLOW_MISSING) ++ path = '/nonexistent/\udfff/..' ++ if sys.platform == 'win32': ++ self.assertEqual(realpath(path, strict=False), '/nonexistent') ++ self.assertEqual(realpath(path, strict=ALLOW_MISSING), '/nonexistent') ++ else: ++ self.assertRaises(UnicodeEncodeError, realpath, path, strict=False) ++ self.assertRaises(UnicodeEncodeError, realpath, path, strict=ALLOW_MISSING) ++ self.assertRaises(FileNotFoundError, realpath, path, strict=True) ++ ++ path = b'/\xff' ++ if sys.platform == 'win32': ++ self.assertRaises(UnicodeDecodeError, realpath, path, strict=False) ++ self.assertRaises(UnicodeDecodeError, realpath, path, strict=True) ++ self.assertRaises(UnicodeDecodeError, realpath, path, strict=ALLOW_MISSING) ++ else: ++ self.assertEqual(realpath(path, strict=False), path) ++ if support.is_wasi: ++ self.assertRaises(OSError, realpath, path, strict=True) ++ self.assertRaises(OSError, realpath, path, strict=ALLOW_MISSING) ++ else: ++ self.assertRaises(FileNotFoundError, realpath, path, strict=True) ++ self.assertEqual(realpath(path, strict=ALLOW_MISSING), path) ++ path = b'/nonexistent/\xff' ++ if sys.platform == 'win32': ++ self.assertRaises(UnicodeDecodeError, realpath, path, strict=False) ++ self.assertRaises(UnicodeDecodeError, realpath, path, strict=ALLOW_MISSING) ++ else: ++ self.assertEqual(realpath(path, strict=False), path) ++ if support.is_wasi: ++ self.assertRaises(OSError, realpath, path, strict=True) ++ self.assertRaises(OSError, realpath, path, strict=ALLOW_MISSING) ++ else: ++ self.assertRaises(FileNotFoundError, realpath, 
path, strict=True) ++ + @os_helper.skip_unless_symlink + @skip_if_ABSTFN_contains_backslash +- def test_realpath_relative(self): ++ @_parameterize({}, {'strict': ALLOW_MISSING}) ++ def test_realpath_relative(self, kwargs): + try: + os.symlink(posixpath.relpath(ABSTFN+"1"), ABSTFN) +- self.assertEqual(realpath(ABSTFN), ABSTFN+"1") ++ self.assertEqual(realpath(ABSTFN, **kwargs), ABSTFN+"1") + finally: + os_helper.unlink(ABSTFN) + ++ @os_helper.skip_unless_symlink ++ @skip_if_ABSTFN_contains_backslash ++ @_parameterize({}, {'strict': ALLOW_MISSING}) ++ def test_realpath_missing_pardir(self, kwargs): ++ try: ++ os.symlink(os_helper.TESTFN + "1", os_helper.TESTFN) ++ self.assertEqual( ++ realpath("nonexistent/../" + os_helper.TESTFN, **kwargs), ABSTFN + "1") ++ finally: ++ os_helper.unlink(os_helper.TESTFN) ++ + @os_helper.skip_unless_symlink + @skip_if_ABSTFN_contains_backslash + def test_realpath_symlink_loops(self): +@@ -462,37 +594,38 @@ def test_realpath_symlink_loops(self): + + @os_helper.skip_unless_symlink + @skip_if_ABSTFN_contains_backslash +- def test_realpath_symlink_loops_strict(self): ++ @_parameterize({'strict': True}, {'strict': ALLOW_MISSING}) ++ def test_realpath_symlink_loops_strict(self, kwargs): + # Bug #43757, raise OSError if we get into an infinite symlink loop in +- # strict mode. ++ # the strict modes. 
+ try: + os.symlink(ABSTFN, ABSTFN) +- self.assertRaises(OSError, realpath, ABSTFN, strict=True) ++ self.assertRaises(OSError, realpath, ABSTFN, **kwargs) + + os.symlink(ABSTFN+"1", ABSTFN+"2") + os.symlink(ABSTFN+"2", ABSTFN+"1") +- self.assertRaises(OSError, realpath, ABSTFN+"1", strict=True) +- self.assertRaises(OSError, realpath, ABSTFN+"2", strict=True) ++ self.assertRaises(OSError, realpath, ABSTFN+"1", **kwargs) ++ self.assertRaises(OSError, realpath, ABSTFN+"2", **kwargs) + +- self.assertRaises(OSError, realpath, ABSTFN+"1/x", strict=True) +- self.assertRaises(OSError, realpath, ABSTFN+"1/..", strict=True) +- self.assertRaises(OSError, realpath, ABSTFN+"1/../x", strict=True) ++ self.assertRaises(OSError, realpath, ABSTFN+"1/x", **kwargs) ++ self.assertRaises(OSError, realpath, ABSTFN+"1/..", **kwargs) ++ self.assertRaises(OSError, realpath, ABSTFN+"1/../x", **kwargs) + os.symlink(ABSTFN+"x", ABSTFN+"y") + self.assertRaises(OSError, realpath, +- ABSTFN+"1/../" + basename(ABSTFN) + "y", strict=True) ++ ABSTFN+"1/../" + basename(ABSTFN) + "y", **kwargs) + self.assertRaises(OSError, realpath, +- ABSTFN+"1/../" + basename(ABSTFN) + "1", strict=True) ++ ABSTFN+"1/../" + basename(ABSTFN) + "1", **kwargs) + + os.symlink(basename(ABSTFN) + "a/b", ABSTFN+"a") +- self.assertRaises(OSError, realpath, ABSTFN+"a", strict=True) ++ self.assertRaises(OSError, realpath, ABSTFN+"a", **kwargs) + + os.symlink("../" + basename(dirname(ABSTFN)) + "/" + + basename(ABSTFN) + "c", ABSTFN+"c") +- self.assertRaises(OSError, realpath, ABSTFN+"c", strict=True) ++ self.assertRaises(OSError, realpath, ABSTFN+"c", **kwargs) + + # Test using relative path as well. 
+ with os_helper.change_cwd(dirname(ABSTFN)): +- self.assertRaises(OSError, realpath, basename(ABSTFN), strict=True) ++ self.assertRaises(OSError, realpath, basename(ABSTFN), **kwargs) + finally: + os_helper.unlink(ABSTFN) + os_helper.unlink(ABSTFN+"1") +@@ -503,13 +636,14 @@ def test_realpath_symlink_loops_strict(self): + + @os_helper.skip_unless_symlink + @skip_if_ABSTFN_contains_backslash +- def test_realpath_repeated_indirect_symlinks(self): ++ @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) ++ def test_realpath_repeated_indirect_symlinks(self, kwargs): + # Issue #6975. + try: + os.mkdir(ABSTFN) + os.symlink('../' + basename(ABSTFN), ABSTFN + '/self') + os.symlink('self/self/self', ABSTFN + '/link') +- self.assertEqual(realpath(ABSTFN + '/link'), ABSTFN) ++ self.assertEqual(realpath(ABSTFN + '/link', **kwargs), ABSTFN) + finally: + os_helper.unlink(ABSTFN + '/self') + os_helper.unlink(ABSTFN + '/link') +@@ -517,14 +651,15 @@ def test_realpath_repeated_indirect_symlinks(self): + + @os_helper.skip_unless_symlink + @skip_if_ABSTFN_contains_backslash +- def test_realpath_deep_recursion(self): ++ @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) ++ def test_realpath_deep_recursion(self, kwargs): + depth = 10 + try: + os.mkdir(ABSTFN) + for i in range(depth): + os.symlink('/'.join(['%d' % i] * 10), ABSTFN + '/%d' % (i + 1)) + os.symlink('.', ABSTFN + '/0') +- self.assertEqual(realpath(ABSTFN + '/%d' % depth), ABSTFN) ++ self.assertEqual(realpath(ABSTFN + '/%d' % depth, **kwargs), ABSTFN) + + # Test using relative path as well. + with os_helper.change_cwd(ABSTFN): +@@ -536,7 +671,8 @@ def test_realpath_deep_recursion(self): + + @os_helper.skip_unless_symlink + @skip_if_ABSTFN_contains_backslash +- def test_realpath_resolve_parents(self): ++ @_parameterize({}, {'strict': ALLOW_MISSING}) ++ def test_realpath_resolve_parents(self, kwargs): + # We also need to resolve any symlinks in the parents of a relative + # path passed to realpath. 
E.g.: current working directory is + # /usr/doc with 'doc' being a symlink to /usr/share/doc. We call +@@ -547,7 +683,8 @@ def test_realpath_resolve_parents(self): + os.symlink(ABSTFN + "/y", ABSTFN + "/k") + + with os_helper.change_cwd(ABSTFN + "/k"): +- self.assertEqual(realpath("a"), ABSTFN + "/y/a") ++ self.assertEqual(realpath("a", **kwargs), ++ ABSTFN + "/y/a") + finally: + os_helper.unlink(ABSTFN + "/k") + safe_rmdir(ABSTFN + "/y") +@@ -555,7 +692,8 @@ def test_realpath_resolve_parents(self): + + @os_helper.skip_unless_symlink + @skip_if_ABSTFN_contains_backslash +- def test_realpath_resolve_before_normalizing(self): ++ @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) ++ def test_realpath_resolve_before_normalizing(self, kwargs): + # Bug #990669: Symbolic links should be resolved before we + # normalize the path. E.g.: if we have directories 'a', 'k' and 'y' + # in the following hierarchy: +@@ -570,10 +708,10 @@ def test_realpath_resolve_before_normalizing(self): + os.symlink(ABSTFN + "/k/y", ABSTFN + "/link-y") + + # Absolute path. +- self.assertEqual(realpath(ABSTFN + "/link-y/.."), ABSTFN + "/k") ++ self.assertEqual(realpath(ABSTFN + "/link-y/..", **kwargs), ABSTFN + "/k") + # Relative path. + with os_helper.change_cwd(dirname(ABSTFN)): +- self.assertEqual(realpath(basename(ABSTFN) + "/link-y/.."), ++ self.assertEqual(realpath(basename(ABSTFN) + "/link-y/..", **kwargs), + ABSTFN + "/k") + finally: + os_helper.unlink(ABSTFN + "/link-y") +@@ -583,7 +721,8 @@ def test_realpath_resolve_before_normalizing(self): + + @os_helper.skip_unless_symlink + @skip_if_ABSTFN_contains_backslash +- def test_realpath_resolve_first(self): ++ @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) ++ def test_realpath_resolve_first(self, kwargs): + # Bug #1213894: The first component of the path, if not absolute, + # must be resolved too. 
+ +@@ -593,13 +732,70 @@ def test_realpath_resolve_first(self): + os.symlink(ABSTFN, ABSTFN + "link") + with os_helper.change_cwd(dirname(ABSTFN)): + base = basename(ABSTFN) +- self.assertEqual(realpath(base + "link"), ABSTFN) +- self.assertEqual(realpath(base + "link/k"), ABSTFN + "/k") ++ self.assertEqual(realpath(base + "link", **kwargs), ABSTFN) ++ self.assertEqual(realpath(base + "link/k", **kwargs), ABSTFN + "/k") + finally: + os_helper.unlink(ABSTFN + "link") + safe_rmdir(ABSTFN + "/k") + safe_rmdir(ABSTFN) + ++ @os_helper.skip_unless_symlink ++ @skip_if_ABSTFN_contains_backslash ++ @unittest.skipIf(os.chmod not in os.supports_follow_symlinks, "Can't set symlink permissions") ++ @unittest.skipIf(sys.platform != "darwin", "only macOS requires read permission to readlink()") ++ @_parameterize({'strict': True}, {'strict': ALLOW_MISSING}) ++ def test_realpath_unreadable_symlink_strict(self, kwargs): ++ try: ++ os.symlink(ABSTFN+"1", ABSTFN) ++ os.chmod(ABSTFN, 0o000, follow_symlinks=False) ++ with self.assertRaises(PermissionError): ++ realpath(ABSTFN, **kwargs) ++ with self.assertRaises(PermissionError): ++ realpath(ABSTFN + '/foo', **kwargs), ++ with self.assertRaises(PermissionError): ++ realpath(ABSTFN + '/../foo', **kwargs) ++ with self.assertRaises(PermissionError): ++ realpath(ABSTFN + '/foo/..', **kwargs) ++ finally: ++ os.chmod(ABSTFN, 0o755, follow_symlinks=False) ++ os.unlink(ABSTFN) ++ ++ @skip_if_ABSTFN_contains_backslash ++ @os_helper.skip_unless_symlink ++ def test_realpath_unreadable_directory(self): ++ try: ++ os.mkdir(ABSTFN) ++ os.mkdir(ABSTFN + '/k') ++ os.chmod(ABSTFN, 0o000) ++ self.assertEqual(realpath(ABSTFN, strict=False), ABSTFN) ++ self.assertEqual(realpath(ABSTFN, strict=True), ABSTFN) ++ self.assertEqual(realpath(ABSTFN, strict=ALLOW_MISSING), ABSTFN) ++ ++ try: ++ os.stat(ABSTFN) ++ except PermissionError: ++ pass ++ else: ++ self.skipTest('Cannot block permissions') ++ ++ self.assertEqual(realpath(ABSTFN + '/k', strict=False), ++ 
ABSTFN + '/k') ++ self.assertRaises(PermissionError, realpath, ABSTFN + '/k', ++ strict=True) ++ self.assertRaises(PermissionError, realpath, ABSTFN + '/k', ++ strict=ALLOW_MISSING) ++ ++ self.assertEqual(realpath(ABSTFN + '/missing', strict=False), ++ ABSTFN + '/missing') ++ self.assertRaises(PermissionError, realpath, ABSTFN + '/missing', ++ strict=True) ++ self.assertRaises(PermissionError, realpath, ABSTFN + '/missing', ++ strict=ALLOW_MISSING) ++ finally: ++ os.chmod(ABSTFN, 0o755) ++ safe_rmdir(ABSTFN + '/k') ++ safe_rmdir(ABSTFN) ++ + def test_relpath(self): + (real_getcwd, os.getcwd) = (os.getcwd, lambda: r"/home/user/bar") + try: +@@ -776,9 +972,12 @@ def test_path_normpath(self): + def test_path_abspath(self): + self.assertPathEqual(self.path.abspath) + +- def test_path_realpath(self): ++ @_parameterize({}, {'strict': True}, {'strict': ALLOW_MISSING}) ++ def test_path_realpath(self, kwargs): + self.assertPathEqual(self.path.realpath) + ++ self.assertPathEqual(partial(self.path.realpath, **kwargs)) ++ + def test_path_relpath(self): + self.assertPathEqual(self.path.relpath) + +diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py +index c99c88ce93af02..7377acdf398622 100644 +--- a/Lib/test/test_tarfile.py ++++ b/Lib/test/test_tarfile.py +@@ -2533,7 +2533,7 @@ def test__all__(self): + "fully_trusted_filter", "data_filter", + "tar_filter", "FilterError", "AbsoluteLinkError", + "OutsideDestinationError", "SpecialFileError", "AbsolutePathError", +- "LinkOutsideDestinationError", ++ "LinkOutsideDestinationError", "LinkFallbackError", + } + support.check__all__(self, tarfile, not_exported=not_exported) + +@@ -2549,6 +2549,31 @@ def test_useful_error_message_when_modules_missing(self): + str(excinfo.exception), + ) + ++ @unittest.skipUnless(os_helper.can_symlink(), 'requires symlink support') ++ @unittest.skipUnless(hasattr(os, 'chmod'), "missing os.chmod") ++ @unittest.mock.patch('os.chmod') ++ def test_deferred_directory_attributes_update(self, 
mock_chmod): ++ # Regression test for gh-127987: setting attributes on arbitrary files ++ tempdir = os.path.join(TEMPDIR, 'test127987') ++ def mock_chmod_side_effect(path, mode, **kwargs): ++ target_path = os.path.realpath(path) ++ if os.path.commonpath([target_path, tempdir]) != tempdir: ++ raise Exception("should not try to chmod anything outside the destination", target_path) ++ mock_chmod.side_effect = mock_chmod_side_effect ++ ++ outside_tree_dir = os.path.join(TEMPDIR, 'outside_tree_dir') ++ with ArchiveMaker() as arc: ++ arc.add('x', symlink_to='.') ++ arc.add('x', type=tarfile.DIRTYPE, mode='?rwsrwsrwt') ++ arc.add('x', symlink_to=outside_tree_dir) ++ ++ os.makedirs(outside_tree_dir) ++ try: ++ arc.open().extractall(path=tempdir, filter='tar') ++ finally: ++ os_helper.rmtree(outside_tree_dir) ++ os_helper.rmtree(tempdir) ++ + + class CommandLineTest(unittest.TestCase): + +@@ -3105,6 +3130,10 @@ def check_files_present(self, directory): + got_paths = set( + p.relative_to(directory) + for p in pathlib.Path(directory).glob('**/*')) ++ if self.extraction_filter == 'data': ++ # The 'data' filter is expected to reject special files ++ for path in 'ustar/fifotype', 'ustar/blktype', 'ustar/chrtype': ++ got_paths.discard(pathlib.Path(path)) + self.assertEqual(self.control_paths, got_paths) + + @contextmanager +@@ -3331,12 +3360,28 @@ def __exit__(self, *exc): + self.bio = None + + def add(self, name, *, type=None, symlink_to=None, hardlink_to=None, +- mode=None, size=None, **kwargs): +- """Add a member to the test archive. Call within `with`.""" ++ mode=None, size=None, content=None, **kwargs): ++ """Add a member to the test archive. Call within `with`. ++ ++ Provides many shortcuts: ++ - default `type` is based on symlink_to, hardlink_to, and trailing `/` ++ in name (which is stripped) ++ - size & content defaults are based on each other ++ - content can be str or bytes ++ - mode should be textual ('-rwxrwxrwx') ++ ++ (add more! 
this is unstable internal test-only API) ++ """ + name = str(name) + tarinfo = tarfile.TarInfo(name).replace(**kwargs) ++ if content is not None: ++ if isinstance(content, str): ++ content = content.encode() ++ size = len(content) + if size is not None: + tarinfo.size = size ++ if content is None: ++ content = bytes(tarinfo.size) + if mode: + tarinfo.mode = _filemode_to_int(mode) + if symlink_to is not None: +@@ -3350,7 +3395,7 @@ def add(self, name, *, type=None, symlink_to=None, hardlink_to=None, + if type is not None: + tarinfo.type = type + if tarinfo.isreg(): +- fileobj = io.BytesIO(bytes(tarinfo.size)) ++ fileobj = io.BytesIO(content) + else: + fileobj = None + self.tar_w.addfile(tarinfo, fileobj) +@@ -3384,7 +3429,7 @@ class TestExtractionFilters(unittest.TestCase): + destdir = outerdir / 'dest' + + @contextmanager +- def check_context(self, tar, filter): ++ def check_context(self, tar, filter, *, check_flag=True): + """Extracts `tar` to `self.destdir` and allows checking the result + + If an error occurs, it must be checked using `expect_exception` +@@ -3393,27 +3438,40 @@ def check_context(self, tar, filter): + except the destination directory itself and parent directories of + other files. + When checking directories, do so before their contents. ++ ++ A file called 'flag' is made in outerdir (i.e. outside destdir) ++ before extraction; it should not be altered nor should its contents ++ be read/copied. 
+ """ + with os_helper.temp_dir(self.outerdir): ++ flag_path = self.outerdir / 'flag' ++ flag_path.write_text('capture me') + try: + tar.extractall(self.destdir, filter=filter) + except Exception as exc: + self.raised_exception = exc ++ self.reraise_exception = True + self.expected_paths = set() + else: + self.raised_exception = None ++ self.reraise_exception = False + self.expected_paths = set(self.outerdir.glob('**/*')) + self.expected_paths.discard(self.destdir) ++ self.expected_paths.discard(flag_path) + try: +- yield ++ yield self + finally: + tar.close() +- if self.raised_exception: ++ if self.reraise_exception: + raise self.raised_exception + self.assertEqual(self.expected_paths, set()) ++ if check_flag: ++ self.assertEqual(flag_path.read_text(), 'capture me') ++ else: ++ assert filter == 'fully_trusted' + + def expect_file(self, name, type=None, symlink_to=None, mode=None, +- size=None): ++ size=None, content=None): + """Check a single file. See check_context.""" + if self.raised_exception: + raise self.raised_exception +@@ -3432,26 +3490,45 @@ def expect_file(self, name, type=None, symlink_to=None, mode=None, + # The symlink might be the same (textually) as what we expect, + # but some systems change the link to an equivalent path, so + # we fall back to samefile(). 
+- if expected != got: +- self.assertTrue(got.samefile(expected)) ++ try: ++ if expected != got: ++ self.assertTrue(got.samefile(expected)) ++ except Exception as e: ++ # attach a note, so it's shown even if `samefile` fails ++ e.add_note(f'{expected=}, {got=}') ++ raise + elif type == tarfile.REGTYPE or type is None: + self.assertTrue(path.is_file()) + elif type == tarfile.DIRTYPE: + self.assertTrue(path.is_dir()) + elif type == tarfile.FIFOTYPE: + self.assertTrue(path.is_fifo()) ++ elif type == tarfile.SYMTYPE: ++ self.assertTrue(path.is_symlink()) + else: + raise NotImplementedError(type) + if size is not None: + self.assertEqual(path.stat().st_size, size) ++ if content is not None: ++ self.assertEqual(path.read_text(), content) + for parent in path.parents: + self.expected_paths.discard(parent) + ++ def expect_any_tree(self, name): ++ """Check a directory; forget about its contents.""" ++ tree_path = (self.destdir / name).resolve() ++ self.expect_file(tree_path, type=tarfile.DIRTYPE) ++ self.expected_paths = { ++ p for p in self.expected_paths ++ if tree_path not in p.parents ++ } ++ + def expect_exception(self, exc_type, message_re='.'): + with self.assertRaisesRegex(exc_type, message_re): + if self.raised_exception is not None: + raise self.raised_exception +- self.raised_exception = None ++ self.reraise_exception = False ++ return self.raised_exception + + def test_benign_file(self): + with ArchiveMaker() as arc: +@@ -3536,6 +3613,80 @@ def test_parent_symlink(self): + with self.check_context(arc.open(), 'data'): + self.expect_file('parent/evil') + ++ @symlink_test ++ @os_helper.skip_unless_symlink ++ def test_realpath_limit_attack(self): ++ # (CVE-2025-4517) ++ ++ with ArchiveMaker() as arc: ++ # populate the symlinks and dirs that expand in os.path.realpath() ++ # The component length is chosen so that in common cases, the unexpanded ++ # path fits in PATH_MAX, but it overflows when the final symlink ++ # is expanded ++ steps = "abcdefghijklmnop" ++ if 
sys.platform == 'win32': ++ component = 'd' * 25 ++ elif 'PC_PATH_MAX' in os.pathconf_names: ++ max_path_len = os.pathconf(self.outerdir.parent, "PC_PATH_MAX") ++ path_sep_len = 1 ++ dest_len = len(str(self.destdir)) + path_sep_len ++ component_len = (max_path_len - dest_len) // (len(steps) + path_sep_len) ++ component = 'd' * component_len ++ else: ++ raise NotImplementedError(f"Need to guess component length for {sys.platform}") ++ path = "" ++ step_path = "" ++ for i in steps: ++ arc.add(os.path.join(path, component), type=tarfile.DIRTYPE, ++ mode='drwxrwxrwx') ++ arc.add(os.path.join(path, i), symlink_to=component) ++ path = os.path.join(path, component) ++ step_path = os.path.join(step_path, i) ++ # create the final symlink that exceeds PATH_MAX and simply points ++ # to the top dir. ++ # this link will never be expanded by ++ # os.path.realpath(strict=False), nor anything after it. ++ linkpath = os.path.join(*steps, "l"*254) ++ parent_segments = [".."] * len(steps) ++ arc.add(linkpath, symlink_to=os.path.join(*parent_segments)) ++ # make a symlink outside to keep the tar command happy ++ arc.add("escape", symlink_to=os.path.join(linkpath, "..")) ++ # use the symlinks above, that are not checked, to create a hardlink ++ # to a file outside of the destination path ++ arc.add("flaglink", hardlink_to=os.path.join("escape", "flag")) ++ # now that we have the hardlink we can overwrite the file ++ arc.add("flaglink", content='overwrite') ++ # we can also create new files as well! ++ arc.add("escape/newfile", content='new') ++ ++ with (self.subTest('fully_trusted'), ++ self.check_context(arc.open(), filter='fully_trusted', ++ check_flag=False)): ++ if sys.platform == 'win32': ++ self.expect_exception((FileNotFoundError, FileExistsError)) ++ elif self.raised_exception: ++ # Cannot symlink/hardlink: tarfile falls back to getmember() ++ self.expect_exception(KeyError) ++ # Otherwise, this block should never enter.
++ else: ++ self.expect_any_tree(component) ++ self.expect_file('flaglink', content='overwrite') ++ self.expect_file('../newfile', content='new') ++ self.expect_file('escape', type=tarfile.SYMTYPE) ++ self.expect_file('a', symlink_to=component) ++ ++ for filter in 'tar', 'data': ++ with self.subTest(filter), self.check_context(arc.open(), filter=filter): ++ exc = self.expect_exception((OSError, KeyError)) ++ if isinstance(exc, OSError): ++ if sys.platform == 'win32': ++ # 3: ERROR_PATH_NOT_FOUND ++ # 5: ERROR_ACCESS_DENIED ++ # 206: ERROR_FILENAME_EXCED_RANGE ++ self.assertIn(exc.winerror, (3, 5, 206)) ++ else: ++ self.assertEqual(exc.errno, errno.ENAMETOOLONG) ++ + @symlink_test + def test_parent_symlink2(self): + # Test interplaying symlinks +@@ -3758,8 +3909,8 @@ def test_chains(self): + arc.add('symlink2', symlink_to=os.path.join( + 'linkdir', 'hardlink2')) + arc.add('targetdir/target', size=3) +- arc.add('linkdir/hardlink', hardlink_to='targetdir/target') +- arc.add('linkdir/hardlink2', hardlink_to='linkdir/symlink') ++ arc.add('linkdir/hardlink', hardlink_to=os.path.join('targetdir', 'target')) ++ arc.add('linkdir/hardlink2', hardlink_to=os.path.join('linkdir', 'symlink')) + + for filter in 'tar', 'data', 'fully_trusted': + with self.check_context(arc.open(), filter): +@@ -3775,6 +3926,129 @@ def test_chains(self): + self.expect_file('linkdir/symlink', size=3) + self.expect_file('symlink2', size=3) + ++ @symlink_test ++ def test_sneaky_hardlink_fallback(self): ++ # (CVE-2025-4330) ++ # Test that when hardlink extraction falls back to extracting members ++ # from the archive, the extracted member is (re-)filtered. 
++ with ArchiveMaker() as arc: ++ # Create a directory structure so the c/escape symlink stays ++ # inside the path ++ arc.add("a/t/dummy") ++ # Create b/ directory ++ arc.add("b/") ++ # Point "c" to the bottom of the tree in "a" ++ arc.add("c", symlink_to=os.path.join("a", "t")) ++ # link to non-existent location under "a" ++ arc.add("c/escape", symlink_to=os.path.join("..", "..", ++ "link_here")) ++ # Move "c" to point to "b" ("c/escape" no longer exists) ++ arc.add("c", symlink_to="b") ++ # Attempt to create a hard link to "c/escape". Since it doesn't ++ # exist it will attempt to extract "c/escape" but at "boom". ++ arc.add("boom", hardlink_to=os.path.join("c", "escape")) ++ ++ with self.check_context(arc.open(), 'data'): ++ if not os_helper.can_symlink(): ++ # When 'c/escape' is extracted, 'c' is a regular ++ # directory, and 'c/escape' *would* point outside ++ # the destination if symlinks were allowed. ++ self.expect_exception( ++ tarfile.LinkOutsideDestinationError) ++ elif sys.platform == "win32": ++ # On Windows, 'c/escape' points outside the destination ++ self.expect_exception(tarfile.LinkOutsideDestinationError) ++ else: ++ e = self.expect_exception( ++ tarfile.LinkFallbackError, ++ "link 'boom' would be extracted as a copy of " ++ + "'c/escape', which was rejected") ++ self.assertIsInstance(e.__cause__, ++ tarfile.LinkOutsideDestinationError) ++ for filter in 'tar', 'fully_trusted': ++ with self.subTest(filter), self.check_context(arc.open(), filter): ++ if not os_helper.can_symlink(): ++ self.expect_file("a/t/dummy") ++ self.expect_file("b/") ++ self.expect_file("c/") ++ else: ++ self.expect_file("a/t/dummy") ++ self.expect_file("b/") ++ self.expect_file("a/t/escape", symlink_to='../../link_here') ++ self.expect_file("boom", symlink_to='../../link_here') ++ self.expect_file("c", symlink_to='b') ++ ++ @symlink_test ++ def test_exfiltration_via_symlink(self): ++ # (CVE-2025-4138) ++ # Test changing symlinks that result in a symlink pointing outside ++ #
the extraction directory, unless prevented by 'data' filter's ++ # normalization. ++ with ArchiveMaker() as arc: ++ arc.add("escape", symlink_to=os.path.join('link', 'link', '..', '..', 'link-here')) ++ arc.add("link", symlink_to='./') ++ ++ for filter in 'tar', 'data', 'fully_trusted': ++ with self.check_context(arc.open(), filter): ++ if os_helper.can_symlink(): ++ self.expect_file("link", symlink_to='./') ++ if filter == 'data': ++ self.expect_file("escape", symlink_to='link-here') ++ else: ++ self.expect_file("escape", ++ symlink_to='link/link/../../link-here') ++ else: ++ # Nothing is extracted. ++ pass ++ ++ @symlink_test ++ def test_chmod_outside_dir(self): ++ # (CVE-2024-12718) ++ # Test that members used for delayed updates of directory metadata ++ # are (re-)filtered. ++ with ArchiveMaker() as arc: ++ # "pwn" is a veeeery innocent symlink: ++ arc.add("a/pwn", symlink_to='.') ++ # But now "pwn" is also a directory, so it's scheduled to have its ++ # metadata updated later: ++ arc.add("a/pwn/", mode='drwxrwxrwx') ++ # Oops, "pwn" is not so innocent any more: ++ arc.add("a/pwn", symlink_to='x/../') ++ # Newly created symlink points to the dest dir, ++ # so it's OK for the "data" filter. 
++ arc.add('a/x', symlink_to=('../')) ++ # But now "pwn" points outside the dest dir ++ ++ for filter in 'tar', 'data', 'fully_trusted': ++ with self.check_context(arc.open(), filter) as cc: ++ if not os_helper.can_symlink(): ++ self.expect_file("a/pwn/") ++ elif filter == 'data': ++ self.expect_file("a/x", symlink_to='../') ++ self.expect_file("a/pwn", symlink_to='.') ++ else: ++ self.expect_file("a/x", symlink_to='../') ++ self.expect_file("a/pwn", symlink_to='x/../') ++ if sys.platform != "win32": ++ st_mode = cc.outerdir.stat().st_mode ++ self.assertNotEqual(st_mode & 0o777, 0o777) ++ ++ def test_link_fallback_normalizes(self): ++ # Make sure hardlink fallbacks work for non-normalized paths for all ++ # filters ++ with ArchiveMaker() as arc: ++ arc.add("dir/") ++ arc.add("dir/../afile") ++ arc.add("link1", hardlink_to='dir/../afile') ++ arc.add("link2", hardlink_to='dir/../dir/../afile') ++ ++ for filter in 'tar', 'data', 'fully_trusted': ++ with self.check_context(arc.open(), filter) as cc: ++ self.expect_file("dir/") ++ self.expect_file("afile") ++ self.expect_file("link1") ++ self.expect_file("link2") ++ + def test_modes(self): + # Test how file modes are extracted + # (Note that the modes are ignored on platforms without working chmod) +@@ -3888,7 +4162,7 @@ def test_tar_filter(self): + # The 'tar' filter returns TarInfo objects with the same name/type. + # (It can also fail for particularly "evil" input, but we don't have + # that in the test archive.) +- with tarfile.TarFile.open(tarname) as tar: ++ with tarfile.TarFile.open(tarname, encoding="iso8859-1") as tar: + for tarinfo in tar.getmembers(): + filtered = tarfile.tar_filter(tarinfo, '') + self.assertIs(filtered.name, tarinfo.name) +@@ -3897,7 +4171,7 @@ def test_tar_filter(self): + def test_data_filter(self): + # The 'data' filter either raises, or returns TarInfo with the same + # name/type. 
+- with tarfile.TarFile.open(tarname) as tar: ++ with tarfile.TarFile.open(tarname, encoding="iso8859-1") as tar: + for tarinfo in tar.getmembers(): + try: + filtered = tarfile.data_filter(tarinfo, '') +@@ -4026,13 +4300,13 @@ def valueerror_filter(tarinfo, path): + # If errorlevel is 0, errors affected by errorlevel are ignored + + with self.check_context(arc.open(errorlevel=0), extracterror_filter): +- self.expect_file('file') ++ pass + + with self.check_context(arc.open(errorlevel=0), filtererror_filter): +- self.expect_file('file') ++ pass + + with self.check_context(arc.open(errorlevel=0), oserror_filter): +- self.expect_file('file') ++ pass + + with self.check_context(arc.open(errorlevel=0), tarerror_filter): + self.expect_exception(tarfile.TarError) +@@ -4043,7 +4317,7 @@ def valueerror_filter(tarinfo, path): + # If 1, all fatal errors are raised + + with self.check_context(arc.open(errorlevel=1), extracterror_filter): +- self.expect_file('file') ++ pass + + with self.check_context(arc.open(errorlevel=1), filtererror_filter): + self.expect_exception(tarfile.FilterError) +diff --git a/Misc/NEWS.d/next/Security/2025-06-02-11-32-23.gh-issue-135034.RLGjbp.rst b/Misc/NEWS.d/next/Security/2025-06-02-11-32-23.gh-issue-135034.RLGjbp.rst +new file mode 100644 +index 00000000000000..e3f984afc21578 +--- /dev/null ++++ b/Misc/NEWS.d/next/Security/2025-06-02-11-32-23.gh-issue-135034.RLGjbp.rst +@@ -0,0 +1,6 @@ ++Fixes multiple issues that allowed ``tarfile`` extraction filters ++(``filter="data"`` and ``filter="tar"``) to be bypassed using crafted ++symlinks and hard links. ++ ++Addresses CVE 2024-12718, CVE 2025-4138, CVE 2025-4330, and CVE 2025-4517. 
++ diff --git a/python3.spec b/python3.spec index 82306d87cdbb6fe861085edcb5b0b253c4a3c245..fdad4e450693f4943f519ddc49a349fae77179ca 100644 --- a/python3.spec +++ b/python3.spec @@ -1,4 +1,4 @@ -%define anolis_release 8 +%define anolis_release 9 %global pybasever 3.11 # pybasever without the dot: @@ -251,6 +251,27 @@ Patch1008: fix-CVE-2025-4516.patch Patch1009: fix-CVE-2025-8194.patch # https://github.com/python/cpython/pull/135484 Patch1010: fix-CVE-2025-6069.patch +# https://github.com/python/cpython/commit/ae961ae94bf19c8f8c7fbea3d1c25cc55ce8ae97 +Patch1011: CVE-2024-9287.patch +# https://github.com/python/cpython/commit/a956e510f6336d5ae111ba429a61c3ade30a7549 +Patch1012: CVE-2024-0450.patch +# https://github.com/python/cpython/commit/ba431579efdcbaed7a96f2ac4ea0775879a332fb +Patch1013: CVE-2024-4032.patch +# https://github.com/python/cpython/commit/4633f3f497b1ff70e4a35b6fe2c907cbe2d4cb2e +Patch1014: CVE-2025-4517.patch +# https://github.com/python/cpython/commit/5f90abaa786f994db3907fc31e2ee00ea2cf0929 +Patch1015: CVE-2024-3219.patch +# https://github.com/python/cpython/commit/d4ac921a4b081f7f996a5d2b101684b67ba0ed7f +Patch1016: CVE-2024-7592.patch +# https://github.com/python/cpython/commit/795f2597a4be988e2bb19b69ff9958e981cb894e +# https://github.com/python/cpython/commit/fc0b8259e693caa8400fa8b6ac1e494e47ea7798 +Patch1017: CVE-2024-8088.patch +# https://github.com/python/cpython/commit/f7c0f09e69e950cf3c5ada9dbde93898eb975533 +Patch1018: CVE-2024-6923.patch +# https://github.com/python/cpython/pull/123767/commits/067fc2ea57ecf8897806580794db93b94796d43f +Patch1019: CVE-2023-27043.patch +# https://github.com/python/cpython/commit/b7431133441a92670132600e5af78b64dd25539b.patch +Patch1020: CVE-2024-6232.patch # ========================================== # Descriptions, and metadata for subpackages @@ -1523,6 +1544,18 @@ CheckPython optimized # ====================================================== %changelog +* Thu Aug 14 2025 Bo Ren - 3.11.6-9 +- fix 
CVE-2024-9287 +- fix CVE-2024-0450 +- fix CVE-2024-4032 +- fix CVE-2025-4517 +- fix CVE-2024-3219 +- fix CVE-2024-7592 +- fix CVE-2024-8088 +- fix CVE-2023-27043 +- fix CVE-2024-6232 +- fix CVE-2024-6923 + * Wed Aug 13 2025 wenxin - 3.11.6-8 - Add patch to fix CVE-2025-6069