diff --git a/CVE-2023-27043.patch b/CVE-2023-27043.patch new file mode 100644 index 0000000000000000000000000000000000000000..7d2a8132a9a6ee1156021e70bc2fdb7f04d86372 --- /dev/null +++ b/CVE-2023-27043.patch @@ -0,0 +1,500 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Victor Stinner +Date: Fri, 15 Dec 2023 16:10:40 +0100 +Subject: [PATCH] 00415: [CVE-2023-27043] gh-102988: Reject malformed addresses + in email.parseaddr() (#111116) + +Detect email address parsing errors and return empty tuple to +indicate the parsing error (old API). Add an optional 'strict' +parameter to getaddresses() and parseaddr() functions. Patch by +Thomas Dwyer. + +Co-Authored-By: Thomas Dwyer +--- + Doc/library/email.utils.rst | 19 +- + Lib/email/utils.py | 151 ++++++++++++- + Lib/test/test_email/test_email.py | 204 +++++++++++++++++- + ...-10-20-15-28-08.gh-issue-102988.dStNO7.rst | 8 + + 4 files changed, 361 insertions(+), 21 deletions(-) + create mode 100644 Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst + +diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst +index 0e266b6a45..6723dc4f13 100644 +--- a/Doc/library/email.utils.rst ++++ b/Doc/library/email.utils.rst +@@ -60,13 +60,18 @@ of the new API. + begins with angle brackets, they are stripped off. + + +-.. function:: parseaddr(address) ++.. function:: parseaddr(address, *, strict=True) + + Parse address -- which should be the value of some address-containing field such + as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and + *email address* parts. Returns a tuple of that information, unless the parse + fails, in which case a 2-tuple of ``('', '')`` is returned. + ++ If *strict* is true, use a strict parser which rejects malformed inputs. ++ ++ .. versionchanged:: 3.13 ++ Add *strict* optional parameter and reject malformed inputs by default. ++ + + .. function:: formataddr(pair, charset='utf-8') + +@@ -84,12 +89,15 @@ of the new API. + Added the *charset* option. + + +-.. function:: getaddresses(fieldvalues) ++.. function:: getaddresses(fieldvalues, *, strict=True) + + This method returns a list of 2-tuples of the form returned by ``parseaddr()``. + *fieldvalues* is a sequence of header field values as might be returned by +- :meth:`Message.get_all `. Here's a simple +- example that gets all the recipients of a message:: ++ :meth:`Message.get_all `. ++ ++ If *strict* is true, use a strict parser which rejects malformed inputs. ++ ++ Here's a simple example that gets all the recipients of a message:: + + from email.utils import getaddresses + +@@ -99,6 +107,9 @@ of the new API. + resent_ccs = msg.get_all('resent-cc', []) + all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs) + ++ .. versionchanged:: 3.13 ++ Add *strict* optional parameter and reject malformed inputs by default. ++ + + .. function:: parsedate(date) + +diff --git a/Lib/email/utils.py b/Lib/email/utils.py +index cfdfeb3f1a..9522341fab 100644 +--- a/Lib/email/utils.py ++++ b/Lib/email/utils.py +@@ -48,6 +48,7 @@ + specialsre = re.compile(r'[][\\()<>@,:;".]') + escapesre = re.compile(r'[\\"]') + ++ + def _has_surrogates(s): + """Return True if s contains surrogate-escaped binary data.""" + # This check is based on the fact that unless there are surrogates, utf8 +@@ -106,12 +107,127 @@ def formataddr(pair, charset='utf-8'): + return address + + ++def _iter_escaped_chars(addr): ++ pos = 0 ++ escape = False ++ for pos, ch in enumerate(addr): ++ if escape: ++ yield (pos, '\\' + ch) ++ escape = False ++ elif ch == '\\': ++ escape = True ++ else: ++ yield (pos, ch) ++ if escape: ++ yield (pos, '\\') + +-def getaddresses(fieldvalues): +- """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" +- all = COMMASPACE.join(str(v) for v in fieldvalues) +- a = _AddressList(all) +- return a.addresslist ++ ++def _strip_quoted_realnames(addr): ++ """Strip real names between quotes.""" ++ if '"' not in addr: ++ # Fast path ++ return addr ++ ++ start = 0 ++ open_pos = None ++ result = [] ++ for pos, ch in _iter_escaped_chars(addr): ++ if ch == '"': ++ if open_pos is None: ++ open_pos = pos ++ else: ++ if start != open_pos: ++ result.append(addr[start:open_pos]) ++ start = pos + 1 ++ open_pos = None ++ ++ if start < len(addr): ++ result.append(addr[start:]) ++ ++ return ''.join(result) ++ ++ ++supports_strict_parsing = True ++ ++def getaddresses(fieldvalues, *, strict=True): ++ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. ++ ++ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in ++ its place. ++ ++ If strict is true, use a strict parser which rejects malformed inputs. ++ """ ++ ++ # If strict is true, if the resulting list of parsed addresses is greater ++ # than the number of fieldvalues in the input list, a parsing error has ++ # occurred and consequently a list containing a single empty 2-tuple [('', ++ # '')] is returned in its place. This is done to avoid invalid output. ++ # ++ # Malformed input: getaddresses(['alice@example.com ']) ++ # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')] ++ # Safe output: [('', '')] ++ ++ if not strict: ++ all = COMMASPACE.join(str(v) for v in fieldvalues) ++ a = _AddressList(all) ++ return a.addresslist ++ ++ fieldvalues = [str(v) for v in fieldvalues] ++ fieldvalues = _pre_parse_validation(fieldvalues) ++ addr = COMMASPACE.join(fieldvalues) ++ a = _AddressList(addr) ++ result = _post_parse_validation(a.addresslist) ++ ++ # Treat output as invalid if the number of addresses is not equal to the ++ # expected number of addresses. ++ n = 0 ++ for v in fieldvalues: ++ # When a comma is used in the Real Name part it is not a deliminator. ++ # So strip those out before counting the commas. ++ v = _strip_quoted_realnames(v) ++ # Expected number of addresses: 1 + number of commas ++ n += 1 + v.count(',') ++ if len(result) != n: ++ return [('', '')] ++ ++ return result ++ ++ ++def _check_parenthesis(addr): ++ # Ignore parenthesis in quoted real names. ++ addr = _strip_quoted_realnames(addr) ++ ++ opens = 0 ++ for pos, ch in _iter_escaped_chars(addr): ++ if ch == '(': ++ opens += 1 ++ elif ch == ')': ++ opens -= 1 ++ if opens < 0: ++ return False ++ return (opens == 0) ++ ++ ++def _pre_parse_validation(email_header_fields): ++ accepted_values = [] ++ for v in email_header_fields: ++ if not _check_parenthesis(v): ++ v = "('', '')" ++ accepted_values.append(v) ++ ++ return accepted_values ++ ++ ++def _post_parse_validation(parsed_email_header_tuples): ++ accepted_values = [] ++ # The parser would have parsed a correctly formatted domain-literal ++ # The existence of an [ after parsing indicates a parsing failure ++ for v in parsed_email_header_tuples: ++ if '[' in v[1]: ++ v = ('', '') ++ accepted_values.append(v) ++ ++ return accepted_values + + + def _format_timetuple_and_zone(timetuple, zone): +@@ -205,16 +321,33 @@ def parsedate_to_datetime(data): + tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) + + +-def parseaddr(addr): ++def parseaddr(addr, *, strict=True): + """ + Parse addr into its constituent realname and email address parts. + + Return a tuple of realname and email address, unless the parse fails, in + which case return a 2-tuple of ('', ''). ++ ++ If strict is True, use a strict parser which rejects malformed inputs. + """ +- addrs = _AddressList(addr).addresslist +- if not addrs: +- return '', '' ++ if not strict: ++ addrs = _AddressList(addr).addresslist ++ if not addrs: ++ return ('', '') ++ return addrs[0] ++ ++ if isinstance(addr, list): ++ addr = addr[0] ++ ++ if not isinstance(addr, str): ++ return ('', '') ++ ++ addr = _pre_parse_validation([addr])[0] ++ addrs = _post_parse_validation(_AddressList(addr).addresslist) ++ ++ if not addrs or len(addrs) > 1: ++ return ('', '') ++ + return addrs[0] + + +diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py +index 8b16cca9bf..5b19bb38f6 100644 +--- a/Lib/test/test_email/test_email.py ++++ b/Lib/test/test_email/test_email.py +@@ -16,6 +16,7 @@ + + import email + import email.policy ++import email.utils + + from email.charset import Charset + from email.generator import Generator, DecodedGenerator, BytesGenerator +@@ -3288,15 +3289,154 @@ def test_getaddresses(self): + [('Al Person', 'aperson@dom.ain'), + ('Bud Person', 'bperson@dom.ain')]) + ++ def test_getaddresses_comma_in_name(self): ++ """GH-106669 regression test.""" ++ self.assertEqual( ++ utils.getaddresses( ++ [ ++ '"Bud, Person" ', ++ 'aperson@dom.ain (Al Person)', ++ '"Mariusz Felisiak" ', ++ ] ++ ), ++ [ ++ ('Bud, Person', 'bperson@dom.ain'), ++ ('Al Person', 'aperson@dom.ain'), ++ ('Mariusz Felisiak', 'to@example.com'), ++ ], ++ ) ++ ++ def test_parsing_errors(self): ++ """Test for parsing errors from CVE-2023-27043 and CVE-2019-16056""" ++ alice = 'alice@example.org' ++ bob = 'bob@example.com' ++ empty = ('', '') ++ ++ # Test utils.getaddresses() and utils.parseaddr() on malformed email ++ # addresses: default behavior (strict=True) rejects malformed address, ++ # and strict=False which tolerates malformed address. ++ for invalid_separator, expected_non_strict in ( ++ ('(', [(f'<{bob}>', alice)]), ++ (')', [('', alice), empty, ('', bob)]), ++ ('<', [('', alice), empty, ('', bob), empty]), ++ ('>', [('', alice), empty, ('', bob)]), ++ ('[', [('', f'{alice}[<{bob}>]')]), ++ (']', [('', alice), empty, ('', bob)]), ++ ('@', [empty, empty, ('', bob)]), ++ (';', [('', alice), empty, ('', bob)]), ++ (':', [('', alice), ('', bob)]), ++ ('.', [('', alice + '.'), ('', bob)]), ++ ('"', [('', alice), ('', f'<{bob}>')]), ++ ): ++ address = f'{alice}{invalid_separator}<{bob}>' ++ with self.subTest(address=address): ++ self.assertEqual(utils.getaddresses([address]), ++ [empty]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ expected_non_strict) ++ ++ self.assertEqual(utils.parseaddr([address]), ++ empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Comma (',') is treated differently depending on strict parameter. ++ # Comma without quotes. ++ address = f'{alice},<{bob}>' ++ self.assertEqual(utils.getaddresses([address]), ++ [('', alice), ('', bob)]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ [('', alice), ('', bob)]) ++ self.assertEqual(utils.parseaddr([address]), ++ empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Real name between quotes containing comma. ++ address = '"Alice, alice@example.org" ' ++ expected_strict = ('Alice, alice@example.org', 'bob@example.com') ++ self.assertEqual(utils.getaddresses([address]), [expected_strict]) ++ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict]) ++ self.assertEqual(utils.parseaddr([address]), expected_strict) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Valid parenthesis in comments. ++ address = 'alice@example.org (Alice)' ++ expected_strict = ('Alice', 'alice@example.org') ++ self.assertEqual(utils.getaddresses([address]), [expected_strict]) ++ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict]) ++ self.assertEqual(utils.parseaddr([address]), expected_strict) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Invalid parenthesis in comments. ++ address = 'alice@example.org )Alice(' ++ self.assertEqual(utils.getaddresses([address]), [empty]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ [('', 'alice@example.org'), ('', ''), ('', 'Alice')]) ++ self.assertEqual(utils.parseaddr([address]), empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Two addresses with quotes separated by comma. ++ address = '"Jane Doe" , "John Doe" ' ++ self.assertEqual(utils.getaddresses([address]), ++ [('Jane Doe', 'jane@example.net'), ++ ('John Doe', 'john@example.net')]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ [('Jane Doe', 'jane@example.net'), ++ ('John Doe', 'john@example.net')]) ++ self.assertEqual(utils.parseaddr([address]), empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Test email.utils.supports_strict_parsing attribute ++ self.assertEqual(email.utils.supports_strict_parsing, True) ++ + def test_getaddresses_nasty(self): +- eq = self.assertEqual +- eq(utils.getaddresses(['foo: ;']), [('', '')]) +- eq(utils.getaddresses( +- ['[]*-- =~$']), +- [('', ''), ('', ''), ('', '*--')]) +- eq(utils.getaddresses( +- ['foo: ;', '"Jason R. Mastaler" ']), +- [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) ++ for addresses, expected in ( ++ (['"Sürname, Firstname" '], ++ [('Sürname, Firstname', 'to@example.com')]), ++ ++ (['foo: ;'], ++ [('', '')]), ++ ++ (['foo: ;', '"Jason R. Mastaler" '], ++ [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]), ++ ++ ([r'Pete(A nice \) chap) '], ++ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]), ++ ++ (['(Empty list)(start)Undisclosed recipients :(nobody(I know))'], ++ [('', '')]), ++ ++ (['Mary <@machine.tld:mary@example.net>, , jdoe@test . example'], ++ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]), ++ ++ (['John Doe '], ++ [('John Doe (comment)', 'jdoe@machine.example')]), ++ ++ (['"Mary Smith: Personal Account" '], ++ [('Mary Smith: Personal Account', 'smith@home.example')]), ++ ++ (['Undisclosed recipients:;'], ++ [('', '')]), ++ ++ ([r', "Giant; \"Big\" Box" '], ++ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]), ++ ): ++ with self.subTest(addresses=addresses): ++ self.assertEqual(utils.getaddresses(addresses), ++ expected) ++ self.assertEqual(utils.getaddresses(addresses, strict=False), ++ expected) ++ ++ addresses = ['[]*-- =~$'] ++ self.assertEqual(utils.getaddresses(addresses), ++ [('', '')]) ++ self.assertEqual(utils.getaddresses(addresses, strict=False), ++ [('', ''), ('', ''), ('', '*--')]) + + def test_getaddresses_embedded_comment(self): + """Test proper handling of a nested comment""" +@@ -3485,6 +3625,54 @@ def test_mime_classes_policy_argument(self): + m = cls(*constructor, policy=email.policy.default) + self.assertIs(m.policy, email.policy.default) + ++ def test_iter_escaped_chars(self): ++ self.assertEqual(list(utils._iter_escaped_chars(r'a\\b\"c\\"d')), ++ [(0, 'a'), ++ (2, '\\\\'), ++ (3, 'b'), ++ (5, '\\"'), ++ (6, 'c'), ++ (8, '\\\\'), ++ (9, '"'), ++ (10, 'd')]) ++ self.assertEqual(list(utils._iter_escaped_chars('a\\')), ++ [(0, 'a'), (1, '\\')]) ++ ++ def test_strip_quoted_realnames(self): ++ def check(addr, expected): ++ self.assertEqual(utils._strip_quoted_realnames(addr), expected) ++ ++ check('"Jane Doe" , "John Doe" ', ++ ' , ') ++ check(r'"Jane \"Doe\"." ', ++ ' ') ++ ++ # special cases ++ check(r'before"name"after', 'beforeafter') ++ check(r'before"name"', 'before') ++ check(r'b"name"', 'b') # single char ++ check(r'"name"after', 'after') ++ check(r'"name"a', 'a') # single char ++ check(r'"name"', '') ++ ++ # no change ++ for addr in ( ++ 'Jane Doe , John Doe ', ++ 'lone " quote', ++ ): ++ self.assertEqual(utils._strip_quoted_realnames(addr), addr) ++ ++ ++ def test_check_parenthesis(self): ++ addr = 'alice@example.net' ++ self.assertTrue(utils._check_parenthesis(f'{addr} (Alice)')) ++ self.assertFalse(utils._check_parenthesis(f'{addr} )Alice(')) ++ self.assertFalse(utils._check_parenthesis(f'{addr} (Alice))')) ++ self.assertFalse(utils._check_parenthesis(f'{addr} ((Alice)')) ++ ++ # Ignore real name between quotes ++ self.assertTrue(utils._check_parenthesis(f'")Alice((" {addr}')) ++ + + # Test the iterator/generators + class TestIterators(TestEmailBase): +diff --git a/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst +new file mode 100644 +index 0000000000..3d0e9e4078 +--- /dev/null ++++ b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst +@@ -0,0 +1,8 @@ ++:func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now ++return ``('', '')`` 2-tuples in more situations where invalid email ++addresses are encountered instead of potentially inaccurate values. Add ++optional *strict* parameter to these two functions: use ``strict=False`` to ++get the old behavior, accept malformed inputs. ++``getattr(email.utils, 'supports_strict_parsing', False)`` can be use to check ++if the *strict* paramater is available. Patch by Thomas Dwyer and Victor ++Stinner to improve the CVE-2023-27043 fix. diff --git a/fix-tests-for-xmlpullparser-with-expat-2-6-0.patch b/fix-tests-for-xmlpullparser-with-expat-2-6-0.patch new file mode 100644 index 0000000000000000000000000000000000000000..a54d321e55fe2fecd5a4a84ede88919de71dac90 --- /dev/null +++ b/fix-tests-for-xmlpullparser-with-expat-2-6-0.patch @@ -0,0 +1,107 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Serhiy Storchaka +Date: Sun, 11 Feb 2024 12:08:39 +0200 +Subject: [PATCH] 00422: gh-115133: Fix tests for XMLPullParser with Expat + 2.6.0 + +Feeding the parser by too small chunks defers parsing to prevent +CVE-2023-52425. Future versions of Expat may be more reactive. + +(cherry picked from commit 4a08e7b3431cd32a0daf22a33421cd3035343dc4) +--- + Lib/test/test_xml_etree.py | 58 ++++++++++++------- + ...-02-08-14-21-28.gh-issue-115133.ycl4ko.rst | 2 + + 2 files changed, 38 insertions(+), 22 deletions(-) + create mode 100644 Misc/NEWS.d/next/Library/2024-02-08-14-21-28.gh-issue-115133.ycl4ko.rst + +diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py +index 940e02630e..c03d1bacda 100644 +--- a/Lib/test/test_xml_etree.py ++++ b/Lib/test/test_xml_etree.py +@@ -13,6 +13,7 @@ + import operator + import os + import pickle ++import pyexpat + import sys + import textwrap + import types +@@ -120,6 +121,10 @@ + + """ + ++fails_with_expat_2_6_0 = (unittest.expectedFailure ++ if pyexpat.version_info >= (2, 6, 0) else ++ lambda test: test) ++ + def checkwarnings(*filters, quiet=False): + def decorator(test): + def newtest(*args, **kwargs): +@@ -1396,28 +1401,37 @@ def assert_event_tags(self, parser, expected, max_events=None): + self.assertEqual([(action, elem.tag) for action, elem in events], + expected) + +- def test_simple_xml(self): +- for chunk_size in (None, 1, 5): +- with self.subTest(chunk_size=chunk_size): +- parser = ET.XMLPullParser() +- self.assert_event_tags(parser, []) +- self._feed(parser, "\n", chunk_size) +- self.assert_event_tags(parser, []) +- self._feed(parser, +- "\n text\n", chunk_size) +- self.assert_event_tags(parser, [('end', 'element')]) +- self._feed(parser, "texttail\n", chunk_size) +- self._feed(parser, "\n", chunk_size) +- self.assert_event_tags(parser, [ +- ('end', 'element'), +- ('end', 'empty-element'), +- ]) +- self._feed(parser, "\n", chunk_size) +- self.assert_event_tags(parser, [('end', 'root')]) +- self.assertIsNone(parser.close()) ++ def test_simple_xml(self, chunk_size=None): ++ parser = ET.XMLPullParser() ++ self.assert_event_tags(parser, []) ++ self._feed(parser, "\n", chunk_size) ++ self.assert_event_tags(parser, []) ++ self._feed(parser, ++ "\n text\n", chunk_size) ++ self.assert_event_tags(parser, [('end', 'element')]) ++ self._feed(parser, "texttail\n", chunk_size) ++ self._feed(parser, "\n", chunk_size) ++ self.assert_event_tags(parser, [ ++ ('end', 'element'), ++ ('end', 'empty-element'), ++ ]) ++ self._feed(parser, "\n", chunk_size) ++ self.assert_event_tags(parser, [('end', 'root')]) ++ self.assertIsNone(parser.close()) ++ ++ @fails_with_expat_2_6_0 ++ def test_simple_xml_chunk_1(self): ++ self.test_simple_xml(chunk_size=1) ++ ++ @fails_with_expat_2_6_0 ++ def test_simple_xml_chunk_5(self): ++ self.test_simple_xml(chunk_size=5) ++ ++ def test_simple_xml_chunk_22(self): ++ self.test_simple_xml(chunk_size=22) + + def test_feed_while_iterating(self): + parser = ET.XMLPullParser() +diff --git a/Misc/NEWS.d/next/Library/2024-02-08-14-21-28.gh-issue-115133.ycl4ko.rst b/Misc/NEWS.d/next/Library/2024-02-08-14-21-28.gh-issue-115133.ycl4ko.rst +new file mode 100644 +index 0000000000..6f1015235c +--- /dev/null ++++ b/Misc/NEWS.d/next/Library/2024-02-08-14-21-28.gh-issue-115133.ycl4ko.rst +@@ -0,0 +1,2 @@ ++Fix tests for :class:`~xml.etree.ElementTree.XMLPullParser` with Expat ++2.6.0. diff --git a/python-3.10-gh-113659-skip-hidden-pth-files.patch b/python-3.10-gh-113659-skip-hidden-pth-files.patch new file mode 100644 index 0000000000000000000000000000000000000000..1c339c950778e463dfa6e221c73bd569fe75c53c --- /dev/null +++ b/python-3.10-gh-113659-skip-hidden-pth-files.patch @@ -0,0 +1,141 @@ +From 48d96c490d86d4735dd216a8b4e320b2c5ae47cb Mon Sep 17 00:00:00 2001 +From: Serhiy Storchaka +Date: Tue, 16 Jan 2024 20:23:05 +0200 +Subject: [PATCH 1/2] gh-113659: Skip hidden .pth files (GH-113660) + +Skip .pth files with names starting with a dot or hidden file attribute. +(cherry picked from commit 74208ed0c440244fb809d8acc97cb9ef51e888e3) + +Co-authored-by: Serhiy Storchaka +--- + Lib/site.py | 12 +++++- + Lib/test/test_site.py | 40 +++++++++++++++++++ + ...-01-02-19-52-23.gh-issue-113659.DkmnQc.rst | 1 + + 3 files changed, 52 insertions(+), 1 deletion(-) + create mode 100644 Misc/NEWS.d/next/Security/2024-01-02-19-52-23.gh-issue-113659.DkmnQc.rst + +diff --git a/Lib/site.py b/Lib/site.py +index 939893eb5ee93b..5302037e0bf2c1 100644 +--- a/Lib/site.py ++++ b/Lib/site.py +@@ -74,6 +74,7 @@ + import builtins + import _sitebuiltins + import io ++import stat + + # Prefixes for site-packages; add additional prefixes like /usr/local here + PREFIXES = [sys.prefix, sys.exec_prefix] +@@ -168,6 +169,14 @@ def addpackage(sitedir, name, known_paths): + else: + reset = False + fullname = os.path.join(sitedir, name) ++ try: ++ st = os.lstat(fullname) ++ except OSError: ++ return ++ if ((getattr(st, 'st_flags', 0) & stat.UF_HIDDEN) or ++ (getattr(st, 'st_file_attributes', 0) & stat.FILE_ATTRIBUTE_HIDDEN)): ++ _trace(f"Skipping hidden .pth file: {fullname!r}") ++ return + _trace(f"Processing .pth file: {fullname!r}") + try: + # locale encoding is not ideal especially on Windows. But we have used +@@ -221,7 +230,8 @@ def addsitedir(sitedir, known_paths=None): + names = os.listdir(sitedir) + except OSError: + return +- names = [name for name in names if name.endswith(".pth")] ++ names = [name for name in names ++ if name.endswith(".pth") and not name.startswith(".")] + for name in sorted(names): + addpackage(sitedir, name, known_paths) + if reset: +diff --git a/Lib/test/test_site.py b/Lib/test/test_site.py +index 93349ed8bb16a3..d8bb03087f3b25 100644 +--- a/Lib/test/test_site.py ++++ b/Lib/test/test_site.py +@@ -18,6 +18,7 @@ + import os + import re + import shutil ++import stat + import subprocess + import sys + import sysconfig +@@ -194,6 +195,45 @@ def test_addsitedir(self): + finally: + pth_file.cleanup() + ++ def test_addsitedir_dotfile(self): ++ pth_file = PthFile('.dotfile') ++ pth_file.cleanup(prep=True) ++ try: ++ pth_file.create() ++ site.addsitedir(pth_file.base_dir, set()) ++ self.assertNotIn(site.makepath(pth_file.good_dir_path)[0], sys.path) ++ self.assertIn(pth_file.base_dir, sys.path) ++ finally: ++ pth_file.cleanup() ++ ++ @unittest.skipUnless(hasattr(os, 'chflags'), 'test needs os.chflags()') ++ def test_addsitedir_hidden_flags(self): ++ pth_file = PthFile() ++ pth_file.cleanup(prep=True) ++ try: ++ pth_file.create() ++ st = os.stat(pth_file.file_path) ++ os.chflags(pth_file.file_path, st.st_flags | stat.UF_HIDDEN) ++ site.addsitedir(pth_file.base_dir, set()) ++ self.assertNotIn(site.makepath(pth_file.good_dir_path)[0], sys.path) ++ self.assertIn(pth_file.base_dir, sys.path) ++ finally: ++ pth_file.cleanup() ++ ++ @unittest.skipUnless(sys.platform == 'win32', 'test needs Windows') ++ @support.requires_subprocess() ++ def test_addsitedir_hidden_file_attribute(self): ++ pth_file = PthFile() ++ pth_file.cleanup(prep=True) ++ try: ++ pth_file.create() ++ subprocess.check_call(['attrib', '+H', pth_file.file_path]) ++ site.addsitedir(pth_file.base_dir, set()) ++ self.assertNotIn(site.makepath(pth_file.good_dir_path)[0], sys.path) ++ self.assertIn(pth_file.base_dir, sys.path) ++ finally: ++ pth_file.cleanup() ++ + # This tests _getuserbase, hence the double underline + # to distinguish from a test for getuserbase + def test__getuserbase(self): +diff --git a/Misc/NEWS.d/next/Security/2024-01-02-19-52-23.gh-issue-113659.DkmnQc.rst b/Misc/NEWS.d/next/Security/2024-01-02-19-52-23.gh-issue-113659.DkmnQc.rst +new file mode 100644 +index 00000000000000..744687e72324d1 +--- /dev/null ++++ b/Misc/NEWS.d/next/Security/2024-01-02-19-52-23.gh-issue-113659.DkmnQc.rst +@@ -0,0 +1 @@ ++Skip ``.pth`` files with names starting with a dot or hidden file attribute. + +From a93179a578512e2b6a8652f145b651914b66a880 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=C5=81ukasz=20Langa?= +Date: Wed, 17 Jan 2024 15:17:47 +0100 +Subject: [PATCH 2/2] Don't use `support.requires_subprocess()` as it doesn't + exist in 3.10 + +--- + Lib/test/test_site.py | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/Lib/test/test_site.py b/Lib/test/test_site.py +index d8bb03087f3b25..c70e1fa9ae1041 100644 +--- a/Lib/test/test_site.py ++++ b/Lib/test/test_site.py +@@ -221,7 +221,6 @@ def test_addsitedir_hidden_flags(self): + pth_file.cleanup() + + @unittest.skipUnless(sys.platform == 'win32', 'test needs Windows') +- @support.requires_subprocess() + def test_addsitedir_hidden_file_attribute(self): + pth_file = PthFile() + pth_file.cleanup(prep=True) diff --git a/python3.spec b/python3.spec index 5a016ac139ed0aa270d1840a07e734da47503776..24a920e883c82291c1c851c3b88b31eb1e49ab36 100644 --- a/python3.spec +++ b/python3.spec @@ -1,4 +1,4 @@ -%define anolis_release 1 +%define anolis_release 2 %global pybasever 3.10 # pybasever without the dot: @@ -240,6 +240,14 @@ Patch1001: python3-3.10.10-fix-linkage.patch Patch1002: python3-3.10.10-link-C-modules-with-libpython.patch # add loongarch64 support Patch1003: 0001-add-loongarch64-support-for-python-3.10.12.patch +Patch1004: CVE-2023-27043.patch +# https://github.com/python/cpython/issues/113659 +Patch1005: python-3.10-gh-113659-skip-hidden-pth-files.patch +# gh-115133: Fix tests for XMLPullParser with Expat 2.6.0 +# +# Feeding the parser by too small chunks defers parsing to prevent +# CVE-2023-52425. Future versions of Expat may be more reactive. +Patch1006: fix-tests-for-xmlpullparser-with-expat-2-6-0.patch # ========================================== # Descriptions, and metadata for subpackages @@ -1492,6 +1500,11 @@ CheckPython optimized # ====================================================== %changelog +* Thu Dec 21 2023 Funda Wang - 3.10.13-2 +- fix CVE-2023-27043 +- Fix gh-113659: Skip hidden .pth files +- Fix gh-115133: Fix tests for XMLPullParser with Expat 2.6.0 + * Fri Aug 25 2023 Funda Wang - 3.10.13-1 - New version 3.10.13