From d94b3a8fc6ad995d1dbbe5128c78a8cb88881d08 Mon Sep 17 00:00:00 2001 From: Guo Ce Date: Tue, 5 Nov 2024 14:40:19 +0800 Subject: [PATCH] fix CVE-2023-27043 --- backport-CVE-2023-27043.patch | 487 ++++++++++++++++++ ...arsing-errors-in-email-_parseaddr.py.patch | 265 ++++++++++ ...port-Revert-fixes-for-CVE-2023-27043.patch | 284 ++++++++++ python3.spec | 16 +- 4 files changed, 1051 insertions(+), 1 deletion(-) create mode 100644 backport-CVE-2023-27043.patch create mode 100644 backport-Fix-parsing-errors-in-email-_parseaddr.py.patch create mode 100644 backport-Revert-fixes-for-CVE-2023-27043.patch diff --git a/backport-CVE-2023-27043.patch b/backport-CVE-2023-27043.patch new file mode 100644 index 0000000..5887022 --- /dev/null +++ b/backport-CVE-2023-27043.patch @@ -0,0 +1,487 @@ +From 49d48ccd462c97ecf63ce9ee4e8f1e94b2604a11 Mon Sep 17 00:00:00 2001 +From: Victor Stinner +Date: Fri, 15 Dec 2023 16:10:40 +0100 +Subject: [PATCH] [Backport] CVE-2023-27043 Reject malformed addresses in + email.parseaddr() + +Reference: https://github.com/python/cpython/pull/111116 + +Detect email address parsing errors and return empty tuple to +indicate the parsing error (old API). Add an optional 'strict' +parameter to getaddresses() and parseaddr() functions. + +Offering: CloudBu CMP + +CVE: CVE-2023-27043 +--- + Doc/library/email.utils.rst | 19 +- + Lib/email/utils.py | 151 +++++++++++++- + Lib/test/test_email/test_email.py | 186 +++++++++++++++++- + ...-10-20-15-28-08.gh-issue-102988.dStNO7.rst | 8 + + 4 files changed, 343 insertions(+), 21 deletions(-) + create mode 100644 Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst + +diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst +index 0e266b6..6723dc4 100644 +--- a/Doc/library/email.utils.rst ++++ b/Doc/library/email.utils.rst +@@ -60,13 +60,18 @@ of the new API. + begins with angle brackets, they are stripped off. + + +-.. function:: parseaddr(address) ++.. 
function:: parseaddr(address, *, strict=True) + + Parse address -- which should be the value of some address-containing field such + as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and + *email address* parts. Returns a tuple of that information, unless the parse + fails, in which case a 2-tuple of ``('', '')`` is returned. + ++ If *strict* is true, use a strict parser which rejects malformed inputs. ++ ++ .. versionchanged:: 3.13 ++ Add *strict* optional parameter and reject malformed inputs by default. ++ + + .. function:: formataddr(pair, charset='utf-8') + +@@ -84,12 +89,15 @@ of the new API. + Added the *charset* option. + + +-.. function:: getaddresses(fieldvalues) ++.. function:: getaddresses(fieldvalues, *, strict=True) + + This method returns a list of 2-tuples of the form returned by ``parseaddr()``. + *fieldvalues* is a sequence of header field values as might be returned by +- :meth:`Message.get_all `. Here's a simple +- example that gets all the recipients of a message:: ++ :meth:`Message.get_all `. ++ ++ If *strict* is true, use a strict parser which rejects malformed inputs. ++ ++ Here's a simple example that gets all the recipients of a message:: + + from email.utils import getaddresses + +@@ -99,6 +107,9 @@ of the new API. + resent_ccs = msg.get_all('resent-cc', []) + all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs) + ++ .. versionchanged:: 3.13 ++ Add *strict* optional parameter and reject malformed inputs by default. ++ + + .. 
function:: parsedate(date) + +diff --git a/Lib/email/utils.py b/Lib/email/utils.py +index 95620b7..98e5ee1 100644 +--- a/Lib/email/utils.py ++++ b/Lib/email/utils.py +@@ -48,6 +48,7 @@ TICK = "'" + specialsre = re.compile(r'[][\\()<>@,:;".]') + escapesre = re.compile(r'[\\"]') + ++ + def _has_surrogates(s): + """Return True if s contains surrogate-escaped binary data.""" + # This check is based on the fact that unless there are surrogates, utf8 +@@ -105,11 +106,126 @@ def formataddr(pair, charset='utf-8'): + return '%s%s%s <%s>' % (quotes, name, quotes, address) + return address + +-def getaddresses(fieldvalues): +- """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" +- all = COMMASPACE.join(str(v) for v in fieldvalues) +- a = _AddressList(all) +- return a.addresslist ++def _iter_escaped_chars(addr): ++ pos = 0 ++ escape = False ++ for pos, ch in enumerate(addr): ++ if escape: ++ yield (pos, '\\' + ch) ++ escape = False ++ elif ch == '\\': ++ escape = True ++ else: ++ yield (pos, ch) ++ if escape: ++ yield (pos, '\\') ++ ++ ++def _strip_quoted_realnames(addr): ++ """Strip real names between quotes.""" ++ if '"' not in addr: ++ # Fast path ++ return addr ++ ++ start = 0 ++ open_pos = None ++ result = [] ++ for pos, ch in _iter_escaped_chars(addr): ++ if ch == '"': ++ if open_pos is None: ++ open_pos = pos ++ else: ++ if start != open_pos: ++ result.append(addr[start:open_pos]) ++ start = pos + 1 ++ open_pos = None ++ ++ if start < len(addr): ++ result.append(addr[start:]) ++ ++ return ''.join(result) ++ ++supports_strict_parsing = True ++ ++def getaddresses(fieldvalues, *, strict=True): ++ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. ++ ++ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in ++ its place. ++ ++ If strict is true, use a strict parser which rejects malformed inputs. 
++ """ ++ ++ # If strict is true, if the resulting list of parsed addresses is greater ++ # than the number of fieldvalues in the input list, a parsing error has ++ # occurred and consequently a list containing a single empty 2-tuple [('', ++ # '')] is returned in its place. This is done to avoid invalid output. ++ # ++ # Malformed input: getaddresses(['alice@example.com <bob@example.com>']) ++ # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')] ++ # Safe output: [('', '')] ++ ++ if not strict: ++ all = COMMASPACE.join(str(v) for v in fieldvalues) ++ a = _AddressList(all) ++ return a.addresslist ++ ++ fieldvalues = [str(v) for v in fieldvalues] ++ fieldvalues = _pre_parse_validation(fieldvalues) ++ addr = COMMASPACE.join(fieldvalues) ++ a = _AddressList(addr) ++ result = _post_parse_validation(a.addresslist) ++ ++ # Treat output as invalid if the number of addresses is not equal to the ++ # expected number of addresses. ++ n = 0 ++ for v in fieldvalues: ++ # When a comma is used in the Real Name part it is not a delimiter. ++ # So strip those out before counting the commas. ++ v = _strip_quoted_realnames(v) ++ # Expected number of addresses: 1 + number of commas ++ n += 1 + v.count(',') ++ if len(result) != n: ++ return [('', '')] ++ ++ return result ++ ++ ++def _check_parenthesis(addr): ++ # Ignore parenthesis in quoted real names. 
++ addr = _strip_quoted_realnames(addr) ++ ++ opens = 0 ++ for pos, ch in _iter_escaped_chars(addr): ++ if ch == '(': ++ opens += 1 ++ elif ch == ')': ++ opens -= 1 ++ if opens < 0: ++ return False ++ return (opens == 0) ++ ++ ++def _pre_parse_validation(email_header_fields): ++ accepted_values = [] ++ for v in email_header_fields: ++ if not _check_parenthesis(v): ++ v = "('', '')" ++ accepted_values.append(v) ++ ++ return accepted_values ++ ++ ++def _post_parse_validation(parsed_email_header_tuples): ++ accepted_values = [] ++ # The parser would have parsed a correctly formatted domain-literal ++ # The existence of an [ after parsing indicates a parsing failure ++ for v in parsed_email_header_tuples: ++ if '[' in v[1]: ++ v = ('', '') ++ accepted_values.append(v) ++ ++ return accepted_values + + + def _format_timetuple_and_zone(timetuple, zone): +@@ -203,16 +319,33 @@ def parsedate_to_datetime(data): + tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) + + +-def parseaddr(addr): ++def parseaddr(addr, *, strict=True): + """ + Parse addr into its constituent realname and email address parts. + + Return a tuple of realname and email address, unless the parse fails, in + which case return a 2-tuple of ('', ''). ++ ++ If strict is True, use a strict parser which rejects malformed inputs. 
+ """ +- addrs = _AddressList(addr).addresslist +- if not addrs: +- return '', '' ++ if not strict: ++ addrs = _AddressList(addr).addresslist ++ if not addrs: ++ return ('', '') ++ return addrs[0] ++ ++ if isinstance(addr, list): ++ addr = addr[0] ++ ++ if not isinstance(addr, str): ++ return ('', '') ++ ++ addr = _pre_parse_validation([addr])[0] ++ addrs = _post_parse_validation(_AddressList(addr).addresslist) ++ ++ if not addrs or len(addrs) > 1: ++ return ('', '') ++ + return addrs[0] + + +diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py +index 473a488..c4b0a4e 100644 +--- a/Lib/test/test_email/test_email.py ++++ b/Lib/test/test_email/test_email.py +@@ -17,6 +17,7 @@ from unittest.mock import patch + + import email + import email.policy ++import email.utils + + from email.charset import Charset + from email.generator import Generator, DecodedGenerator, BytesGenerator +@@ -3338,15 +3339,137 @@ Foo + ], + ) + ++ def test_parsing_errors(self): ++ """Test for parsing errors from CVE-2023-27043 and CVE-2019-16056""" ++ alice = 'alice@example.org' ++ bob = 'bob@example.com' ++ empty = ('', '') ++ ++ # Test utils.getaddresses() and utils.parseaddr() on malformed email ++ # addresses: default behavior (strict=True) rejects malformed address, ++ # and strict=False which tolerates malformed address. 
++ for invalid_separator, expected_non_strict in ( ++ ('(', [(f'<{bob}>', alice)]), ++ (')', [('', alice), empty, ('', bob)]), ++ ('<', [('', alice), empty, ('', bob), empty]), ++ ('>', [('', alice), empty, ('', bob)]), ++ ('[', [('', f'{alice}[<{bob}>]')]), ++ (']', [('', alice), empty, ('', bob)]), ++ ('@', [empty, empty, ('', bob)]), ++ (';', [('', alice), empty, ('', bob)]), ++ (':', [('', alice), ('', bob)]), ++ ('.', [('', alice + '.'), ('', bob)]), ++ ('"', [('', alice), ('', f'<{bob}>')]), ++ ): ++ address = f'{alice}{invalid_separator}<{bob}>' ++ with self.subTest(address=address): ++ self.assertEqual(utils.getaddresses([address]), ++ [empty]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ expected_non_strict) ++ ++ self.assertEqual(utils.parseaddr([address]), ++ empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Comma (',') is treated differently depending on strict parameter. ++ # Comma without quotes. ++ address = f'{alice},<{bob}>' ++ self.assertEqual(utils.getaddresses([address]), ++ [('', alice), ('', bob)]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ [('', alice), ('', bob)]) ++ self.assertEqual(utils.parseaddr([address]), ++ empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Real name between quotes containing comma. ++ address = '"Alice, alice@example.org" ' ++ expected_strict = ('Alice, alice@example.org', 'bob@example.com') ++ self.assertEqual(utils.getaddresses([address]), [expected_strict]) ++ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict]) ++ self.assertEqual(utils.parseaddr([address]), expected_strict) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Valid parenthesis in comments. 
++ address = 'alice@example.org (Alice)' ++ expected_strict = ('Alice', 'alice@example.org') ++ self.assertEqual(utils.getaddresses([address]), [expected_strict]) ++ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict]) ++ self.assertEqual(utils.parseaddr([address]), expected_strict) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Invalid parenthesis in comments. ++ address = 'alice@example.org )Alice(' ++ self.assertEqual(utils.getaddresses([address]), [empty]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ [('', 'alice@example.org'), ('', ''), ('', 'Alice')]) ++ self.assertEqual(utils.parseaddr([address]), empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Two addresses with quotes separated by comma. ++ address = '"Jane Doe" , "John Doe" ' ++ self.assertEqual(utils.getaddresses([address]), ++ [('Jane Doe', 'jane@example.net'), ++ ('John Doe', 'john@example.net')]) ++ self.assertEqual(utils.getaddresses([address], strict=False), ++ [('Jane Doe', 'jane@example.net'), ++ ('John Doe', 'john@example.net')]) ++ self.assertEqual(utils.parseaddr([address]), empty) ++ self.assertEqual(utils.parseaddr([address], strict=False), ++ ('', address)) ++ ++ # Test email.utils.supports_strict_parsing attribute ++ self.assertEqual(email.utils.supports_strict_parsing, True) ++ + def test_getaddresses_nasty(self): +- eq = self.assertEqual +- eq(utils.getaddresses(['foo: ;']), [('', '')]) +- eq(utils.getaddresses( +- ['[]*-- =~$']), +- [('', ''), ('', ''), ('', '*--')]) +- eq(utils.getaddresses( +- ['foo: ;', '"Jason R. Mastaler" ']), +- [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) ++ for addresses, expected in ( ++ (['"Sürname, Firstname" '], ++ [('Sürname, Firstname', 'to@example.com')]), ++ ++ (['foo: ;'], ++ [('', '')]), ++ ++ (['foo: ;', '"Jason R. Mastaler" '], ++ [('', ''), ('Jason R. 
Mastaler', 'jason@dom.ain')]), ++ ++ ([r'Pete(A nice \) chap) '], ++ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]), ++ ++ (['(Empty list)(start)Undisclosed recipients :(nobody(I know))'], ++ [('', '')]), ++ ++ (['Mary <@machine.tld:mary@example.net>, , jdoe@test . example'], ++ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]), ++ ++ (['John Doe '], ++ [('John Doe (comment)', 'jdoe@machine.example')]), ++ ++ (['"Mary Smith: Personal Account" '], ++ [('Mary Smith: Personal Account', 'smith@home.example')]), ++ ++ (['Undisclosed recipients:;'], ++ [('', '')]), ++ ++ ([r', "Giant; \"Big\" Box" '], ++ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]), ++ ): ++ with self.subTest(addresses=addresses): ++ self.assertEqual(utils.getaddresses(addresses), ++ expected) ++ self.assertEqual(utils.getaddresses(addresses, strict=False), ++ expected) ++ ++ addresses = ['[]*-- =~$'] ++ self.assertEqual(utils.getaddresses(addresses), ++ [('', '')]) ++ self.assertEqual(utils.getaddresses(addresses, strict=False), ++ [('', ''), ('', ''), ('', '*--')]) + + def test_getaddresses_embedded_comment(self): + """Test proper handling of a nested comment""" +@@ -3537,6 +3660,53 @@ multipart/report + m = cls(*constructor, policy=email.policy.default) + self.assertIs(m.policy, email.policy.default) + ++ def test_iter_escaped_chars(self): ++ self.assertEqual(list(utils._iter_escaped_chars(r'a\\b\"c\\"d')), ++ [(0, 'a'), ++ (2, '\\\\'), ++ (3, 'b'), ++ (5, '\\"'), ++ (6, 'c'), ++ (8, '\\\\'), ++ (9, '"'), ++ (10, 'd')]) ++ self.assertEqual(list(utils._iter_escaped_chars('a\\')), ++ [(0, 'a'), (1, '\\')]) ++ ++ def test_strip_quoted_realnames(self): ++ def check(addr, expected): ++ self.assertEqual(utils._strip_quoted_realnames(addr), expected) ++ ++ check('"Jane Doe" , "John Doe" ', ++ ' , ') ++ check(r'"Jane \"Doe\"." 
', ++ ' ') ++ ++ # special cases ++ check(r'before"name"after', 'beforeafter') ++ check(r'before"name"', 'before') ++ check(r'b"name"', 'b') # single char ++ check(r'"name"after', 'after') ++ check(r'"name"a', 'a') # single char ++ check(r'"name"', '') ++ ++ # no change ++ for addr in ( ++ 'Jane Doe , John Doe ', ++ 'lone " quote', ++ ): ++ self.assertEqual(utils._strip_quoted_realnames(addr), addr) ++ ++ ++ def test_check_parenthesis(self): ++ addr = 'alice@example.net' ++ self.assertTrue(utils._check_parenthesis(f'{addr} (Alice)')) ++ self.assertFalse(utils._check_parenthesis(f'{addr} )Alice(')) ++ self.assertFalse(utils._check_parenthesis(f'{addr} (Alice))')) ++ self.assertFalse(utils._check_parenthesis(f'{addr} ((Alice)')) ++ ++ # Ignore real name between quotes ++ self.assertTrue(utils._check_parenthesis(f'")Alice((" {addr}')) + + # Test the iterator/generators + class TestIterators(TestEmailBase): +diff --git a/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst +new file mode 100644 +index 0000000..3d0e9e4 +--- /dev/null ++++ b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst +@@ -0,0 +1,8 @@ ++:func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now ++return ``('', '')`` 2-tuples in more situations where invalid email ++addresses are encountered instead of potentially inaccurate values. Add ++optional *strict* parameter to these two functions: use ``strict=False`` to ++get the old behavior, accept malformed inputs. ++``getattr(email.utils, 'supports_strict_parsing', False)`` can be used to check ++if the *strict* parameter is available. Patch by Thomas Dwyer and Victor ++Stinner to improve the CVE-2023-27043 fix. 
+-- +2.27.0 + diff --git a/backport-Fix-parsing-errors-in-email-_parseaddr.py.patch b/backport-Fix-parsing-errors-in-email-_parseaddr.py.patch new file mode 100644 index 0000000..859c1f9 --- /dev/null +++ b/backport-Fix-parsing-errors-in-email-_parseaddr.py.patch @@ -0,0 +1,265 @@ +From f4529f55d3ea4e68f90fbdd2e8a37c43986aa1bf Mon Sep 17 00:00:00 2001 +From: Thomas Dwyer +Date: Mon, 10 Jul 2023 18:00:55 -0500 +Subject: [PATCH] [Backport] Fix parsing errors in email/_parseaddr.py + +Reference: https://github.com/python/cpython/issues/102988 + +The e-mail module of Python 0 - 2.7.18, 3.x - 3.11 incorrectly parses e-mail addresses which contain a special character. This vulnerability allows attackers to send messages from e-mail addresses that would otherwise be rejected. + +Offering: CloudBu CMP + +CVE: CVE-2023-27043 +--- + Doc/library/email.utils.rst | 26 +++++- + Lib/email/utils.py | 61 ++++++++++++-- + Lib/test/test_email/test_email.py | 81 ++++++++++++++++++- + ...-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst | 4 + + 4 files changed, 162 insertions(+), 10 deletions(-) + create mode 100644 Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst + +diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst +index 0e266b6..06f8169 100644 +--- a/Doc/library/email.utils.rst ++++ b/Doc/library/email.utils.rst +@@ -67,6 +67,11 @@ of the new API. + *email address* parts. Returns a tuple of that information, unless the parse + fails, in which case a 2-tuple of ``('', '')`` is returned. + ++ .. versionchanged:: 3.12 ++ For security reasons, addresses that were ambiguous and could parse into ++ multiple different addresses now cause ``('', '')`` to be returned ++ instead of only one of the *potential* addresses. ++ + + .. function:: formataddr(pair, charset='utf-8') + +@@ -89,7 +94,7 @@ of the new API. + This method returns a list of 2-tuples of the form returned by ``parseaddr()``. 
+ *fieldvalues* is a sequence of header field values as might be returned by + :meth:`Message.get_all `. Here's a simple +- example that gets all the recipients of a message:: ++ example that gets all the recipients of a message: + + from email.utils import getaddresses + +@@ -99,6 +104,25 @@ of the new API. + resent_ccs = msg.get_all('resent-cc', []) + all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs) + ++ When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')`` ++ is returned in its place. Other errors in parsing the list of ++ addresses such as a fieldvalue seemingly parsing into multiple ++ addresses may result in a list containing a single empty 2-tuple ++ ``[('', '')]`` being returned rather than returning potentially ++ invalid output. ++ ++ Example malformed input parsing: ++ ++ .. doctest:: ++ ++ >>> from email.utils import getaddresses ++ >>> getaddresses(['alice@example.com ', 'me@example.com']) ++ [('', '')] ++ ++ .. versionchanged:: 3.12 ++ The 2-tuple of ``('', '')`` in the returned values when parsing ++ fails were added as to address a security issue. ++ + + .. 
function:: parsedate(date) + +diff --git a/Lib/email/utils.py b/Lib/email/utils.py +index cfdfeb3..f8e867a 100644 +--- a/Lib/email/utils.py ++++ b/Lib/email/utils.py +@@ -105,13 +105,53 @@ def formataddr(pair, charset='utf-8'): + return '%s%s%s <%s>' % (quotes, name, quotes, address) + return address + ++def _pre_parse_validation(email_header_fields): ++ accepted_values = [] ++ for v in email_header_fields: ++ s = v.replace('\\(', '').replace('\\)', '') ++ if s.count('(') != s.count(')'): ++ v = "('', '')" ++ accepted_values.append(v) + ++ return accepted_values ++ ++ ++def _post_parse_validation(parsed_email_header_tuples): ++ accepted_values = [] ++ # The parser would have parsed a correctly formatted domain-literal ++ # The existence of an [ after parsing indicates a parsing failure ++ for v in parsed_email_header_tuples: ++ if '[' in v[1]: ++ v = ('', '') ++ accepted_values.append(v) ++ ++ return accepted_values + + def getaddresses(fieldvalues): +- """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" +- all = COMMASPACE.join(str(v) for v in fieldvalues) ++ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. ++ ++ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in ++ its place. ++ ++ If the resulting list of parsed address is not the same as the number of ++ fieldvalues in the input list a parsing error has occurred. A list ++ containing a single empty 2-tuple [('', '')] is returned in its place. ++ This is done to avoid invalid output. 
++ """ ++ fieldvalues = [str(v) for v in fieldvalues] ++ fieldvalues = _pre_parse_validation(fieldvalues) ++ all = COMMASPACE.join(v for v in fieldvalues) + a = _AddressList(all) +- return a.addresslist ++ result = _post_parse_validation(a.addresslist) ++ ++ n = 0 ++ for v in fieldvalues: ++ n += v.count(',') + 1 ++ ++ if len(result) != n: ++ return [('', '')] ++ ++ return result + + + def _format_timetuple_and_zone(timetuple, zone): +@@ -212,9 +252,18 @@ def parseaddr(addr): + Return a tuple of realname and email address, unless the parse fails, in + which case return a 2-tuple of ('', ''). + """ +- addrs = _AddressList(addr).addresslist +- if not addrs: +- return '', '' ++ if isinstance(addr, list): ++ addr = addr[0] ++ ++ if not isinstance(addr, str): ++ return ('', '') ++ ++ addr = _pre_parse_validation([addr])[0] ++ addrs = _post_parse_validation(_AddressList(addr).addresslist) ++ ++ if not addrs or len(addrs) > 1: ++ return ('', '') ++ + return addrs[0] + + +diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py +index 677f209..f43d586 100644 +--- a/Lib/test/test_email/test_email.py ++++ b/Lib/test/test_email/test_email.py +@@ -3321,15 +3321,90 @@ Foo + [('Al Person', 'aperson@dom.ain'), + ('Bud Person', 'bperson@dom.ain')]) + ++ def test_getaddresses_parsing_errors(self): ++ """Test for parsing errors from CVE-2023-27043""" ++ eq = self.assertEqual ++ eq(utils.getaddresses(['alice@example.org(']), ++ [('', '')]) ++ eq(utils.getaddresses(['alice@example.org)']), ++ [('', '')]) ++ eq(utils.getaddresses(['alice@example.org<']), ++ [('', '')]) ++ eq(utils.getaddresses(['alice@example.org>']), ++ [('', '')]) ++ eq(utils.getaddresses(['alice@example.org@']), ++ [('', '')]) ++ eq(utils.getaddresses(['alice@example.org,']), ++ [('', 'alice@example.org'), ('', 'bob@example.com')]) ++ eq(utils.getaddresses(['alice@example.org;']), ++ [('', '')]) ++ eq(utils.getaddresses(['alice@example.org:']), ++ [('', '')]) ++ 
eq(utils.getaddresses(['alice@example.org.']), ++ [('', '')]) ++ eq(utils.getaddresses(['alice@example.org"']), ++ [('', '')]) ++ eq(utils.getaddresses(['alice@example.org[']), ++ [('', '')]) ++ eq(utils.getaddresses(['alice@example.org]']), ++ [('', '')]) ++ ++ def test_parseaddr_parsing_errors(self): ++ """Test for parsing errors from CVE-2023-27043""" ++ eq = self.assertEqual ++ eq(utils.parseaddr(['alice@example.org(']), ++ ('', '')) ++ eq(utils.parseaddr(['alice@example.org)']), ++ ('', '')) ++ eq(utils.parseaddr(['alice@example.org<']), ++ ('', '')) ++ eq(utils.parseaddr(['alice@example.org>']), ++ ('', '')) ++ eq(utils.parseaddr(['alice@example.org@']), ++ ('', '')) ++ eq(utils.parseaddr(['alice@example.org,']), ++ ('', '')) ++ eq(utils.parseaddr(['alice@example.org;']), ++ ('', '')) ++ eq(utils.parseaddr(['alice@example.org:']), ++ ('', '')) ++ eq(utils.parseaddr(['alice@example.org.']), ++ ('', '')) ++ eq(utils.parseaddr(['alice@example.org"']), ++ ('', '')) ++ eq(utils.parseaddr(['alice@example.org[']), ++ ('', '')) ++ eq(utils.parseaddr(['alice@example.org]']), ++ ('', '')) ++ + def test_getaddresses_nasty(self): + eq = self.assertEqual + eq(utils.getaddresses(['foo: ;']), [('', '')]) +- eq(utils.getaddresses( +- ['[]*-- =~$']), +- [('', ''), ('', ''), ('', '*--')]) ++ eq(utils.getaddresses(['[]*-- =~$']), [('', '')]) + eq(utils.getaddresses( + ['foo: ;', '"Jason R. Mastaler" ']), + [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]) ++ eq(utils.getaddresses( ++ [r'Pete(A nice \) chap) ']), ++ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]) ++ eq(utils.getaddresses( ++ ['(Empty list)(start)Undisclosed recipients :(nobody(I know))']), ++ [('', '')]) ++ eq(utils.getaddresses( ++ ['Mary <@machine.tld:mary@example.net>, , jdoe@test . 
example']), ++ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]) ++ eq(utils.getaddresses( ++ ['John Doe ']), ++ [('John Doe (comment)', 'jdoe@machine.example')]) ++ eq(utils.getaddresses( ++ ['"Mary Smith: Personal Account" ']), ++ [('Mary Smith: Personal Account', 'smith@home.example')]) ++ eq(utils.getaddresses( ++ ['Undisclosed recipients:;']), ++ [('', '')]) ++ eq(utils.getaddresses( ++ [r', "Giant; \"Big\" Box" ']), ++ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]) + + def test_getaddresses_embedded_comment(self): + """Test proper handling of a nested comment""" +diff --git a/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst +new file mode 100644 +index 0000000..e0434cc +--- /dev/null ++++ b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst +@@ -0,0 +1,4 @@ ++CVE-2023-27043: Prevent :func:`email.utils.parseaddr` ++and :func:`email.utils.getaddresses` from returning the realname portion of an ++invalid RFC2822 email header in the email address portion of the 2-tuple ++returned after being parsed by :class:`email._parseaddr.AddressList`. +-- +2.27.0 + diff --git a/backport-Revert-fixes-for-CVE-2023-27043.patch b/backport-Revert-fixes-for-CVE-2023-27043.patch new file mode 100644 index 0000000..193d510 --- /dev/null +++ b/backport-Revert-fixes-for-CVE-2023-27043.patch @@ -0,0 +1,284 @@ +From a6f6faf3b669c409804fcaf4e62dd9bd257d2178 Mon Sep 17 00:00:00 2001 +From: "Gregory P. Smith" +Date: Thu, 20 Jul 2023 20:30:52 -0700 +Subject: [PATCH] [Backport] Revert fixes for CVE-2023-27043 + +Reference: https://github.com/python/cpython/pull/106733 + +Revert "gh-102988: Detect email address parsing errors and return empty tuple to indicate the parsing error (old API) (#105127)" +This reverts commit and adds the regression test suggested in the issue. 
+ +Offering: CloudBu CMP + +CVE: CVE-2023-27043 +--- + Doc/library/email.utils.rst | 26 +---- + Lib/email/utils.py | 63 ++---------- + Lib/test/test_email/test_email.py | 96 ++++--------------- + ...-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst | 8 +- + 4 files changed, 30 insertions(+), 163 deletions(-) + +diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst +index 06f8169..0e266b6 100644 +--- a/Doc/library/email.utils.rst ++++ b/Doc/library/email.utils.rst +@@ -67,11 +67,6 @@ of the new API. + *email address* parts. Returns a tuple of that information, unless the parse + fails, in which case a 2-tuple of ``('', '')`` is returned. + +- .. versionchanged:: 3.12 +- For security reasons, addresses that were ambiguous and could parse into +- multiple different addresses now cause ``('', '')`` to be returned +- instead of only one of the *potential* addresses. +- + + .. function:: formataddr(pair, charset='utf-8') + +@@ -94,7 +89,7 @@ of the new API. + This method returns a list of 2-tuples of the form returned by ``parseaddr()``. + *fieldvalues* is a sequence of header field values as might be returned by + :meth:`Message.get_all `. Here's a simple +- example that gets all the recipients of a message: ++ example that gets all the recipients of a message:: + + from email.utils import getaddresses + +@@ -104,25 +99,6 @@ of the new API. + resent_ccs = msg.get_all('resent-cc', []) + all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs) + +- When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')`` +- is returned in its place. Other errors in parsing the list of +- addresses such as a fieldvalue seemingly parsing into multiple +- addresses may result in a list containing a single empty 2-tuple +- ``[('', '')]`` being returned rather than returning potentially +- invalid output. +- +- Example malformed input parsing: +- +- .. 
doctest:: +- +- >>> from email.utils import getaddresses +- >>> getaddresses(['alice@example.com ', 'me@example.com']) +- [('', '')] +- +- .. versionchanged:: 3.12 +- The 2-tuple of ``('', '')`` in the returned values when parsing +- fails were added as to address a security issue. +- + + .. function:: parsedate(date) + +diff --git a/Lib/email/utils.py b/Lib/email/utils.py +index f8e867a..95620b7 100644 +--- a/Lib/email/utils.py ++++ b/Lib/email/utils.py +@@ -105,53 +105,11 @@ def formataddr(pair, charset='utf-8'): + return '%s%s%s <%s>' % (quotes, name, quotes, address) + return address + +-def _pre_parse_validation(email_header_fields): +- accepted_values = [] +- for v in email_header_fields: +- s = v.replace('\\(', '').replace('\\)', '') +- if s.count('(') != s.count(')'): +- v = "('', '')" +- accepted_values.append(v) +- +- return accepted_values +- +- +-def _post_parse_validation(parsed_email_header_tuples): +- accepted_values = [] +- # The parser would have parsed a correctly formatted domain-literal +- # The existence of an [ after parsing indicates a parsing failure +- for v in parsed_email_header_tuples: +- if '[' in v[1]: +- v = ('', '') +- accepted_values.append(v) +- +- return accepted_values +- + def getaddresses(fieldvalues): +- """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. +- +- When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in +- its place. +- +- If the resulting list of parsed address is not the same as the number of +- fieldvalues in the input list a parsing error has occurred. A list +- containing a single empty 2-tuple [('', '')] is returned in its place. +- This is done to avoid invalid output. 
+- """ +- fieldvalues = [str(v) for v in fieldvalues] +- fieldvalues = _pre_parse_validation(fieldvalues) +- all = COMMASPACE.join(v for v in fieldvalues) ++ """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" ++ all = COMMASPACE.join(str(v) for v in fieldvalues) + a = _AddressList(all) +- result = _post_parse_validation(a.addresslist) +- +- n = 0 +- for v in fieldvalues: +- n += v.count(',') + 1 +- +- if len(result) != n: +- return [('', '')] +- +- return result ++ return a.addresslist + + + def _format_timetuple_and_zone(timetuple, zone): +@@ -252,18 +210,9 @@ def parseaddr(addr): + Return a tuple of realname and email address, unless the parse fails, in + which case return a 2-tuple of ('', ''). + """ +- if isinstance(addr, list): +- addr = addr[0] +- +- if not isinstance(addr, str): +- return ('', '') +- +- addr = _pre_parse_validation([addr])[0] +- addrs = _post_parse_validation(_AddressList(addr).addresslist) +- +- if not addrs or len(addrs) > 1: +- return ('', '') +- ++ addrs = _AddressList(addr).addresslist ++ if not addrs: ++ return '', '' + return addrs[0] + + +diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py +index f43d586..473a488 100644 +--- a/Lib/test/test_email/test_email.py ++++ b/Lib/test/test_email/test_email.py +@@ -3321,90 +3321,32 @@ Foo + [('Al Person', 'aperson@dom.ain'), + ('Bud Person', 'bperson@dom.ain')]) + +- def test_getaddresses_parsing_errors(self): +- """Test for parsing errors from CVE-2023-27043""" +- eq = self.assertEqual +- eq(utils.getaddresses(['alice@example.org(']), +- [('', '')]) +- eq(utils.getaddresses(['alice@example.org)']), +- [('', '')]) +- eq(utils.getaddresses(['alice@example.org<']), +- [('', '')]) +- eq(utils.getaddresses(['alice@example.org>']), +- [('', '')]) +- eq(utils.getaddresses(['alice@example.org@']), +- [('', '')]) +- eq(utils.getaddresses(['alice@example.org,']), +- [('', 'alice@example.org'), ('', 'bob@example.com')]) +- 
eq(utils.getaddresses(['alice@example.org;']), +- [('', '')]) +- eq(utils.getaddresses(['alice@example.org:']), +- [('', '')]) +- eq(utils.getaddresses(['alice@example.org.']), +- [('', '')]) +- eq(utils.getaddresses(['alice@example.org"']), +- [('', '')]) +- eq(utils.getaddresses(['alice@example.org[']), +- [('', '')]) +- eq(utils.getaddresses(['alice@example.org]']), +- [('', '')]) +- +- def test_parseaddr_parsing_errors(self): +- """Test for parsing errors from CVE-2023-27043""" +- eq = self.assertEqual +- eq(utils.parseaddr(['alice@example.org(']), +- ('', '')) +- eq(utils.parseaddr(['alice@example.org)']), +- ('', '')) +- eq(utils.parseaddr(['alice@example.org<']), +- ('', '')) +- eq(utils.parseaddr(['alice@example.org>']), +- ('', '')) +- eq(utils.parseaddr(['alice@example.org@']), +- ('', '')) +- eq(utils.parseaddr(['alice@example.org,']), +- ('', '')) +- eq(utils.parseaddr(['alice@example.org;']), +- ('', '')) +- eq(utils.parseaddr(['alice@example.org:']), +- ('', '')) +- eq(utils.parseaddr(['alice@example.org.']), +- ('', '')) +- eq(utils.parseaddr(['alice@example.org"']), +- ('', '')) +- eq(utils.parseaddr(['alice@example.org[']), +- ('', '')) +- eq(utils.parseaddr(['alice@example.org]']), +- ('', '')) ++ def test_getaddresses_comma_in_name(self): ++ """GH-106669 regression test.""" ++ self.assertEqual( ++ utils.getaddresses( ++ [ ++ '"Bud, Person" ', ++ 'aperson@dom.ain (Al Person)', ++ '"Mariusz Felisiak" ', ++ ] ++ ), ++ [ ++ ('Bud, Person', 'bperson@dom.ain'), ++ ('Al Person', 'aperson@dom.ain'), ++ ('Mariusz Felisiak', 'to@example.com'), ++ ], ++ ) + + def test_getaddresses_nasty(self): + eq = self.assertEqual + eq(utils.getaddresses(['foo: ;']), [('', '')]) +- eq(utils.getaddresses(['[]*-- =~$']), [('', '')]) ++ eq(utils.getaddresses( ++ ['[]*-- =~$']), ++ [('', ''), ('', ''), ('', '*--')]) + eq(utils.getaddresses( + ['foo: ;', '"Jason R. Mastaler" ']), + [('', ''), ('Jason R. 
Mastaler', 'jason@dom.ain')]) +- eq(utils.getaddresses( +- [r'Pete(A nice \) chap) ']), +- [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]) +- eq(utils.getaddresses( +- ['(Empty list)(start)Undisclosed recipients :(nobody(I know))']), +- [('', '')]) +- eq(utils.getaddresses( +- ['Mary <@machine.tld:mary@example.net>, , jdoe@test . example']), +- [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]) +- eq(utils.getaddresses( +- ['John Doe ']), +- [('John Doe (comment)', 'jdoe@machine.example')]) +- eq(utils.getaddresses( +- ['"Mary Smith: Personal Account" ']), +- [('Mary Smith: Personal Account', 'smith@home.example')]) +- eq(utils.getaddresses( +- ['Undisclosed recipients:;']), +- [('', '')]) +- eq(utils.getaddresses( +- [r', "Giant; \"Big\" Box" ']), +- [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]) + + def test_getaddresses_embedded_comment(self): + """Test proper handling of a nested comment""" +diff --git a/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst +index e0434cc..c67ec45 100644 +--- a/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst ++++ b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst +@@ -1,4 +1,4 @@ +-CVE-2023-27043: Prevent :func:`email.utils.parseaddr` +-and :func:`email.utils.getaddresses` from returning the realname portion of an +-invalid RFC2822 email header in the email address portion of the 2-tuple +-returned after being parsed by :class:`email._parseaddr.AddressList`. ++Reverted the :mod:`email.utils` security improvement change released in ++3.12beta4 that unintentionally caused :mod:`email.utils.getaddresses` to fail ++to parse email addresses with a comma in the quoted name field. ++See :gh:`106669`. 
+-- +2.27.0 + diff --git a/python3.spec b/python3.spec index 4d35056..2b4af25 100644 --- a/python3.spec +++ b/python3.spec @@ -3,7 +3,7 @@ Summary: Interpreter of the Python3 programming language URL: https://www.python.org/ Version: 3.11.6 -Release: 5 +Release: 6 License: Python-2.0 %global branchversion 3.11 @@ -92,6 +92,10 @@ Source1: pyconfig.h Patch1: 00001-rpath.patch Patch251: 00251-change-user-install-location.patch +Patch6011: backport-Fix-parsing-errors-in-email-_parseaddr.py.patch +Patch6012: backport-Revert-fixes-for-CVE-2023-27043.patch +Patch6013: backport-CVE-2023-27043.patch + Patch9000: add-the-sm3-method-for-obtaining-the-salt-value.patch Patch9001: 0001-add-loongarch64-support-for-python.patch Patch9002: backport-3.11-gh-114572-Fix-locking-in-cert_store_stats-and-g.patch @@ -193,6 +197,10 @@ rm configure pyconfig.h.in %patch1 -p1 %patch251 -p1 +%patch6011 -p1 +%patch6012 -p1 +%patch6013 -p1 + %patch9000 -p1 %patch9001 -p1 %patch9002 -p1 @@ -860,6 +868,12 @@ export BEP_GTDLIST="$BEP_GTDLIST_TMP" %{_mandir}/*/* %changelog +* Tue Nov 05 2024 GuoCe - 3.11.6-6 +- Type:CVE +- CVE:CVE-2023-27043 +- SUG:NA +- DESC:fix CVE-2023-27043 + * Thu Jul 11 2024 Dingli Zhang - 3.11.6-5 - Type:enhancement - ID:NA -- Gitee