diff --git a/backport-CVE-2025-6069.patch b/backport-CVE-2025-6069.patch new file mode 100644 index 0000000000000000000000000000000000000000..7c16a573058be146b77a1e8708f8d9889acd1380 --- /dev/null +++ b/backport-CVE-2025-6069.patch @@ -0,0 +1,240 @@ +From f3c6f882cddc8dc30320d2e73edf019e201394fc Mon Sep 17 00:00:00 2001 +From: Serhiy Storchaka +Date: Fri, 4 Jul 2025 00:05:46 +0300 +Subject: [PATCH] [3.11] gh-135462: Fix quadratic complexity in processing + special input in HTMLParser (GH-135464) (GH-135484) + +End-of-file errors are now handled according to the HTML5 specs -- +comments and declarations are automatically closed, tags are ignored. +(cherry picked from commit 6eb6c5dbfb528bd07d77b60fd71fd05d81d45c41) +--- + Lib/html/parser.py | 41 +++++--- + Lib/test/test_htmlparser.py | 95 ++++++++++++++++--- + ...-06-13-15-55-22.gh-issue-135462.KBeJpc.rst | 4 + + 3 files changed, 117 insertions(+), 23 deletions(-) + create mode 100644 Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst + +diff --git a/Lib/html/parser.py b/Lib/html/parser.py +index bef0f4fe4bf..9c38008bbfd 100644 +--- a/Lib/html/parser.py ++++ b/Lib/html/parser.py +@@ -25,6 +25,7 @@ + charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]') + + starttagopen = re.compile('<[a-zA-Z]') ++endtagopen = re.compile('') + commentclose = re.compile(r'--\s*>') + # Note: +@@ -176,7 +177,7 @@ def goahead(self, end): + k = self.parse_pi(i) + elif startswith("', i + 1) +- if k < 0: +- k = rawdata.find('<', i + 1) +- if k < 0: +- k = i + 1 +- else: +- k += 1 +- if self.convert_charrefs and not self.cdata_elem: +- self.handle_data(unescape(rawdata[i:k])) ++ if starttagopen.match(rawdata, i): # < + letter ++ pass ++ elif startswith("'), +- ('comment', '/img'), +- ('endtag', 'html<')]) ++ ('data', '\n')]) + + def test_starttag_junk_chars(self): ++ self._run_check("<", [('data', '<')]) ++ self._run_check("<>", [('data', '<>')]) ++ self._run_check("< >", [('data', '< >')]) ++ self._run_check("< ", [('data', '< ')]) + self._run_check("", []) ++ self._run_check("<$>", [('data', '<$>')]) + self._run_check("", [('comment', '$')]) + self._run_check("", [('endtag', 'a')]) ++ self._run_check("", [('starttag', 'a", [('endtag', 'a'", [('data', "'", []) ++ self._run_check("", [('starttag', 'a$b', [])]) + self._run_check("", [('startendtag', 'a$b', [])]) + self._run_check("", [('starttag', 'a$b', [])]) + self._run_check("", [('startendtag', 'a$b', [])]) ++ self._run_check("", [('endtag', 'a$b')]) + + def test_slashes_in_starttag(self): + self._run_check('', [('startendtag', 'a', [('foo', 'var')])]) +@@ -537,13 +545,56 @@ def test_EOF_in_charref(self): + for html, expected in data: + self._run_check(html, expected) + +- def test_broken_comments(self): +- html = ('' ++ def test_eof_in_comments(self): ++ data = [ ++ ('', [('comment', '-!>')]), ++ ('' + '' + '' + '') + expected = [ ++ ('comment', 'ELEMENT br EMPTY'), + ('comment', ' not really a comment '), + ('comment', ' not a comment either --'), + ('comment', ' -- close enough --'), +@@ -598,6 +649,26 @@ def test_convert_charrefs_dropped_text(self): + ('endtag', 'a'), ('data', ' bar & baz')] + ) + ++ @support.requires_resource('cpu') ++ def test_eof_no_quadratic_complexity(self): ++ # Each of these examples used to take about an hour. ++ # Now they take a fraction of a second. ++ def check(source): ++ parser = html.parser.HTMLParser() ++ parser.feed(source) ++ parser.close() ++ n = 120_000 ++ check(" - 3.11.6-17 +- Type:CVE +- CVE:CVE-2025-6069 +- SUG:NA +- DESC:fix CVE-2025-6069 + * Thu Sep 04 2025 lipengyu - 3.11.6-16 - Type:CVE - CVE:CVE-2025-4516