diff --git a/fix-CVE-2025-6069.patch b/fix-CVE-2025-6069.patch new file mode 100644 index 0000000000000000000000000000000000000000..e2f52e39756b03ff0c3bf5547e9b57e8aeb2b378 --- /dev/null +++ b/fix-CVE-2025-6069.patch @@ -0,0 +1,239 @@ +From 089c6aa56d4e826ef67a492be4b832764273a937 Mon Sep 17 00:00:00 2001 +From: Serhiy Storchaka +Date: Fri, 13 Jun 2025 19:57:48 +0300 +Subject: [PATCH] [3.11] gh-135462: Fix quadratic complexity in processing + special input in HTMLParser (GH-135464) + +End-of-file errors are now handled according to the HTML5 specs -- +comments and declarations are automatically closed, tags are ignored. +(cherry picked from commit 6eb6c5dbfb528bd07d77b60fd71fd05d81d45c41) + +Co-authored-by: Serhiy Storchaka +--- + Lib/html/parser.py | 41 +++++--- + Lib/test/test_htmlparser.py | 95 ++++++++++++++++--- + ...-06-13-15-55-22.gh-issue-135462.KBeJpc.rst | 4 + + 3 files changed, 117 insertions(+), 23 deletions(-) + create mode 100644 Misc/NEWS.d/next/Security/2025-06-13-15-55-22.gh-issue-135462.KBeJpc.rst + +diff --git a/Lib/html/parser.py b/Lib/html/parser.py +index bef0f4fe4bf776..9c38008bbfd06b 100644 +--- a/Lib/html/parser.py ++++ b/Lib/html/parser.py +@@ -25,6 +25,7 @@ + charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]') + + starttagopen = re.compile('<[a-zA-Z]') ++endtagopen = re.compile('') + commentclose = re.compile(r'--\s*>') + # Note: +@@ -176,7 +177,7 @@ def goahead(self, end): + k = self.parse_pi(i) + elif startswith("', i + 1) +- if k < 0: +- k = rawdata.find('<', i + 1) +- if k < 0: +- k = i + 1 +- else: +- k += 1 +- if self.convert_charrefs and not self.cdata_elem: +- self.handle_data(unescape(rawdata[i:k])) ++ if starttagopen.match(rawdata, i): # < + letter ++ pass ++ elif startswith("'), +- ('comment', '/img'), +- ('endtag', 'html<')]) ++ ('data', '\n')]) + + def test_starttag_junk_chars(self): ++ self._run_check("<", [('data', '<')]) ++ self._run_check("<>", [('data', '<>')]) ++ self._run_check("< >", [('data', '< >')]) ++ self._run_check("< ", [('data', '< ')]) + self._run_check("", []) ++ self._run_check("<$>", [('data', '<$>')]) + self._run_check("", [('comment', '$')]) + self._run_check("", [('endtag', 'a')]) ++ self._run_check("", [('starttag', 'a", [('endtag', 'a'", [('data', "'", []) ++ self._run_check("", [('starttag', 'a$b', [])]) + self._run_check("", [('startendtag', 'a$b', [])]) + self._run_check("", [('starttag', 'a$b', [])]) + self._run_check("", [('startendtag', 'a$b', [])]) ++ self._run_check("", [('endtag', 'a$b')]) + + def test_slashes_in_starttag(self): + self._run_check('', [('startendtag', 'a', [('foo', 'var')])]) +@@ -537,13 +545,56 @@ def test_EOF_in_charref(self): + for html, expected in data: + self._run_check(html, expected) + +- def test_broken_comments(self): +- html = ('' ++ def test_eof_in_comments(self): ++ data = [ ++ ('', [('comment', '-!>')]), ++ ('' + '' + '' + '') + expected = [ ++ ('comment', 'ELEMENT br EMPTY'), + ('comment', ' not really a comment '), + ('comment', ' not a comment either --'), + ('comment', ' -- close enough --'), +@@ -598,6 +649,26 @@ def test_convert_charrefs_dropped_text(self): + ('endtag', 'a'), ('data', ' bar & baz')] + ) + ++ @support.requires_resource('cpu') ++ def test_eof_no_quadratic_complexity(self): ++ # Each of these examples used to take about an hour. ++ # Now they take a fraction of a second. ++ def check(source): ++ parser = html.parser.HTMLParser() ++ parser.feed(source) ++ parser.close() ++ n = 120_000 ++ check(" - 3.11.6-8 +- Add patch to fix CVE-2025-6069 + * Wed Aug 06 2025 wenxin - 3.11.6-7 - Add patch to fix CVE-2025-8194