From 7f038daa2b090830d212219e85c4d632125551b6 Mon Sep 17 00:00:00 2001 From: shixuantong Date: Wed, 22 Dec 2021 15:57:49 +0800 Subject: [PATCH] fix CVE-2021-43818 (cherry picked from commit cde04e59f083d65f903dd1d0388883ec14af277b) --- backport-0001-CVE-2021-43818.patch | 59 +++++++++++++++ backport-0002-CVE-2021-43818.patch | 113 +++++++++++++++++++++++++++++ python-lxml.spec | 7 +- 3 files changed, 178 insertions(+), 1 deletion(-) create mode 100644 backport-0001-CVE-2021-43818.patch create mode 100644 backport-0002-CVE-2021-43818.patch diff --git a/backport-0001-CVE-2021-43818.patch b/backport-0001-CVE-2021-43818.patch new file mode 100644 index 0000000..41d0710 --- /dev/null +++ b/backport-0001-CVE-2021-43818.patch @@ -0,0 +1,59 @@ +From 12fa9669007180a7bb87d990c375cf91ca5b664a Mon Sep 17 00:00:00 2001 +From: Stefan Behnel +Date: Thu, 11 Nov 2021 12:20:57 +0100 +Subject: [PATCH] Cleaner: Prevent "@import" from re-occurring in the CSS after + replacements, e.g. "@@importimport". + +Reported as GHSL-2021-1037 +--- + src/lxml/html/clean.py | 2 ++ + src/lxml/html/tests/test_clean.py | 20 ++++++++++++++++++++ + 2 files changed, 22 insertions(+) + +diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py +index 272b4a1..7a42562 100644 +--- a/src/lxml/html/clean.py ++++ b/src/lxml/html/clean.py +@@ -540,6 +540,8 @@ class Cleaner(object): + return True + if 'expression(' in style: + return True ++ if '@import' in style: ++ return True + if '', + lxml.html.tostring(clean_html(s))) + ++ def test_sneaky_import_in_style(self): ++ # Prevent "@@importimport" -> "@import" replacement. ++ style_codes = [ ++ "@@importimport(extstyle.css)", ++ "@ @ import import(extstyle.css)", ++ "@ @ importimport(extstyle.css)", ++ "@@ import import(extstyle.css)", ++ "@ @import import(extstyle.css)", ++ "@@importimport()", ++ ] ++ for style_code in style_codes: ++ html = '' % style_code ++ s = lxml.html.fragment_fromstring(html) ++ ++ cleaned = lxml.html.tostring(clean_html(s)) ++ self.assertEqual( ++ b'', ++ cleaned, ++ "%s -> %s" % (style_code, cleaned)) ++ + def test_formaction_attribute_in_button_input(self): + # The formaction attribute overrides the form's action and should be + # treated as a malicious link attribute +-- +1.8.3.1 + diff --git a/backport-0002-CVE-2021-43818.patch b/backport-0002-CVE-2021-43818.patch new file mode 100644 index 0000000..8262bab --- /dev/null +++ b/backport-0002-CVE-2021-43818.patch @@ -0,0 +1,113 @@ +From f2330237440df7e8f39c3ad1b1aa8852be3b27c0 Mon Sep 17 00:00:00 2001 +From: Stefan Behnel +Date: Thu, 11 Nov 2021 13:21:08 +0100 +Subject: [PATCH] Cleaner: Remove SVG image data URLs since they can embed + script content. + +Reported as GHSL-2021-1038 +--- + src/lxml/html/clean.py | 23 +++++++++++++------- + src/lxml/html/tests/test_clean.py | 45 +++++++++++++++++++++++++++++++++++++++ + 2 files changed, 60 insertions(+), 8 deletions(-) + +diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py +index 7a42562..4df10c2 100644 +--- a/src/lxml/html/clean.py ++++ b/src/lxml/html/clean.py +@@ -73,18 +73,25 @@ _looks_like_tag_content = re.compile( + + # All kinds of schemes besides just javascript: that can cause + # execution: +-_is_image_dataurl = re.compile( +- r'^data:image/.+;base64', re.I).search ++_find_image_dataurls = re.compile( ++ r'^data:image/(.+);base64,', re.I).findall + _is_possibly_malicious_scheme = re.compile( +- r'(?:javascript|jscript|livescript|vbscript|data|about|mocha):', +- re.I).search ++ r'(javascript|jscript|livescript|vbscript|data|about|mocha):', ++ re.I).findall ++# SVG images can contain script content ++_is_unsafe_image_type = re.compile(r"(xml|svg)", re.I).findall ++ + def _is_javascript_scheme(s): +- if _is_image_dataurl(s): +- return None +- return _is_possibly_malicious_scheme(s) ++ is_image_url = False ++ for image_type in _find_image_dataurls(s): ++ is_image_url = True ++ if _is_unsafe_image_type(image_type): ++ return True ++ if is_image_url: ++ return False ++ return bool(_is_possibly_malicious_scheme(s)) + + _substitute_whitespace = re.compile(r'[\s\x00-\x08\x0B\x0C\x0E-\x19]+').sub +-# FIXME: should data: be blocked? + + # FIXME: check against: http://msdn2.microsoft.com/en-us/library/ms537512.aspx + _conditional_comment_re = re.compile( +diff --git a/src/lxml/html/tests/test_clean.py b/src/lxml/html/tests/test_clean.py +index d395d51..a05d967 100644 +--- a/src/lxml/html/tests/test_clean.py ++++ b/src/lxml/html/tests/test_clean.py +@@ -1,3 +1,5 @@ ++import base64 ++import gzip + import unittest + from lxml.tests.common_imports import make_doctest + +@@ -143,6 +145,49 @@ class CleanerTest(unittest.TestCase): + cleaned, + "%s -> %s" % (style_code, cleaned)) + ++ def test_svg_data_links(self): ++ # Remove SVG images with potentially insecure content. ++ svg = b'' ++ svgz = gzip.compress(svg) ++ svg_b64 = base64.b64encode(svg).decode('ASCII') ++ svgz_b64 = base64.b64encode(svgz).decode('ASCII') ++ urls = [ ++ "data:image/svg+xml;base64," + svg_b64, ++ "data:image/svg+xml-compressed;base64," + svgz_b64, ++ ] ++ for url in urls: ++ html = '' % url ++ s = lxml.html.fragment_fromstring(html) ++ ++ cleaned = lxml.html.tostring(clean_html(s)) ++ self.assertEqual( ++ b'', ++ cleaned, ++ "%s -> %s" % (url, cleaned)) ++ ++ def test_image_data_links(self): ++ data = b'123' ++ data_b64 = base64.b64encode(data).decode('ASCII') ++ urls = [ ++ "data:image/jpeg;base64," + data_b64, ++ "data:image/apng;base64," + data_b64, ++ "data:image/png;base64," + data_b64, ++ "data:image/gif;base64," + data_b64, ++ "data:image/webp;base64," + data_b64, ++ "data:image/bmp;base64," + data_b64, ++ "data:image/tiff;base64," + data_b64, ++ "data:image/x-icon;base64," + data_b64, ++ ] ++ for url in urls: ++ html = '' % url ++ s = lxml.html.fragment_fromstring(html) ++ ++ cleaned = lxml.html.tostring(clean_html(s)) ++ self.assertEqual( ++ html.encode("UTF-8"), ++ cleaned, ++ "%s -> %s" % (url, cleaned)) ++ + def test_formaction_attribute_in_button_input(self): + # The formaction attribute overrides the form's action and should be + # treated as a malicious link attribute +-- +1.8.3.1 + diff --git a/python-lxml.spec b/python-lxml.spec index 2b2381f..5d86f03 100644 --- a/python-lxml.spec +++ b/python-lxml.spec @@ -7,7 +7,7 @@ The latest release works with all CPython versions from 2.7 to 3.7. Name: python-%{modname} Version: 4.5.2 -Release: 3 +Release: 4 Summary: XML processing library combining libxml2/libxslt with the ElementTree API License: BSD URL: http://lxml.de @@ -16,6 +16,8 @@ Source0: http://lxml.de/files/%{modname}-%{version}.tgz Patch6000: backport-CVE-2020-27783-1.patch Patch6001: backport-CVE-2020-27783-2.patch Patch6002: backport-CVE-2021-28957.patch +Patch6003: backport-0001-CVE-2021-43818.patch +Patch6004: backport-0002-CVE-2021-43818.patch BuildRequires: gcc libxml2-devel libxslt-devel @@ -67,6 +69,9 @@ export WITH_CYTHON=true %doc README.rst src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt %changelog +* Wed Dec 22 2021 shixuantong - 4.5.2-4 +- fix CVE-2021-43818 + * Wed Apr 14 2021 shixuantong - 4.5.2-3 - fix CVE-2021-28957 -- Gitee