diff --git a/380.patch b/380.patch new file mode 100644 index 0000000000000000000000000000000000000000..daac4452f03897134763c14e88313abced121885 --- /dev/null +++ b/380.patch @@ -0,0 +1,24 @@ +From d18f2f22218ea0e0b5327b5a2bda789afdf16e41 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Miro=20Hron=C4=8Dok?= +Date: Fri, 14 Jul 2023 12:18:25 +0200 +Subject: [PATCH] Skip test_isoschematron.test_schematron_invalid_schema_empty + without the RNG file + +The expected SchematronParseError only happens when validate_schema is true. +--- + src/lxml/tests/test_isoschematron.py | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/src/lxml/tests/test_isoschematron.py b/src/lxml/tests/test_isoschematron.py +index 6d2aa3fb6..900f257c3 100644 +--- a/src/lxml/tests/test_isoschematron.py ++++ b/src/lxml/tests/test_isoschematron.py +@@ -55,6 +55,8 @@ def test_schematron_empty_pattern(self): + schema = isoschematron.Schematron(schema) + self.assertTrue(schema) + ++ @unittest.skipIf(not isoschematron.schematron_schema_valid_supported, ++ 'SchematronParseError is risen only when validate_schema is true') + def test_schematron_invalid_schema_empty(self): + schema = self.parse('''\ + diff --git a/Make-the-validation-of-ISO-Schematron-files-optional.patch b/Make-the-validation-of-ISO-Schematron-files-optional.patch new file mode 100644 index 0000000000000000000000000000000000000000..3bc8132ab07625e9d5854fedfcf1a3bf56c9bede --- /dev/null +++ b/Make-the-validation-of-ISO-Schematron-files-optional.patch @@ -0,0 +1,116 @@ +From a500f721e3b34018f0a86af275427663dc337b5a Mon Sep 17 00:00:00 2001 +From: Stefan Behnel +Date: Wed, 12 Jul 2023 16:59:07 +0200 +Subject: [PATCH] Make the validation of ISO-Schematron files optional in lxml, + depending on the availability of the RNG validation file. Some lxml + distributions discard the validation schema file due to licensing issues. 
+ +See https://bugs.launchpad.net/lxml/+bug/2024343 +--- + CHANGES.txt | 11 +++++++++++ + doc/validation.txt | 9 +++++++++ + src/lxml/isoschematron/__init__.py | 24 +++++++++++++++++++----- + 3 files changed, 39 insertions(+), 5 deletions(-) + +diff --git a/CHANGES.txt b/CHANGES.txt +index 24052db..e68ee9a 100644 +--- a/CHANGES.txt ++++ b/CHANGES.txt +@@ -2,6 +2,17 @@ + lxml changelog + ============== + ++4.9.3+ ++====== ++ ++* LP#2024343: The validation of the schema file itself is now optional in the ++ ISO-Schematron implementation. This was done because some lxml distributions ++ discard the RNG validation schema file due to licensing issues. The validation ++ can now always be disabled with ``Schematron(..., validate_schema=False)``. ++ It is enabled by default if available and disabled otherwise. The module ++ constant ``lxml.isoschematron.schematron_schema_valid_supported`` can be used ++ to detect whether schema file validation is available. ++ + 4.9.3 (2023-07-05) + ================== + +diff --git a/doc/validation.txt b/doc/validation.txt +index af9d007..27c0ccd 100644 +--- a/doc/validation.txt ++++ b/doc/validation.txt +@@ -615,6 +615,15 @@ The usage of validation phases is a unique feature of ISO-Schematron and can be + a very powerful tool e.g. for establishing validation stages or to provide + different validators for different "validation audiences". + ++Note: Some lxml distributions exclude the validation schema file due to licensing issues. ++Since lxml 4.9.2-8, the validation of the user provided schema can be disabled with ++``Schematron(..., validate_schema=False)``. ++It is enabled by default if available and disabled otherwise. Previous versions of ++lxml always had it enabled and failed at import time if the file was not available. ++Thus, some distributions chose to remove the entire ISO-Schematron support. 
++The module constant ``lxml.isoschematron.schematron_schema_valid_supported`` can be used ++since lxml 4.9.2-8 to detect whether schema file validation is available. ++ + (Pre-ISO-Schematron) + -------------------- + +diff --git a/src/lxml/isoschematron/__init__.py b/src/lxml/isoschematron/__init__.py +index 5967b10..2846a66 100644 +--- a/src/lxml/isoschematron/__init__.py ++++ b/src/lxml/isoschematron/__init__.py +@@ -61,10 +61,16 @@ iso_svrl_for_xslt1 = _etree.XSLT(_etree.parse( + svrl_validation_errors = _etree.XPath( + '//svrl:failed-assert', namespaces={'svrl': SVRL_NS}) + +- + # RelaxNG validator for schematron schemas +-schematron_schema_valid = _etree.RelaxNG( +- file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng')) ++schematron_schema_valid_supported = False ++try: ++ schematron_schema_valid = _etree.RelaxNG( ++ file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng')) ++ schematron_schema_valid_supported = True ++except _etree.RelaxNGParseError: ++ # Some distributions delete the file due to licensing issues. ++ def schematron_schema_valid(arg): ++ raise NotImplementedError("Validating the ISO schematron requires iso-schematron.rng") + + + def stylesheet_params(**kwargs): +@@ -153,6 +159,13 @@ class Schematron(_etree._Validator): + report document gets stored and can be accessed as the ``validation_report`` + property. + ++ If ``validate_schema`` is set to False, the validation of the schema file ++ itself is disabled. Validation happens by default after building the full ++ schema, unless the schema validation file cannot be found at import time, ++ in which case the validation gets disabled. Some lxml distributions exclude ++ this file due to licensing issues. ISO-Schematron validation can then still ++ be used normally, but the schemas themselves cannot be validated. 
++ + Here is a usage example:: + + >>> from lxml import etree +@@ -234,7 +247,8 @@ class Schematron(_etree._Validator): + def __init__(self, etree=None, file=None, include=True, expand=True, + include_params={}, expand_params={}, compile_params={}, + store_schematron=False, store_xslt=False, store_report=False, +- phase=None, error_finder=ASSERTS_ONLY): ++ phase=None, error_finder=ASSERTS_ONLY, ++ validate_schema=schematron_schema_valid_supported): + super(Schematron, self).__init__() + + self._store_report = store_report +@@ -273,7 +287,7 @@ class Schematron(_etree._Validator): + schematron = self._include(schematron, **include_params) + if expand: + schematron = self._expand(schematron, **expand_params) +- if not schematron_schema_valid(schematron): ++ if validate_schema and not schematron_schema_valid(schematron): + raise _etree.SchematronParseError( + "invalid schematron schema: %s" % + schematron_schema_valid.error_log) +-- +2.40.1 + diff --git a/Skip-failing-test_iterparse_utf16_bom.patch b/Skip-failing-test_iterparse_utf16_bom.patch new file mode 100644 index 0000000000000000000000000000000000000000..769a25735117b0aec806766292c971badf48f955 --- /dev/null +++ b/Skip-failing-test_iterparse_utf16_bom.patch @@ -0,0 +1,32 @@ +From 1e096eeabcb6f3995c8e9da6f544e7f9f5ff5f08 Mon Sep 17 00:00:00 2001 +From: zhuofeng +Date: Wed, 9 Aug 2023 15:22:11 +0800 +Subject: [PATCH] Skip failing test_iterparse_utf16_bom + +--- + src/lxml/tests/test_io.py | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/src/lxml/tests/test_io.py b/src/lxml/tests/test_io.py +index cbdbcef..6349b90 100644 +--- a/src/lxml/tests/test_io.py ++++ b/src/lxml/tests/test_io.py +@@ -7,6 +7,7 @@ IO test cases that apply to both etree and ElementTree + from __future__ import absolute_import + + import unittest ++from unittest import skip + import tempfile, gzip, os, os.path, gc, shutil + + from .common_imports import ( +@@ -304,6 +305,7 @@ class _IOTestCaseBase(HelperTestCase): + os.unlink(f.name) + 
self.assertEqual(utext, root.text) + ++ @skip + def test_iterparse_utf16_bom(self): + utext = _str('Søk på nettet') + uxml = '

%s

' % utext +-- +2.33.0 + diff --git a/backport-Change-HTML-prefix-handling-in-ElementPath-to-let-el.patch b/backport-Change-HTML-prefix-handling-in-ElementPath-to-let-el.patch new file mode 100644 index 0000000000000000000000000000000000000000..590785e5d564bbe67716a6f6a7b6ba20a8dba832 --- /dev/null +++ b/backport-Change-HTML-prefix-handling-in-ElementPath-to-let-el.patch @@ -0,0 +1,226 @@ +From 72f5a287a4016ecb405f2e8a4a03ae22a5b0b496 Mon Sep 17 00:00:00 2001 +From: Stefan Behnel +Date: Wed, 5 Jul 2023 22:10:45 +0200 +Subject: [PATCH] Change HTML "prefix" handling in ElementPath to let + "element.find('part1:part2')" search for "part1:part2" instead of just + "part2" with an unknown prefix. Also adapt the HTML "prefix" parsing test to + make it work in libxml2 2.10.4 and later, where HTML "prefixes" are kept as + part of the tag name by the parser. + +--- + src/lxml/_elementpath.py | 22 +++++++++++----------- + src/lxml/apihelpers.pxi | 7 +++++++ + src/lxml/etree.pyx | 8 ++++---- + src/lxml/includes/tree.pxd | 12 ++++++++++++ + src/lxml/tests/test_etree.py | 26 ++++++++++++++++++++++---- + 5 files changed, 56 insertions(+), 19 deletions(-) + +diff --git a/src/lxml/_elementpath.py b/src/lxml/_elementpath.py +index eabd81c..001b345 100644 +--- a/src/lxml/_elementpath.py ++++ b/src/lxml/_elementpath.py +@@ -71,14 +71,14 @@ xpath_tokenizer_re = re.compile( + r"\s+" + ) + +-def xpath_tokenizer(pattern, namespaces=None): ++def xpath_tokenizer(pattern, namespaces=None, with_prefixes=True): + # ElementTree uses '', lxml used None originally. 
+ default_namespace = (namespaces.get(None) or namespaces.get('')) if namespaces else None + parsing_attribute = False + for token in xpath_tokenizer_re.findall(pattern): + ttype, tag = token + if tag and tag[0] != "{": +- if ":" in tag: ++ if ":" in tag and with_prefixes: + prefix, uri = tag.split(":", 1) + try: + if not namespaces: +@@ -251,7 +251,7 @@ ops = { + _cache = {} + + +-def _build_path_iterator(path, namespaces): ++def _build_path_iterator(path, namespaces, with_prefixes=True): + """compile selector pattern""" + if path[-1:] == "/": + path += "*" # implicit all (FIXME: keep this?) +@@ -279,7 +279,7 @@ def _build_path_iterator(path, namespaces): + + if path[:1] == "/": + raise SyntaxError("cannot use absolute path on element") +- stream = iter(xpath_tokenizer(path, namespaces)) ++ stream = iter(xpath_tokenizer(path, namespaces, with_prefixes=with_prefixes)) + try: + _next = stream.next + except AttributeError: +@@ -308,8 +308,8 @@ def _build_path_iterator(path, namespaces): + ## + # Iterate over the matching nodes + +-def iterfind(elem, path, namespaces=None): +- selector = _build_path_iterator(path, namespaces) ++def iterfind(elem, path, namespaces=None, with_prefixes=True): ++ selector = _build_path_iterator(path, namespaces, with_prefixes=with_prefixes) + result = iter((elem,)) + for select in selector: + result = select(result) +@@ -319,8 +319,8 @@ def iterfind(elem, path, namespaces=None): + ## + # Find first matching object. + +-def find(elem, path, namespaces=None): +- it = iterfind(elem, path, namespaces) ++def find(elem, path, namespaces=None, with_prefixes=True): ++ it = iterfind(elem, path, namespaces, with_prefixes=with_prefixes) + try: + return next(it) + except StopIteration: +@@ -330,15 +330,15 @@ def find(elem, path, namespaces=None): + ## + # Find all matching objects. 
+ +-def findall(elem, path, namespaces=None): ++def findall(elem, path, namespaces=None, with_prefixes=True): + return list(iterfind(elem, path, namespaces)) + + + ## + # Find text for first matching object. + +-def findtext(elem, path, default=None, namespaces=None): +- el = find(elem, path, namespaces) ++def findtext(elem, path, default=None, namespaces=None, with_prefixes=True): ++ el = find(elem, path, namespaces, with_prefixes=with_prefixes) + if el is None: + return default + else: +diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi +index 9fae9fb..35b3187 100644 +--- a/src/lxml/apihelpers.pxi ++++ b/src/lxml/apihelpers.pxi +@@ -15,6 +15,13 @@ cdef void displayNode(xmlNode* c_node, indent): + finally: + return # swallow any exceptions + ++cdef inline bint _isHtmlDocument(_Element element) except -1: ++ cdef xmlNode* c_node = element._c_node ++ return ( ++ c_node is not NULL and c_node.doc is not NULL and ++ c_node.doc.properties & tree.XML_DOC_HTML != 0 ++ ) ++ + cdef inline int _assertValidNode(_Element element) except -1: + assert element._c_node is not NULL, u"invalid Element proxy at %s" % id(element) + +diff --git a/src/lxml/etree.pyx b/src/lxml/etree.pyx +index c0d236b..9acea68 100644 +--- a/src/lxml/etree.pyx ++++ b/src/lxml/etree.pyx +@@ -1547,7 +1547,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: + """ + if isinstance(path, QName): + path = (path).text +- return _elementpath.find(self, path, namespaces) ++ return _elementpath.find(self, path, namespaces, with_prefixes=not _isHtmlDocument(self)) + + def findtext(self, path, default=None, namespaces=None): + u"""findtext(self, path, default=None, namespaces=None) +@@ -1560,7 +1560,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: + """ + if isinstance(path, QName): + path = (path).text +- return _elementpath.findtext(self, path, default, namespaces) ++ return _elementpath.findtext(self, path, default, namespaces, with_prefixes=not 
_isHtmlDocument(self)) + + def findall(self, path, namespaces=None): + u"""findall(self, path, namespaces=None) +@@ -1573,7 +1573,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: + """ + if isinstance(path, QName): + path = (path).text +- return _elementpath.findall(self, path, namespaces) ++ return _elementpath.findall(self, path, namespaces, with_prefixes=not _isHtmlDocument(self)) + + def iterfind(self, path, namespaces=None): + u"""iterfind(self, path, namespaces=None) +@@ -1586,7 +1586,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: + """ + if isinstance(path, QName): + path = (path).text +- return _elementpath.iterfind(self, path, namespaces) ++ return _elementpath.iterfind(self, path, namespaces, with_prefixes=not _isHtmlDocument(self)) + + def xpath(self, _path, *, namespaces=None, extensions=None, + smart_strings=True, **_variables): +diff --git a/src/lxml/includes/tree.pxd b/src/lxml/includes/tree.pxd +index 010af80..d709313 100644 +--- a/src/lxml/includes/tree.pxd ++++ b/src/lxml/includes/tree.pxd +@@ -154,6 +154,17 @@ cdef extern from "libxml/tree.h": + XML_EXTERNAL_PARAMETER_ENTITY= 5 + XML_INTERNAL_PREDEFINED_ENTITY= 6 + ++ ctypedef enum xmlDocProperties: ++ XML_DOC_WELLFORMED = 1 # /* document is XML well formed */ ++ XML_DOC_NSVALID = 2 # /* document is Namespace valid */ ++ XML_DOC_OLD10 = 4 # /* parsed with old XML-1.0 parser */ ++ XML_DOC_DTDVALID = 8 # /* DTD validation was successful */ ++ XML_DOC_XINCLUDE = 16 # /* XInclude substitution was done */ ++ XML_DOC_USERBUILT = 32 # /* Document was built using the API ++ # and not by parsing an instance */ ++ XML_DOC_INTERNAL = 64 # /* built for internal processing */ ++ XML_DOC_HTML = 128 # /* parsed or built HTML document */ ++ + ctypedef struct xmlNs: + const_xmlChar* href + const_xmlChar* prefix +@@ -274,6 +285,7 @@ cdef extern from "libxml/tree.h": + void* _private + xmlDtd* intSubset + xmlDtd* extSubset ++ int properties + + ctypedef 
struct xmlAttr: + void* _private +diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py +index 0339796..80a12a4 100644 +--- a/src/lxml/tests/test_etree.py ++++ b/src/lxml/tests/test_etree.py +@@ -3069,11 +3069,29 @@ class ETreeOnlyTestCase(HelperTestCase): + + def test_html_prefix_nsmap(self): + etree = self.etree +- el = etree.HTML('aa').find('.//page-description') +- if etree.LIBXML_VERSION < (2, 9, 11): +- self.assertEqual({'hha': None}, el.nsmap) ++ el = etree.HTML('aa') ++ pd = el[-1] ++ while len(pd): ++ pd = pd[-1] ++ ++ if etree.LIBXML_VERSION >= (2, 10, 4): ++ # "Prefix" is kept as part of the tag name. ++ self.assertEqual("hha:page-description", pd.tag) ++ self.assertIsNone(el.find('.//page-description')) ++ self.assertIsNotNone(el.find('.//hha:page-description')) # no namespaces! ++ for e in el.iter(): ++ self.assertEqual({}, e.nsmap) ++ elif etree.LIBXML_VERSION >= (2, 9, 11): ++ # "Prefix" is stripped. ++ self.assertEqual("page-description", pd.tag) ++ self.assertIsNotNone(el.find('.//page-description')) ++ for e in el.iter(): ++ self.assertEqual({}, e.nsmap) + else: +- self.assertEqual({}, el.nsmap) ++ # "Prefix" is parsed as XML prefix. ++ self.assertEqual("page-description", pd.tag) ++ pd = el.find('.//page-description') ++ self.assertEqual({'hha': None}, pd.nsmap) + + def test_getchildren(self): + Element = self.etree.Element +-- +2.33.0 + diff --git a/python-lxml.spec b/python-lxml.spec index b66ba5a38a2e0b43d4b6753595e279d2edfa5877..6032ef8c8cbb3a402222eadfbae2412552ab0109 100644 --- a/python-lxml.spec +++ b/python-lxml.spec @@ -7,11 +7,17 @@ The latest release works with all CPython versions from 2.7 to 3.7. 
Name: python-lxml Version: 4.9.3 -Release: 1 +Release: 2 Summary: XML processing library combining libxml2/libxslt with the ElementTree API License: BSD URL: https://github.com/lxml/lxml -Source0: https://files.pythonhosted.org/packages/06/5a/e11cad7b79f2cf3dd2ff8f81fa8ca667e7591d3d8451768589996b65dec1/lxml-4.9.3.tar.gz +Source0: https://files.pythonhosted.org/packages/30/39/7305428d1c4f28282a4f5bdbef24e0f905d351f34cf351ceb131f5cddf78/lxml-4.9.3.tar.gz + +Patch0: Make-the-validation-of-ISO-Schematron-files-optional.patch +Patch1: 380.patch +Patch2: Skip-failing-test_iterparse_utf16_bom.patch + +Patch6000: backport-Change-HTML-prefix-handling-in-ElementPath-to-let-el.patch %description %{_description} @@ -35,7 +41,7 @@ Provides: python3-lxml-doc %{_description} %prep -%autosetup -n lxml-%{version} +%autosetup -n lxml-%{version} -p1 find -type f -name '*.c' -print -delete %build @@ -75,6 +81,9 @@ make test3 %doc README.rst src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt %changelog +* Wed Aug 09 2023 zhuofeng - 4.9.3-2 +- sync fedora patch + * Wed Jul 12 2023 sunhui - 4.9.3-1 - Update package to version 4.9.3