diff --git a/380.patch b/380.patch deleted file mode 100644 index daac4452f03897134763c14e88313abced121885..0000000000000000000000000000000000000000 --- a/380.patch +++ /dev/null @@ -1,24 +0,0 @@ -From d18f2f22218ea0e0b5327b5a2bda789afdf16e41 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Miro=20Hron=C4=8Dok?= -Date: Fri, 14 Jul 2023 12:18:25 +0200 -Subject: [PATCH] Skip test_isoschematron.test_schematron_invalid_schema_empty - without the RNG file - -The expected SchematronParseError only happens when validate_schema is true. ---- - src/lxml/tests/test_isoschematron.py | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/src/lxml/tests/test_isoschematron.py b/src/lxml/tests/test_isoschematron.py -index 6d2aa3fb6..900f257c3 100644 ---- a/src/lxml/tests/test_isoschematron.py -+++ b/src/lxml/tests/test_isoschematron.py -@@ -55,6 +55,8 @@ def test_schematron_empty_pattern(self): - schema = isoschematron.Schematron(schema) - self.assertTrue(schema) - -+ @unittest.skipIf(not isoschematron.schematron_schema_valid_supported, -+ 'SchematronParseError is risen only when validate_schema is true') - def test_schematron_invalid_schema_empty(self): - schema = self.parse('''\ - diff --git a/Make-the-validation-of-ISO-Schematron-files-optional.patch b/Make-the-validation-of-ISO-Schematron-files-optional.patch deleted file mode 100644 index 3bc8132ab07625e9d5854fedfcf1a3bf56c9bede..0000000000000000000000000000000000000000 --- a/Make-the-validation-of-ISO-Schematron-files-optional.patch +++ /dev/null @@ -1,116 +0,0 @@ -From a500f721e3b34018f0a86af275427663dc337b5a Mon Sep 17 00:00:00 2001 -From: Stefan Behnel -Date: Wed, 12 Jul 2023 16:59:07 +0200 -Subject: [PATCH] Make the validation of ISO-Schematron files optional in lxml, - depending on the availability of the RNG validation file. Some lxml - distributions discard the validation schema file due to licensing issues. 
- -See https://bugs.launchpad.net/lxml/+bug/2024343 ---- - CHANGES.txt | 11 +++++++++++ - doc/validation.txt | 9 +++++++++ - src/lxml/isoschematron/__init__.py | 24 +++++++++++++++++++----- - 3 files changed, 39 insertions(+), 5 deletions(-) - -diff --git a/CHANGES.txt b/CHANGES.txt -index 24052db..e68ee9a 100644 ---- a/CHANGES.txt -+++ b/CHANGES.txt -@@ -2,6 +2,17 @@ - lxml changelog - ============== - -+4.9.3+ -+====== -+ -+* LP#2024343: The validation of the schema file itself is now optional in the -+ ISO-Schematron implementation. This was done because some lxml distributions -+ discard the RNG validation schema file due to licensing issues. The validation -+ can now always be disabled with ``Schematron(..., validate_schema=False)``. -+ It is enabled by default if available and disabled otherwise. The module -+ constant ``lxml.isoschematron.schematron_schema_valid_supported`` can be used -+ to detect whether schema file validation is available. -+ - 4.9.3 (2023-07-05) - ================== - -diff --git a/doc/validation.txt b/doc/validation.txt -index af9d007..27c0ccd 100644 ---- a/doc/validation.txt -+++ b/doc/validation.txt -@@ -615,6 +615,15 @@ The usage of validation phases is a unique feature of ISO-Schematron and can be - a very powerful tool e.g. for establishing validation stages or to provide - different validators for different "validation audiences". - -+Note: Some lxml distributions exclude the validation schema file due to licensing issues. -+Since lxml 4.9.2-8, the validation of the user provided schema can be disabled with -+``Schematron(..., validate_schema=False)``. -+It is enabled by default if available and disabled otherwise. Previous versions of -+lxml always had it enabled and failed at import time if the file was not available. -+Thus, some distributions chose to remove the entire ISO-Schematron support. 
-+The module constant ``lxml.isoschematron.schematron_schema_valid_supported`` can be used -+since lxml 4.9.2-8 to detect whether schema file validation is available. -+ - (Pre-ISO-Schematron) - -------------------- - -diff --git a/src/lxml/isoschematron/__init__.py b/src/lxml/isoschematron/__init__.py -index 5967b10..2846a66 100644 ---- a/src/lxml/isoschematron/__init__.py -+++ b/src/lxml/isoschematron/__init__.py -@@ -61,10 +61,16 @@ iso_svrl_for_xslt1 = _etree.XSLT(_etree.parse( - svrl_validation_errors = _etree.XPath( - '//svrl:failed-assert', namespaces={'svrl': SVRL_NS}) - -- - # RelaxNG validator for schematron schemas --schematron_schema_valid = _etree.RelaxNG( -- file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng')) -+schematron_schema_valid_supported = False -+try: -+ schematron_schema_valid = _etree.RelaxNG( -+ file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng')) -+ schematron_schema_valid_supported = True -+except _etree.RelaxNGParseError: -+ # Some distributions delete the file due to licensing issues. -+ def schematron_schema_valid(arg): -+ raise NotImplementedError("Validating the ISO schematron requires iso-schematron.rng") - - - def stylesheet_params(**kwargs): -@@ -153,6 +159,13 @@ class Schematron(_etree._Validator): - report document gets stored and can be accessed as the ``validation_report`` - property. - -+ If ``validate_schema`` is set to False, the validation of the schema file -+ itself is disabled. Validation happens by default after building the full -+ schema, unless the schema validation file cannot be found at import time, -+ in which case the validation gets disabled. Some lxml distributions exclude -+ this file due to licensing issues. ISO-Schematron validation can then still -+ be used normally, but the schemas themselves cannot be validated. 
-+ - Here is a usage example:: - - >>> from lxml import etree -@@ -234,7 +247,8 @@ class Schematron(_etree._Validator): - def __init__(self, etree=None, file=None, include=True, expand=True, - include_params={}, expand_params={}, compile_params={}, - store_schematron=False, store_xslt=False, store_report=False, -- phase=None, error_finder=ASSERTS_ONLY): -+ phase=None, error_finder=ASSERTS_ONLY, -+ validate_schema=schematron_schema_valid_supported): - super(Schematron, self).__init__() - - self._store_report = store_report -@@ -273,7 +287,7 @@ class Schematron(_etree._Validator): - schematron = self._include(schematron, **include_params) - if expand: - schematron = self._expand(schematron, **expand_params) -- if not schematron_schema_valid(schematron): -+ if validate_schema and not schematron_schema_valid(schematron): - raise _etree.SchematronParseError( - "invalid schematron schema: %s" % - schematron_schema_valid.error_log) --- -2.40.1 - diff --git a/Skip-failing-test_iterparse_utf16_bom.patch b/Skip-failing-test_iterparse_utf16_bom.patch index 769a25735117b0aec806766292c971badf48f955..55d0aab5b932ec27a42e8ea9684b35ef9e91baaa 100644 --- a/Skip-failing-test_iterparse_utf16_bom.patch +++ b/Skip-failing-test_iterparse_utf16_bom.patch @@ -8,24 +8,24 @@ Subject: [PATCH] Skip failing test_iterparse_utf16_bom 1 file changed, 2 insertions(+) diff --git a/src/lxml/tests/test_io.py b/src/lxml/tests/test_io.py -index cbdbcef..6349b90 100644 +index 8fac41d..2b5d0de 100644 --- a/src/lxml/tests/test_io.py +++ b/src/lxml/tests/test_io.py -@@ -7,6 +7,7 @@ IO test cases that apply to both etree and ElementTree - from __future__ import absolute_import +@@ -4,6 +4,7 @@ IO test cases that apply to both etree and ElementTree + import unittest +from unittest import skip import tempfile, gzip, os, os.path, gc, shutil from .common_imports import ( -@@ -304,6 +305,7 @@ class _IOTestCaseBase(HelperTestCase): +@@ -305,6 +306,7 @@ class _IOTestCaseBase(HelperTestCase): os.unlink(f.name) 
self.assertEqual(utext, root.text) + @skip def test_iterparse_utf16_bom(self): - utext = _str('Søk på nettet') + utext = 'Søk på nettet' uxml = '

%s

' % utext -- 2.33.0 diff --git a/backport-Change-HTML-prefix-handling-in-ElementPath-to-let-el.patch b/backport-Change-HTML-prefix-handling-in-ElementPath-to-let-el.patch deleted file mode 100644 index 590785e5d564bbe67716a6f6a7b6ba20a8dba832..0000000000000000000000000000000000000000 --- a/backport-Change-HTML-prefix-handling-in-ElementPath-to-let-el.patch +++ /dev/null @@ -1,226 +0,0 @@ -From 72f5a287a4016ecb405f2e8a4a03ae22a5b0b496 Mon Sep 17 00:00:00 2001 -From: Stefan Behnel -Date: Wed, 5 Jul 2023 22:10:45 +0200 -Subject: [PATCH] Change HTML "prefix" handling in ElementPath to let - "element.find('part1:part2')" search for "part1:part2" instead of just - "part2" with an unknown prefix. Also adapt the HTML "prefix" parsing test to - make it work in libxml2 2.10.4 and later, where HTML "prefixes" are kept as - part of the tag name by the parser. - ---- - src/lxml/_elementpath.py | 22 +++++++++++----------- - src/lxml/apihelpers.pxi | 7 +++++++ - src/lxml/etree.pyx | 8 ++++---- - src/lxml/includes/tree.pxd | 12 ++++++++++++ - src/lxml/tests/test_etree.py | 26 ++++++++++++++++++++++---- - 5 files changed, 56 insertions(+), 19 deletions(-) - -diff --git a/src/lxml/_elementpath.py b/src/lxml/_elementpath.py -index eabd81c..001b345 100644 ---- a/src/lxml/_elementpath.py -+++ b/src/lxml/_elementpath.py -@@ -71,14 +71,14 @@ xpath_tokenizer_re = re.compile( - r"\s+" - ) - --def xpath_tokenizer(pattern, namespaces=None): -+def xpath_tokenizer(pattern, namespaces=None, with_prefixes=True): - # ElementTree uses '', lxml used None originally. 
- default_namespace = (namespaces.get(None) or namespaces.get('')) if namespaces else None - parsing_attribute = False - for token in xpath_tokenizer_re.findall(pattern): - ttype, tag = token - if tag and tag[0] != "{": -- if ":" in tag: -+ if ":" in tag and with_prefixes: - prefix, uri = tag.split(":", 1) - try: - if not namespaces: -@@ -251,7 +251,7 @@ ops = { - _cache = {} - - --def _build_path_iterator(path, namespaces): -+def _build_path_iterator(path, namespaces, with_prefixes=True): - """compile selector pattern""" - if path[-1:] == "/": - path += "*" # implicit all (FIXME: keep this?) -@@ -279,7 +279,7 @@ def _build_path_iterator(path, namespaces): - - if path[:1] == "/": - raise SyntaxError("cannot use absolute path on element") -- stream = iter(xpath_tokenizer(path, namespaces)) -+ stream = iter(xpath_tokenizer(path, namespaces, with_prefixes=with_prefixes)) - try: - _next = stream.next - except AttributeError: -@@ -308,8 +308,8 @@ def _build_path_iterator(path, namespaces): - ## - # Iterate over the matching nodes - --def iterfind(elem, path, namespaces=None): -- selector = _build_path_iterator(path, namespaces) -+def iterfind(elem, path, namespaces=None, with_prefixes=True): -+ selector = _build_path_iterator(path, namespaces, with_prefixes=with_prefixes) - result = iter((elem,)) - for select in selector: - result = select(result) -@@ -319,8 +319,8 @@ def iterfind(elem, path, namespaces=None): - ## - # Find first matching object. - --def find(elem, path, namespaces=None): -- it = iterfind(elem, path, namespaces) -+def find(elem, path, namespaces=None, with_prefixes=True): -+ it = iterfind(elem, path, namespaces, with_prefixes=with_prefixes) - try: - return next(it) - except StopIteration: -@@ -330,15 +330,15 @@ def find(elem, path, namespaces=None): - ## - # Find all matching objects. 
- --def findall(elem, path, namespaces=None): -+def findall(elem, path, namespaces=None, with_prefixes=True): - return list(iterfind(elem, path, namespaces)) - - - ## - # Find text for first matching object. - --def findtext(elem, path, default=None, namespaces=None): -- el = find(elem, path, namespaces) -+def findtext(elem, path, default=None, namespaces=None, with_prefixes=True): -+ el = find(elem, path, namespaces, with_prefixes=with_prefixes) - if el is None: - return default - else: -diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi -index 9fae9fb..35b3187 100644 ---- a/src/lxml/apihelpers.pxi -+++ b/src/lxml/apihelpers.pxi -@@ -15,6 +15,13 @@ cdef void displayNode(xmlNode* c_node, indent): - finally: - return # swallow any exceptions - -+cdef inline bint _isHtmlDocument(_Element element) except -1: -+ cdef xmlNode* c_node = element._c_node -+ return ( -+ c_node is not NULL and c_node.doc is not NULL and -+ c_node.doc.properties & tree.XML_DOC_HTML != 0 -+ ) -+ - cdef inline int _assertValidNode(_Element element) except -1: - assert element._c_node is not NULL, u"invalid Element proxy at %s" % id(element) - -diff --git a/src/lxml/etree.pyx b/src/lxml/etree.pyx -index c0d236b..9acea68 100644 ---- a/src/lxml/etree.pyx -+++ b/src/lxml/etree.pyx -@@ -1547,7 +1547,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: - """ - if isinstance(path, QName): - path = (path).text -- return _elementpath.find(self, path, namespaces) -+ return _elementpath.find(self, path, namespaces, with_prefixes=not _isHtmlDocument(self)) - - def findtext(self, path, default=None, namespaces=None): - u"""findtext(self, path, default=None, namespaces=None) -@@ -1560,7 +1560,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: - """ - if isinstance(path, QName): - path = (path).text -- return _elementpath.findtext(self, path, default, namespaces) -+ return _elementpath.findtext(self, path, default, namespaces, with_prefixes=not 
_isHtmlDocument(self)) - - def findall(self, path, namespaces=None): - u"""findall(self, path, namespaces=None) -@@ -1573,7 +1573,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: - """ - if isinstance(path, QName): - path = (path).text -- return _elementpath.findall(self, path, namespaces) -+ return _elementpath.findall(self, path, namespaces, with_prefixes=not _isHtmlDocument(self)) - - def iterfind(self, path, namespaces=None): - u"""iterfind(self, path, namespaces=None) -@@ -1586,7 +1586,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: - """ - if isinstance(path, QName): - path = (path).text -- return _elementpath.iterfind(self, path, namespaces) -+ return _elementpath.iterfind(self, path, namespaces, with_prefixes=not _isHtmlDocument(self)) - - def xpath(self, _path, *, namespaces=None, extensions=None, - smart_strings=True, **_variables): -diff --git a/src/lxml/includes/tree.pxd b/src/lxml/includes/tree.pxd -index 010af80..d709313 100644 ---- a/src/lxml/includes/tree.pxd -+++ b/src/lxml/includes/tree.pxd -@@ -154,6 +154,17 @@ cdef extern from "libxml/tree.h": - XML_EXTERNAL_PARAMETER_ENTITY= 5 - XML_INTERNAL_PREDEFINED_ENTITY= 6 - -+ ctypedef enum xmlDocProperties: -+ XML_DOC_WELLFORMED = 1 # /* document is XML well formed */ -+ XML_DOC_NSVALID = 2 # /* document is Namespace valid */ -+ XML_DOC_OLD10 = 4 # /* parsed with old XML-1.0 parser */ -+ XML_DOC_DTDVALID = 8 # /* DTD validation was successful */ -+ XML_DOC_XINCLUDE = 16 # /* XInclude substitution was done */ -+ XML_DOC_USERBUILT = 32 # /* Document was built using the API -+ # and not by parsing an instance */ -+ XML_DOC_INTERNAL = 64 # /* built for internal processing */ -+ XML_DOC_HTML = 128 # /* parsed or built HTML document */ -+ - ctypedef struct xmlNs: - const_xmlChar* href - const_xmlChar* prefix -@@ -274,6 +285,7 @@ cdef extern from "libxml/tree.h": - void* _private - xmlDtd* intSubset - xmlDtd* extSubset -+ int properties - - ctypedef 
struct xmlAttr: - void* _private -diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py -index 0339796..80a12a4 100644 ---- a/src/lxml/tests/test_etree.py -+++ b/src/lxml/tests/test_etree.py -@@ -3069,11 +3069,29 @@ class ETreeOnlyTestCase(HelperTestCase): - - def test_html_prefix_nsmap(self): - etree = self.etree -- el = etree.HTML('aa').find('.//page-description') -- if etree.LIBXML_VERSION < (2, 9, 11): -- self.assertEqual({'hha': None}, el.nsmap) -+ el = etree.HTML('aa') -+ pd = el[-1] -+ while len(pd): -+ pd = pd[-1] -+ -+ if etree.LIBXML_VERSION >= (2, 10, 4): -+ # "Prefix" is kept as part of the tag name. -+ self.assertEqual("hha:page-description", pd.tag) -+ self.assertIsNone(el.find('.//page-description')) -+ self.assertIsNotNone(el.find('.//hha:page-description')) # no namespaces! -+ for e in el.iter(): -+ self.assertEqual({}, e.nsmap) -+ elif etree.LIBXML_VERSION >= (2, 9, 11): -+ # "Prefix" is stripped. -+ self.assertEqual("page-description", pd.tag) -+ self.assertIsNotNone(el.find('.//page-description')) -+ for e in el.iter(): -+ self.assertEqual({}, e.nsmap) - else: -- self.assertEqual({}, el.nsmap) -+ # "Prefix" is parsed as XML prefix. 
-+ self.assertEqual("page-description", pd.tag) -+ pd = el.find('.//page-description') -+ self.assertEqual({'hha': None}, pd.nsmap) - - def test_getchildren(self): - Element = self.etree.Element --- -2.33.0 - diff --git a/lxml-4.9.3.tar.gz b/lxml-5.1.0.tar.gz similarity index 33% rename from lxml-4.9.3.tar.gz rename to lxml-5.1.0.tar.gz index 056e14aeb12019529b21622c8bca7f0f7899efd3..d43cdef635556f8ac67d4e7c6fb5adc8c1ba2329 100644 Binary files a/lxml-4.9.3.tar.gz and b/lxml-5.1.0.tar.gz differ diff --git a/python-lxml.spec b/python-lxml.spec index 6032ef8c8cbb3a402222eadfbae2412552ab0109..c47f09db6e8c6608ff91b96e2288beee3d25a2eb 100644 --- a/python-lxml.spec +++ b/python-lxml.spec @@ -6,18 +6,14 @@ the simplicity of a native Python API, mostly compatible but superior to the wel The latest release works with all CPython versions from 2.7 to 3.7. Name: python-lxml -Version: 4.9.3 -Release: 2 +Version: 5.1.0 +Release: 1 Summary: XML processing library combining libxml2/libxslt with the ElementTree API License: BSD URL: https://github.com/lxml/lxml -Source0: https://files.pythonhosted.org/packages/30/39/7305428d1c4f28282a4f5bdbef24e0f905d351f34cf351ceb131f5cddf78/lxml-4.9.3.tar.gz +Source0: https://files.pythonhosted.org/packages/2b/b4/bbccb250adbee490553b6a52712c46c20ea1ba533a643f1424b27ffc6845/lxml-5.1.0.tar.gz -Patch0: Make-the-validation-of-ISO-Schematron-files-optional.patch -Patch1: 380.patch -Patch2: Skip-failing-test_iterparse_utf16_bom.patch - -Patch6000: backport-Change-HTML-prefix-handling-in-ElementPath-to-let-el.patch +Patch0: Skip-failing-test_iterparse_utf16_bom.patch %description %{_description} @@ -71,7 +67,7 @@ mv %{buildroot}/filelist.lst . mv %{buildroot}/doclist.lst . 
%check -make test3 +make test %files -n python3-lxml -f filelist.lst %license doc/licenses/*.txt LICENSES.txt @@ -81,6 +77,17 @@ make test3 %doc README.rst src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt %changelog +* Wed Feb 07 2024 dongyuzhen - 5.1.0-1 +- upgrade version to 5.1.0: + - some incorrect declarations were removed from ``python.pxd`` + - built with Cython 3.0.7 + - some redundant and long-deprecated methods were removed + - character escaping in ``C14N2`` serialisation now uses a single pass over the text instead of searching for each unescaped character separately + - early support for Python 3.13a2 was added + - support for Python 2.7 and Python versions < 3.6 was removed + - parsing ASCII strings is slightly faster + - some bug fixes + * Wed Aug 09 2023 zhuofeng - 4.9.3-2 - sync fedara patch