diff --git a/0001-Adapt-to-libxml2-2.10.4-and-later.patch b/0001-Adapt-to-libxml2-2.10.4-and-later.patch new file mode 100644 index 0000000000000000000000000000000000000000..b7a17c4337d18b6c3da9440cc4b48cae833a44e5 --- /dev/null +++ b/0001-Adapt-to-libxml2-2.10.4-and-later.patch @@ -0,0 +1,207 @@ +diff --git a/src/lxml/_elementpath.py b/src/lxml/_elementpath.py +index eabd81c..001b345 100644 +--- a/src/lxml/_elementpath.py ++++ b/src/lxml/_elementpath.py +@@ -71,14 +71,14 @@ xpath_tokenizer_re = re.compile( + r"\s+" + ) + +-def xpath_tokenizer(pattern, namespaces=None): ++def xpath_tokenizer(pattern, namespaces=None, with_prefixes=True): + # ElementTree uses '', lxml used None originally. + default_namespace = (namespaces.get(None) or namespaces.get('')) if namespaces else None + parsing_attribute = False + for token in xpath_tokenizer_re.findall(pattern): + ttype, tag = token + if tag and tag[0] != "{": +- if ":" in tag: ++ if ":" in tag and with_prefixes: + prefix, uri = tag.split(":", 1) + try: + if not namespaces: +@@ -251,7 +251,7 @@ ops = { + _cache = {} + + +-def _build_path_iterator(path, namespaces): ++def _build_path_iterator(path, namespaces, with_prefixes=True): + """compile selector pattern""" + if path[-1:] == "/": + path += "*" # implicit all (FIXME: keep this?) 
+@@ -279,7 +279,7 @@ def _build_path_iterator(path, namespaces): + + if path[:1] == "/": + raise SyntaxError("cannot use absolute path on element") +- stream = iter(xpath_tokenizer(path, namespaces)) ++ stream = iter(xpath_tokenizer(path, namespaces, with_prefixes=with_prefixes)) + try: + _next = stream.next + except AttributeError: +@@ -308,8 +308,8 @@ def _build_path_iterator(path, namespaces): + ## + # Iterate over the matching nodes + +-def iterfind(elem, path, namespaces=None): +- selector = _build_path_iterator(path, namespaces) ++def iterfind(elem, path, namespaces=None, with_prefixes=True): ++ selector = _build_path_iterator(path, namespaces, with_prefixes=with_prefixes) + result = iter((elem,)) + for select in selector: + result = select(result) +@@ -319,8 +319,8 @@ def iterfind(elem, path, namespaces=None): + ## + # Find first matching object. + +-def find(elem, path, namespaces=None): +- it = iterfind(elem, path, namespaces) ++def find(elem, path, namespaces=None, with_prefixes=True): ++ it = iterfind(elem, path, namespaces, with_prefixes=with_prefixes) + try: + return next(it) + except StopIteration: +@@ -330,15 +330,15 @@ def find(elem, path, namespaces=None): + ## + # Find all matching objects. + +-def findall(elem, path, namespaces=None): ++def findall(elem, path, namespaces=None, with_prefixes=True): +- return list(iterfind(elem, path, namespaces)) ++ return list(iterfind(elem, path, namespaces, with_prefixes=with_prefixes)) + + + ## + # Find text for first matching object. 
+ +-def findtext(elem, path, default=None, namespaces=None): +- el = find(elem, path, namespaces) ++def findtext(elem, path, default=None, namespaces=None, with_prefixes=True): ++ el = find(elem, path, namespaces, with_prefixes=with_prefixes) + if el is None: + return default + else: +diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi +index 9fae9fb..35b3187 100644 +--- a/src/lxml/apihelpers.pxi ++++ b/src/lxml/apihelpers.pxi +@@ -15,6 +15,13 @@ cdef void displayNode(xmlNode* c_node, indent): + finally: + return # swallow any exceptions + ++cdef inline bint _isHtmlDocument(_Element element) except -1: ++ cdef xmlNode* c_node = element._c_node ++ return ( ++ c_node is not NULL and c_node.doc is not NULL and ++ c_node.doc.properties & tree.XML_DOC_HTML != 0 ++ ) ++ + cdef inline int _assertValidNode(_Element element) except -1: + assert element._c_node is not NULL, u"invalid Element proxy at %s" % id(element) + +diff --git a/src/lxml/etree.pyx b/src/lxml/etree.pyx +index 95dd21e..45f2cfa 100644 +--- a/src/lxml/etree.pyx ++++ b/src/lxml/etree.pyx +@@ -1546,7 +1546,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: + """ + if isinstance(path, QName): + path = (path).text +- return _elementpath.find(self, path, namespaces) ++ return _elementpath.find(self, path, namespaces, with_prefixes=not _isHtmlDocument(self)) + + def findtext(self, path, default=None, namespaces=None): + u"""findtext(self, path, default=None, namespaces=None) +@@ -1559,7 +1559,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: + """ + if isinstance(path, QName): + path = (path).text +- return _elementpath.findtext(self, path, default, namespaces) ++ return _elementpath.findtext(self, path, default, namespaces, with_prefixes=not _isHtmlDocument(self)) + + def findall(self, path, namespaces=None): + u"""findall(self, path, namespaces=None) +@@ -1572,7 +1572,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: + 
""" + if isinstance(path, QName): + path = (path).text +- return _elementpath.findall(self, path, namespaces) ++ return _elementpath.findall(self, path, namespaces, with_prefixes=not _isHtmlDocument(self)) + + def iterfind(self, path, namespaces=None): + u"""iterfind(self, path, namespaces=None) +@@ -1585,7 +1585,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: + """ + if isinstance(path, QName): + path = (path).text +- return _elementpath.iterfind(self, path, namespaces) ++ return _elementpath.iterfind(self, path, namespaces, with_prefixes=not _isHtmlDocument(self)) + + def xpath(self, _path, *, namespaces=None, extensions=None, + smart_strings=True, **_variables): +diff --git a/src/lxml/includes/tree.pxd b/src/lxml/includes/tree.pxd +index 010af80..d709313 100644 +--- a/src/lxml/includes/tree.pxd ++++ b/src/lxml/includes/tree.pxd +@@ -154,6 +154,17 @@ cdef extern from "libxml/tree.h": + XML_EXTERNAL_PARAMETER_ENTITY= 5 + XML_INTERNAL_PREDEFINED_ENTITY= 6 + ++ ctypedef enum xmlDocProperties: ++ XML_DOC_WELLFORMED = 1 # /* document is XML well formed */ ++ XML_DOC_NSVALID = 2 # /* document is Namespace valid */ ++ XML_DOC_OLD10 = 4 # /* parsed with old XML-1.0 parser */ ++ XML_DOC_DTDVALID = 8 # /* DTD validation was successful */ ++ XML_DOC_XINCLUDE = 16 # /* XInclude substitution was done */ ++ XML_DOC_USERBUILT = 32 # /* Document was built using the API ++ # and not by parsing an instance */ ++ XML_DOC_INTERNAL = 64 # /* built for internal processing */ ++ XML_DOC_HTML = 128 # /* parsed or built HTML document */ ++ + ctypedef struct xmlNs: + const_xmlChar* href + const_xmlChar* prefix +@@ -274,6 +285,7 @@ cdef extern from "libxml/tree.h": + void* _private + xmlDtd* intSubset + xmlDtd* extSubset ++ int properties + + ctypedef struct xmlAttr: + void* _private +diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py +index 3e52258..f2620bd 100644 +--- a/src/lxml/tests/test_etree.py ++++ b/src/lxml/tests/test_etree.py 
+@@ -3058,11 +3058,29 @@ class ETreeOnlyTestCase(HelperTestCase): + + def test_html_prefix_nsmap(self): + etree = self.etree +- el = etree.HTML('aa').find('.//page-description') +- if etree.LIBXML_VERSION < (2, 9, 11): +- self.assertEqual({'hha': None}, el.nsmap) ++ el = etree.HTML('aa') ++ pd = el[-1] ++ while len(pd): ++ pd = pd[-1] ++ ++ if etree.LIBXML_VERSION >= (2, 10, 4): ++ # "Prefix" is kept as part of the tag name. ++ self.assertEqual("hha:page-description", pd.tag) ++ self.assertIsNone(el.find('.//page-description')) ++ self.assertIsNotNone(el.find('.//hha:page-description')) # no namespaces! ++ for e in el.iter(): ++ self.assertEqual({}, e.nsmap) ++ elif etree.LIBXML_VERSION >= (2, 9, 11): ++ # "Prefix" is stripped. ++ self.assertEqual("page-description", pd.tag) ++ self.assertIsNotNone(el.find('.//page-description')) ++ for e in el.iter(): ++ self.assertEqual({}, e.nsmap) + else: +- self.assertEqual({}, el.nsmap) ++ # "Prefix" is parsed as XML prefix. ++ self.assertEqual("page-description", pd.tag) ++ pd = el.find('.//page-description') ++ self.assertEqual({'hha': None}, pd.nsmap) + + def test_getchildren(self): + Element = self.etree.Element diff --git a/0002-Fix-test_elementtree-with-Expat-2.6.0.patch b/0002-Fix-test_elementtree-with-Expat-2.6.0.patch new file mode 100644 index 0000000000000000000000000000000000000000..f95b981fa10cd385a81755e6d69c328726d1ae8d --- /dev/null +++ b/0002-Fix-test_elementtree-with-Expat-2.6.0.patch @@ -0,0 +1,58 @@ +diff --git a/src/lxml/tests/test_elementtree.py b/src/lxml/tests/test_elementtree.py +index 96426cb..ef3302e 100644 +--- a/src/lxml/tests/test_elementtree.py ++++ b/src/lxml/tests/test_elementtree.py +@@ -4396,29 +4396,30 @@ class _XMLPullParserTest(unittest.TestCase): + self.assertEqual([(action, elem.tag) for action, elem in events], + expected) + +- def test_simple_xml(self): +- for chunk_size in (None, 1, 5): +- #with self.subTest(chunk_size=chunk_size): +- parser = self.etree.XMLPullParser() +- 
self.assert_event_tags(parser, []) +- self._feed(parser, "\n", chunk_size) +- self.assert_event_tags(parser, []) +- self._feed(parser, +- "\n text\n", chunk_size) +- self.assert_event_tags(parser, [('end', 'element')]) +- self._feed(parser, "texttail\n", chunk_size) +- self._feed(parser, "\n", chunk_size) +- self.assert_event_tags(parser, [ +- ('end', 'element'), +- ('end', 'empty-element'), +- ]) +- self._feed(parser, "\n", chunk_size) +- self.assert_event_tags(parser, [('end', 'root')]) +- root = self._close_and_return_root(parser) +- self.assertEqual(root.tag, 'root') ++ def test_simple_xml(self, chunk_size=None): ++ parser = self.etree.XMLPullParser() ++ self.assert_event_tags(parser, []) ++ self._feed(parser, "\n", chunk_size) ++ self.assert_event_tags(parser, []) ++ self._feed(parser, ++ "\n text\n", chunk_size) ++ self.assert_event_tags(parser, [('end', 'element')]) ++ self._feed(parser, "texttail\n", chunk_size) ++ self._feed(parser, "\n", chunk_size) ++ self.assert_event_tags(parser, [ ++ ('end', 'element'), ++ ('end', 'empty-element'), ++ ]) ++ self._feed(parser, "\n", chunk_size) ++ self.assert_event_tags(parser, [('end', 'root')]) ++ root = self._close_and_return_root(parser) ++ self.assertEqual(root.tag, 'root') ++ ++ def test_simple_xml_chunk_22(self): ++ self.test_simple_xml(chunk_size=22) + + def test_feed_while_iterating(self): + parser = self.etree.XMLPullParser() diff --git a/python-lxml.spec b/python-lxml.spec index 5a3392428997085866d4e95db241580c56ea5df1..dcff72afbaf374360a1a187bff7c509af2f37483 100644 --- a/python-lxml.spec +++ b/python-lxml.spec @@ -7,11 +7,13 @@ The latest release works with all CPython versions from 2.7 to 3.7. 
Name: python-%{modname} Version: 4.9.1 -Release: 2 +Release: 3 Summary: XML processing library combining libxml2/libxslt with the ElementTree API License: BSD URL: https://github.com/lxml/lxml Source0: %_sourcedir/lxml-%{version}.tar.gz +Patch01: 0001-Adapt-to-libxml2-2.10.4-and-later.patch +Patch02: 0002-Fix-test_elementtree-with-Expat-2.6.0.patch BuildRequires: gcc libxml2-devel libxslt-devel @@ -52,6 +54,9 @@ make test3 %doc README.rst src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt %changelog +* Tue Jun 03 2025 tzing_t - 4.9.1-3 +- Fix build errors with libxml2 >= 2.10.4 and Expat >= 2.6.0 + * Mon Jul 25 2022 shixuantong - 4.9.1-2 - Remove pregenerated Cython C sources