diff --git a/Don-t-add-formatting-newlines-to-XInclude-nodes.patch b/Don-t-add-formatting-newlines-to-XInclude-nodes.patch new file mode 100644 index 0000000000000000000000000000000000000000..e00b8f8f715f3bce46d747717e02542c22913f78 --- /dev/null +++ b/Don-t-add-formatting-newlines-to-XInclude-nodes.patch @@ -0,0 +1,38 @@ +From 00a86d414ba9a9e1cd588182b87518e4e3af9466 Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Sun, 16 Aug 2020 23:38:00 +0200 +Subject: [PATCH] Don't add formatting newlines to XInclude nodes + +--- + xmlsave.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/xmlsave.c b/xmlsave.c +index f1d40b9..2225628 100644 +--- a/xmlsave.c ++++ b/xmlsave.c +@@ -1049,7 +1049,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + while (1) { + if (cur == root) + return; +- if (ctxt->format == 1) ++ if ((ctxt->format == 1) && ++ (cur->type != XML_XINCLUDE_START) && ++ (cur->type != XML_XINCLUDE_END)) + xmlOutputBufferWrite(buf, 1, "\n"); + if (cur->next != NULL) { + cur = cur->next; +@@ -1224,7 +1226,9 @@ xmlDocContentDumpOutput(xmlSaveCtxtPtr ctxt, xmlDocPtr cur) { + else + #endif + xmlNodeDumpOutputInternal(ctxt, child); +- xmlOutputBufferWrite(buf, 1, "\n"); ++ if ((child->type != XML_XINCLUDE_START) && ++ (child->type != XML_XINCLUDE_END)) ++ xmlOutputBufferWrite(buf, 1, "\n"); + child = child->next; + } + } +-- +1.8.3.1 + diff --git a/Fix-NodeDumpOutput-functions.patch b/Fix-NodeDumpOutput-functions.patch new file mode 100644 index 0000000000000000000000000000000000000000..3f62d442fb510b0737b4ded588c89238e3cd3aeb --- /dev/null +++ b/Fix-NodeDumpOutput-functions.patch @@ -0,0 +1,66 @@ +From 7b2e5172616406edcb5b84d048fa590c997784b3 Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Tue, 28 Jul 2020 21:52:55 +0200 +Subject: [PATCH] Fix *NodeDumpOutput functions + +Only output end tag for elements. Should fix serialization of document +fragments. 
+--- + xmlsave.c | 54 ++++++++++++++++++++++++++++++------------------------ + 1 file changed, 30 insertions(+), 24 deletions(-) + +diff --git a/xmlsave.c b/xmlsave.c +index 2235c8f..f2e0ea8 100644 +--- a/xmlsave.c ++++ b/xmlsave.c +@@ -1049,9 +1049,8 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + while (1) { + if (cur == root) + return; +- if (ctxt->format == 1) { ++ if (ctxt->format == 1) + xmlOutputBufferWrite(buf, 1, "\n"); +- } + if (cur->next != NULL) { + cur = cur->next; + break; +@@ -1065,21 +1064,25 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + (ctxt->level > ctxt->indent_nr ? + ctxt->indent_nr : ctxt->level), + ctxt->indent); ++ ++ if (cur->type == XML_ELEMENT_NODE) { ++ xmlOutputBufferWrite(buf, 2, "</"); ++ if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { ++ xmlOutputBufferWriteString(buf, ++ (const char *)cur->ns->prefix); ++ xmlOutputBufferWrite(buf, 1, ":"); ++ } ++ ++ xmlOutputBufferWriteString(buf, (const char *)cur->name); ++ if (ctxt->format == 2) ++ xmlOutputBufferWriteWSNonSig(ctxt, 0); ++ xmlOutputBufferWrite(buf, 1, ">"); ++ } ++ + if (cur == unformattedNode) { + ctxt->format = format; + unformattedNode = NULL; + } +- +- xmlOutputBufferWrite(buf, 2, "</"); +- if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { +- xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); +- xmlOutputBufferWrite(buf, 1, ":"); +- } +- +- xmlOutputBufferWriteString(buf, (const char *)cur->name); +- if (ctxt->format == 2) +- xmlOutputBufferWriteWSNonSig(ctxt, 0); +- xmlOutputBufferWrite(buf, 1, ">"); + } + } + } +-- +1.8.3.1 + diff --git a/Fix-corner-case-with-empty-xi-fallback.patch b/Fix-corner-case-with-empty-xi-fallback.patch new file mode 100644 index 0000000000000000000000000000000000000000..8130d54d42d80ed513639bce1310f01179b33309 --- /dev/null +++ b/Fix-corner-case-with-empty-xi-fallback.patch @@ -0,0 +1,90 @@ +From d88df4bd48ba4ce9a68040a2427b4a665d5ff891 Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Sun, 16 Aug 2020 
23:38:48 +0200 +Subject: [PATCH] Fix corner case with empty xi:fallback + +xi:fallback could become empty after recursive expansion. Use a flag +to track whether nodes should be skipped. +--- + result/XInclude/fallback6.xml | 1 + + result/XInclude/fallback6.xml.rdr | 0 + test/XInclude/docs/fallback6.xml | 6 ++++++ + xinclude.c | 11 ++++------- + 4 files changed, 11 insertions(+), 7 deletions(-) + create mode 100644 result/XInclude/fallback6.xml + create mode 100644 result/XInclude/fallback6.xml.rdr + create mode 100644 test/XInclude/docs/fallback6.xml + +diff --git a/result/XInclude/fallback6.xml b/result/XInclude/fallback6.xml +new file mode 100644 +index 0000000..2b5d411 +--- /dev/null ++++ b/result/XInclude/fallback6.xml +@@ -0,0 +1 @@ ++ +diff --git a/result/XInclude/fallback6.xml.rdr b/result/XInclude/fallback6.xml.rdr +new file mode 100644 +index 0000000..e69de29 +diff --git a/test/XInclude/docs/fallback6.xml b/test/XInclude/docs/fallback6.xml +new file mode 100644 +index 0000000..fd00a03 +--- /dev/null ++++ b/test/XInclude/docs/fallback6.xml +@@ -0,0 +1,6 @@ ++ ++ ++ ++ ++ ++ +diff --git a/xinclude.c b/xinclude.c +index ff265eb..e9d3af5 100644 +--- a/xinclude.c ++++ b/xinclude.c +@@ -60,7 +60,7 @@ struct _xmlXIncludeRef { + int xml; /* xml or txt */ + int count; /* how many refs use that specific doc */ + xmlXPathObjectPtr xptr; /* the xpointer if needed */ +- int emptyFb; /* flag to show fallback empty */ ++ int skip; /* skip in case of errors */ + }; + + struct _xmlXIncludeCtxt { +@@ -2007,7 +2007,6 @@ xmlXIncludeLoadFallback(xmlXIncludeCtxtPtr ctxt, xmlNodePtr fallback, int nr) { + fallback->children); + } else { + ctxt->incTab[nr]->inc = NULL; +- ctxt->incTab[nr]->emptyFb = 1; /* flag empty callback */ + } + return(ret); + } +@@ -2164,13 +2163,13 @@ xmlXIncludeLoadNode(xmlXIncludeCtxtPtr ctxt, int nr) { + ((xmlStrEqual(children->ns->href, XINCLUDE_NS)) || + (xmlStrEqual(children->ns->href, XINCLUDE_OLD_NS)))) { + ret = xmlXIncludeLoadFallback(ctxt, 
children, nr); +- if (ret == 0) +- break; ++ break; + } + children = children->next; + } + } + if (ret < 0) { ++ ctxt->incTab[nr]->skip = 1; + xmlXIncludeErr(ctxt, ctxt->incTab[nr]->ref, + XML_XINCLUDE_NO_FALLBACK, + "could not load %s, and no fallback was found\n", +@@ -2468,9 +2467,7 @@ xmlXIncludeDoProcess(xmlXIncludeCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr tree, + * + */ + for (i = ctxt->incBase;i < ctxt->incNr; i++) { +- if ((ctxt->incTab[i]->inc != NULL) || +- (ctxt->incTab[i]->xptr != NULL) || +- (ctxt->incTab[i]->emptyFb != 0)) /* (empty fallback) */ ++ if (ctxt->incTab[i]->skip == 0) + xmlXIncludeIncludeNode(ctxt, i); + } + +-- +1.8.3.1 + diff --git a/Fix-error-reporting-with-xi-fallback.patch b/Fix-error-reporting-with-xi-fallback.patch new file mode 100644 index 0000000000000000000000000000000000000000..1c7cdaa00db787965f95091fa9c2ea71f5fb4396 --- /dev/null +++ b/Fix-error-reporting-with-xi-fallback.patch @@ -0,0 +1,91 @@ +From 2c747129779be9e3ce84a2f98ce5052a68d41098 Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Mon, 17 Aug 2020 00:54:12 +0200 +Subject: [PATCH] Fix error reporting with xi:fallback + +When reporting errors, don't use href of xi:include if xi:fallback +was used. I think this can only be reproduced with +"xmllint --postvalid", see the original bug report: + +https://bugzilla.gnome.org/show_bug.cgi?id=152623 +--- + error.c | 22 +++++++++++----------- + xinclude.c | 4 ++++ + 2 files changed, 15 insertions(+), 11 deletions(-) + +diff --git a/error.c b/error.c +index 3e41e17..9ff1c2b 100644 +--- a/error.c ++++ b/error.c +@@ -557,6 +557,7 @@ __xmlRaiseError(xmlStructuredErrorFunc schannel, + * of the usual "base" (doc->URL) for the node (bug 152623). 
+ */ + xmlNodePtr prev = baseptr; ++ char *href = NULL; + int inclcount = 0; + while (prev != NULL) { + if (prev->prev == NULL) +@@ -564,21 +565,20 @@ __xmlRaiseError(xmlStructuredErrorFunc schannel, + else { + prev = prev->prev; + if (prev->type == XML_XINCLUDE_START) { +- if (--inclcount < 0) +- break; ++ if (inclcount > 0) { ++ --inclcount; ++ } else { ++ href = (char *) xmlGetProp(prev, BAD_CAST "href"); ++ if (href != NULL) ++ break; ++ } + } else if (prev->type == XML_XINCLUDE_END) + inclcount++; + } + } +- if (prev != NULL) { +- if (prev->type == XML_XINCLUDE_START) { +- prev->type = XML_ELEMENT_NODE; +- to->file = (char *) xmlGetProp(prev, BAD_CAST "href"); +- prev->type = XML_XINCLUDE_START; +- } else { +- to->file = (char *) xmlGetProp(prev, BAD_CAST "href"); +- } +- } else ++ if (href != NULL) ++ to->file = href; ++ else + #endif + to->file = (char *) xmlStrdup(baseptr->doc->URL); + if ((to->file == NULL) && (node != NULL) && (node->doc != NULL)) { +diff --git a/xinclude.c b/xinclude.c +index 9a65ee5..2423a93 100644 +--- a/xinclude.c ++++ b/xinclude.c +@@ -61,6 +61,7 @@ struct _xmlXIncludeRef { + int count; /* how many refs use that specific doc */ + xmlXPathObjectPtr xptr; /* the xpointer if needed */ + int skip; /* skip in case of errors */ ++ int fallback; /* fallback was loaded */ + }; + + struct _xmlXIncludeCtxt { +@@ -2007,6 +2008,7 @@ xmlXIncludeLoadFallback(xmlXIncludeCtxtPtr ctxt, xmlNodePtr fallback, int nr) { + } else { + ctxt->incTab[nr]->inc = NULL; + } ++ ctxt->incTab[nr]->fallback = 1; + return(ret); + } + +@@ -2266,6 +2268,8 @@ xmlXIncludeIncludeNode(xmlXIncludeCtxtPtr ctxt, int nr) { + * Change the current node as an XInclude start one, and add an + * XInclude end one + */ ++ if (ctxt->incTab[nr]->fallback) ++ xmlUnsetProp(cur, BAD_CAST "href"); + cur->type = XML_XINCLUDE_START; + end = xmlNewDocNode(cur->doc, cur->ns, cur->name, NULL); + if (end == NULL) { +-- +1.8.3.1 + diff --git a/Fix-quadratic-runtime-in-xi-fallback-processing.patch 
b/Fix-quadratic-runtime-in-xi-fallback-processing.patch new file mode 100644 index 0000000000000000000000000000000000000000..6c72c29808f40c56a8e324a5a1d71a1733971f11 --- /dev/null +++ b/Fix-quadratic-runtime-in-xi-fallback-processing.patch @@ -0,0 +1,63 @@ +From 27119ec33c9f6b9830efa1e0da0acfa353dfa55a Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Mon, 17 Aug 2020 00:05:19 +0200 +Subject: [PATCH] Fix quadratic runtime in xi:fallback processing + +Copying the tree would lead to runtime quadratic in nested fallback +depth, similar to naive string concatenation. +--- + xinclude.c | 23 +++++++++++------------ + 1 file changed, 11 insertions(+), 12 deletions(-) + +diff --git a/xinclude.c b/xinclude.c +index e9d3af5..9a65ee5 100644 +--- a/xinclude.c ++++ b/xinclude.c +@@ -2003,8 +2003,7 @@ xmlXIncludeLoadFallback(xmlXIncludeCtxtPtr ctxt, xmlNodePtr fallback, int nr) { + ret = -1; + xmlXIncludeFreeContext(newctxt); + +- ctxt->incTab[nr]->inc = xmlDocCopyNodeList(ctxt->doc, +- fallback->children); ++ ctxt->incTab[nr]->inc = fallback->children; + } else { + ctxt->incTab[nr]->inc = NULL; + } +@@ -2268,12 +2267,6 @@ xmlXIncludeIncludeNode(xmlXIncludeCtxtPtr ctxt, int nr) { + * XInclude end one + */ + cur->type = XML_XINCLUDE_START; +- /* Remove fallback children */ +- for (child = cur->children; child != NULL; child = next) { +- next = child->next; +- xmlUnlinkNode(child); +- xmlFreeNode(child); +- } + end = xmlNewDocNode(cur->doc, cur->ns, cur->name, NULL); + if (end == NULL) { + xmlXIncludeErr(ctxt, ctxt->incTab[nr]->ref, +@@ -2289,11 +2282,17 @@ xmlXIncludeIncludeNode(xmlXIncludeCtxtPtr ctxt, int nr) { + * Add the list of nodes + */ + while (list != NULL) { +- cur = list; +- list = list->next; +- +- xmlAddPrevSibling(end, cur); ++ next = list->next; ++ xmlAddPrevSibling(end, list); ++ list = next; + } ++ ++ /* Remove fallback node */ ++ for (child = cur->children; child != NULL; child = next) { ++ next = child->next; ++ xmlUnlinkNode(child); ++ xmlFreeNode(child); 
++ } + } + + +-- +1.8.3.1 + diff --git a/Fix-regression-in-xmlNodeDumpOutputInternal.patch b/Fix-regression-in-xmlNodeDumpOutputInternal.patch new file mode 100644 index 0000000000000000000000000000000000000000..c3c2bc14a38d324b4aa0207087cd6ff059e786cc --- /dev/null +++ b/Fix-regression-in-xmlNodeDumpOutputInternal.patch @@ -0,0 +1,46 @@ +From 13ad8736d294536da4cbcd70a96b0a2fbf47070c Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Tue, 25 May 2021 10:55:25 +0200 +Subject: [PATCH] Fix regression in xmlNodeDumpOutputInternal + +Commit 85b1792e could cause additional whitespace if xmlNodeDump was +called with a non-zero starting level. +--- + xmlsave.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +diff --git a/xmlsave.c b/xmlsave.c +index aedbd5e..489505f 100644 +--- a/xmlsave.c ++++ b/xmlsave.c +@@ -890,6 +890,13 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + break; + + case XML_ELEMENT_NODE: ++ if ((cur != root) && (ctxt->format == 1) && ++ (xmlIndentTreeOutput)) ++ xmlOutputBufferWrite(buf, ctxt->indent_size * ++ (ctxt->level > ctxt->indent_nr ? ++ ctxt->indent_nr : ctxt->level), ++ ctxt->indent); ++ + /* + * Some users like lxml are known to pass nodes with a corrupted + * tree structure. Fall back to a recursive call to handle this +@@ -900,13 +907,6 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + break; + } + +- if ((ctxt->level > 0) && (ctxt->format == 1) && +- (xmlIndentTreeOutput)) +- xmlOutputBufferWrite(buf, ctxt->indent_size * +- (ctxt->level > ctxt->indent_nr ? 
+- ctxt->indent_nr : ctxt->level), +- ctxt->indent); +- + xmlOutputBufferWrite(buf, 1, "<"); + if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { + xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); +-- +1.8.3.1 + diff --git a/Fix-regression-introduced-with-commit-74dcc10b.patch b/Fix-regression-introduced-with-commit-74dcc10b.patch new file mode 100644 index 0000000000000000000000000000000000000000..0d3330e523e10b26d1174732c1c9ce52f24eeded --- /dev/null +++ b/Fix-regression-introduced-with-commit-74dcc10b.patch @@ -0,0 +1,97 @@ +From 87d20b554c6a90e7ece1cc7391c005089bf85b78 Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Wed, 19 Aug 2020 13:52:08 +0200 +Subject: [PATCH] Fix regression introduced with commit 74dcc10b + +The code wasn't dead after all, but I can see no reason in delaying +the XPointer evaluation. This could lead to nodes included earlier +appearing in XPointer results. +--- + result/XInclude/ns1.xml | 10 ++++++++++ + result/XInclude/ns1.xml.rdr | 23 +++++++++++++++++++++++ + test/XInclude/docs/ns1.xml | 12 ++++++++++++ + xinclude.c | 2 +- + 4 files changed, 46 insertions(+), 1 deletion(-) + create mode 100644 result/XInclude/ns1.xml + create mode 100644 result/XInclude/ns1.xml.rdr + create mode 100644 test/XInclude/docs/ns1.xml + +diff --git a/result/XInclude/ns1.xml b/result/XInclude/ns1.xml +new file mode 100644 +index 0000000..ab41fb7 +--- /dev/null ++++ b/result/XInclude/ns1.xml +@@ -0,0 +1,10 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/result/XInclude/ns1.xml.rdr b/result/XInclude/ns1.xml.rdr +new file mode 100644 +index 0000000..f23702f +--- /dev/null ++++ b/result/XInclude/ns1.xml.rdr +@@ -0,0 +1,23 @@ ++0 1 doc 0 0 ++1 14 #text 0 1 ++ ++1 1 ns:elem 1 0 ++1 14 #text 0 1 ++ ++1 1 elem 0 0 ++2 14 #text 0 1 ++ ++2 1 ns:elem 1 0 ++2 14 #text 0 1 ++ ++1 15 elem 0 0 ++1 14 #text 0 1 ++ ++1 14 #text 0 1 ++ ++1 1 ns:elem 1 0 ++1 14 #text 0 1 ++ ++1 14 #text 0 1 ++ ++0 15 doc 0 0 +diff --git a/test/XInclude/docs/ns1.xml 
b/test/XInclude/docs/ns1.xml +new file mode 100644 +index 0000000..7523f4a +--- /dev/null ++++ b/test/XInclude/docs/ns1.xml +@@ -0,0 +1,12 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +diff --git a/xinclude.c b/xinclude.c +index aac30d5..c92b32b 100644 +--- a/xinclude.c ++++ b/xinclude.c +@@ -1464,7 +1464,7 @@ xmlXIncludeLoadDoc(xmlXIncludeCtxtPtr ctxt, const xmlChar *url, int nr) { + */ + if ((URL[0] == 0) || (URL[0] == '#') || + ((ctxt->doc != NULL) && (xmlStrEqual(URL, ctxt->doc->URL)))) { +- doc = NULL; ++ doc = ctxt->doc; + goto loaded; + } + +-- +1.8.3.1 + diff --git a/Fix-regression-introduced-with-commit-d88df4b.patch b/Fix-regression-introduced-with-commit-d88df4b.patch new file mode 100644 index 0000000000000000000000000000000000000000..0c1ef9d3cf0c057aeb9ed89a39c6c8b8dd85691e --- /dev/null +++ b/Fix-regression-introduced-with-commit-d88df4b.patch @@ -0,0 +1,67 @@ +From 3fcf319378f9396a9ca840cd63b96a441818e1f1 Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Sat, 22 Aug 2020 00:43:18 +0200 +Subject: [PATCH] Fix regression introduced with commit d88df4b + +Revert the commit and use a different approach. + +Found by OSS-Fuzz. 
+--- + xinclude.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/xinclude.c b/xinclude.c +index c92b32b..f48e0af 100644 +--- a/xinclude.c ++++ b/xinclude.c +@@ -59,8 +59,8 @@ struct _xmlXIncludeRef { + xmlNodePtr inc; /* the included copy */ + int xml; /* xml or txt */ + int count; /* how many refs use that specific doc */ +- int skip; /* skip in case of errors */ + int fallback; /* fallback was loaded */ ++ int emptyFb; /* flag to show fallback empty */ + }; + + struct _xmlXIncludeCtxt { +@@ -1988,8 +1988,11 @@ xmlXIncludeLoadFallback(xmlXIncludeCtxtPtr ctxt, xmlNodePtr fallback, int nr) { + + ctxt->incTab[nr]->inc = xmlDocCopyNodeList(ctxt->doc, + fallback->children); ++ if (ctxt->incTab[nr]->inc == NULL) ++ ctxt->incTab[nr]->emptyFb = 1; + } else { + ctxt->incTab[nr]->inc = NULL; ++ ctxt->incTab[nr]->emptyFb = 1; /* flag empty callback */ + } + ctxt->incTab[nr]->fallback = 1; + return(ret); +@@ -2153,7 +2156,6 @@ xmlXIncludeLoadNode(xmlXIncludeCtxtPtr ctxt, int nr) { + } + } + if (ret < 0) { +- ctxt->incTab[nr]->skip = 1; + xmlXIncludeErr(ctxt, ctxt->incTab[nr]->ref, + XML_XINCLUDE_NO_FALLBACK, + "could not load %s, and no fallback was found\n", +@@ -2197,6 +2199,7 @@ xmlXIncludeIncludeNode(xmlXIncludeCtxtPtr ctxt, int nr) { + + list = ctxt->incTab[nr]->inc; + ctxt->incTab[nr]->inc = NULL; ++ ctxt->incTab[nr]->emptyFb = 0; + + /* + * Check against the risk of generating a multi-rooted document +@@ -2459,7 +2462,8 @@ xmlXIncludeDoProcess(xmlXIncludeCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr tree, + * + */ + for (i = ctxt->incBase;i < ctxt->incNr; i++) { +- if (ctxt->incTab[i]->skip == 0) ++ if ((ctxt->incTab[i]->inc != NULL) || ++ (ctxt->incTab[i]->emptyFb != 0)) /* (empty fallback) */ + xmlXIncludeIncludeNode(ctxt, i); + } + +-- +1.8.3.1 + diff --git a/Handle-dumps-of-corrupted-documents-more-gracefully.patch b/Handle-dumps-of-corrupted-documents-more-gracefully.patch new file mode 100644 index 
0000000000000000000000000000000000000000..8c0ba6de5ef7e8446408ff659d32a26baa85a3c9 --- /dev/null +++ b/Handle-dumps-of-corrupted-documents-more-gracefully.patch @@ -0,0 +1,50 @@ +From 0b3c64d9f2f3e9ce1a98d8f19ee7a763c87e27d5 Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Tue, 29 Sep 2020 18:08:37 +0200 +Subject: [PATCH] Handle dumps of corrupted documents more gracefully + +Check parent pointers for NULL after the non-recursive rewrite of the +serialization code. This avoids segfaults with corrupted documents +which can apparently be seen with lxml, see issue #187. +--- + HTMLtree.c | 6 ++++++ + xmlsave.c | 12 ++++++++++++ + 2 files changed, 18 insertions(+) + +diff --git a/HTMLtree.c b/HTMLtree.c +index cdb7f86..8d0c779 100644 +--- a/HTMLtree.c ++++ b/HTMLtree.c +@@ -903,6 +903,12 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + break; + } + ++ /* ++ * The parent should never be NULL here but we want to handle ++ * corrupted documents gracefully. ++ */ ++ if (cur->parent == NULL) ++ return; + cur = cur->parent; + + if ((cur->type == XML_HTML_DOCUMENT_NODE) || +diff --git a/xmlsave.c b/xmlsave.c +index 2225628..61a4045 100644 +--- a/xmlsave.c ++++ b/xmlsave.c +@@ -1058,6 +1058,12 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + break; + } + ++ /* ++ * The parent should never be NULL here but we want to handle ++ * corrupted documents gracefully. 
++ */ ++ if (cur->parent == NULL) ++ return; + cur = cur->parent; + + if (cur->type == XML_ELEMENT_NODE) { +-- +1.8.3.1 + diff --git a/Make-htmlNodeDumpFormatOutput-non-recursive.patch b/Make-htmlNodeDumpFormatOutput-non-recursive.patch new file mode 100644 index 0000000000000000000000000000000000000000..bc582df14a7a48c29285858d075e103307a4026f --- /dev/null +++ b/Make-htmlNodeDumpFormatOutput-non-recursive.patch @@ -0,0 +1,470 @@ +From b79ab6e6d9270666c5dcd2fd85e4c8563d13f922 Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Tue, 28 Jul 2020 02:42:37 +0200 +Subject: [PATCH] Make htmlNodeDumpFormatOutput non-recursive + +Fixes stack overflow with deeply nested HTML documents. + +Found by OSS-Fuzz. +--- + HTMLtree.c | 410 ++++++++++++++++++++++++++++--------------------------------- + 1 file changed, 185 insertions(+), 225 deletions(-) + +diff --git a/HTMLtree.c b/HTMLtree.c +index fe5d086..8d236bb 100644 +--- a/HTMLtree.c ++++ b/HTMLtree.c +@@ -760,50 +760,6 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, + } + + /** +- * htmlAttrListDumpOutput: +- * @buf: the HTML buffer output +- * @doc: the document +- * @cur: the first attribute pointer +- * @encoding: the encoding string +- * +- * Dump a list of HTML attributes +- */ +-static void +-htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) { +- if (cur == NULL) { +- return; +- } +- while (cur != NULL) { +- htmlAttrDumpOutput(buf, doc, cur, encoding); +- cur = cur->next; +- } +-} +- +- +- +-/** +- * htmlNodeListDumpOutput: +- * @buf: the HTML buffer output +- * @doc: the document +- * @cur: the first node +- * @encoding: the encoding string +- * @format: should formatting spaces been added +- * +- * Dump an HTML node list, recursive behaviour,children are printed too. 
+- */ +-static void +-htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, +- xmlNodePtr cur, const char *encoding, int format) { +- if (cur == NULL) { +- return; +- } +- while (cur != NULL) { +- htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format); +- cur = cur->next; +- } +-} +- +-/** + * htmlNodeDumpFormatOutput: + * @buf: the HTML buffer output + * @doc: the document +@@ -816,6 +772,8 @@ htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + void + htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + xmlNodePtr cur, const char *encoding, int format) { ++ xmlNodePtr root; ++ xmlAttrPtr attr; + const htmlElemDesc * info; + + xmlInitParser(); +@@ -823,172 +781,193 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + if ((cur == NULL) || (buf == NULL)) { + return; + } +- /* +- * Special cases. +- */ +- if (cur->type == XML_DTD_NODE) +- return; +- if ((cur->type == XML_HTML_DOCUMENT_NODE) || +- (cur->type == XML_DOCUMENT_NODE)){ +- htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding); +- return; +- } +- if (cur->type == XML_ATTRIBUTE_NODE) { +- htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding); +- return; +- } +- if (cur->type == HTML_TEXT_NODE) { +- if (cur->content != NULL) { +- if (((cur->name == (const xmlChar *)xmlStringText) || +- (cur->name != (const xmlChar *)xmlStringTextNoenc)) && +- ((cur->parent == NULL) || +- ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) && +- (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) { +- xmlChar *buffer; +- +- buffer = xmlEncodeEntitiesReentrant(doc, cur->content); +- if (buffer != NULL) { +- xmlOutputBufferWriteString(buf, (const char *)buffer); +- xmlFree(buffer); +- } +- } else { +- xmlOutputBufferWriteString(buf, (const char *)cur->content); +- } +- } +- return; +- } +- if (cur->type == HTML_COMMENT_NODE) { +- if (cur->content != NULL) { +- xmlOutputBufferWriteString(buf, ""); +- } +- return; +- } +- if (cur->type == HTML_PI_NODE) { +- if 
(cur->name == NULL) +- return; +- xmlOutputBufferWriteString(buf, "name); +- if (cur->content != NULL) { +- xmlOutputBufferWriteString(buf, " "); +- xmlOutputBufferWriteString(buf, (const char *)cur->content); +- } +- xmlOutputBufferWriteString(buf, ">"); +- return; +- } +- if (cur->type == HTML_ENTITY_REF_NODE) { +- xmlOutputBufferWriteString(buf, "&"); +- xmlOutputBufferWriteString(buf, (const char *)cur->name); +- xmlOutputBufferWriteString(buf, ";"); +- return; +- } +- if (cur->type == HTML_PRESERVE_NODE) { +- if (cur->content != NULL) { +- xmlOutputBufferWriteString(buf, (const char *)cur->content); +- } +- return; +- } + +- /* +- * Get specific HTML info for that node. +- */ +- if (cur->ns == NULL) +- info = htmlTagLookup(cur->name); +- else +- info = NULL; ++ root = cur; ++ while (1) { ++ switch (cur->type) { ++ case XML_HTML_DOCUMENT_NODE: ++ case XML_DOCUMENT_NODE: ++ if (((xmlDocPtr) cur)->intSubset != NULL) { ++ htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL); ++ } ++ if (cur->children != NULL) { ++ cur = cur->children; ++ continue; ++ } ++ break; + +- xmlOutputBufferWriteString(buf, "<"); +- if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { +- xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); +- xmlOutputBufferWriteString(buf, ":"); +- } +- xmlOutputBufferWriteString(buf, (const char *)cur->name); +- if (cur->nsDef) +- xmlNsListDumpOutput(buf, cur->nsDef); +- if (cur->properties != NULL) +- htmlAttrListDumpOutput(buf, doc, cur->properties, encoding); +- +- if ((info != NULL) && (info->empty)) { +- xmlOutputBufferWriteString(buf, ">"); +- if ((format) && (!info->isinline) && (cur->next != NULL)) { +- if ((cur->next->type != HTML_TEXT_NODE) && +- (cur->next->type != HTML_ENTITY_REF_NODE) && +- (cur->parent != NULL) && +- (cur->parent->name != NULL) && +- (cur->parent->name[0] != 'p')) /* p, pre, param */ +- xmlOutputBufferWriteString(buf, "\n"); +- } +- return; +- } +- if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) && +- 
(cur->children == NULL)) { +- if ((info != NULL) && (info->saveEndTag != 0) && +- (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) && +- (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) { +- xmlOutputBufferWriteString(buf, ">"); +- } else { +- xmlOutputBufferWriteString(buf, ">ns == NULL) ++ info = htmlTagLookup(cur->name); ++ else ++ info = NULL; ++ ++ xmlOutputBufferWriteString(buf, "<"); + if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { + xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); + xmlOutputBufferWriteString(buf, ":"); + } +- xmlOutputBufferWriteString(buf, (const char *)cur->name); +- xmlOutputBufferWriteString(buf, ">"); +- } +- if ((format) && (cur->next != NULL) && +- (info != NULL) && (!info->isinline)) { +- if ((cur->next->type != HTML_TEXT_NODE) && +- (cur->next->type != HTML_ENTITY_REF_NODE) && +- (cur->parent != NULL) && +- (cur->parent->name != NULL) && +- (cur->parent->name[0] != 'p')) /* p, pre, param */ +- xmlOutputBufferWriteString(buf, "\n"); +- } +- return; +- } +- xmlOutputBufferWriteString(buf, ">"); +- if ((cur->type != XML_ELEMENT_NODE) && +- (cur->content != NULL)) { +- /* +- * Uses the OutputBuffer property to automatically convert +- * invalids to charrefs +- */ +- +- xmlOutputBufferWriteString(buf, (const char *) cur->content); +- } +- if (cur->children != NULL) { +- if ((format) && (info != NULL) && (!info->isinline) && +- (cur->children->type != HTML_TEXT_NODE) && +- (cur->children->type != HTML_ENTITY_REF_NODE) && +- (cur->children != cur->last) && +- (cur->name != NULL) && +- (cur->name[0] != 'p')) /* p, pre, param */ +- xmlOutputBufferWriteString(buf, "\n"); +- htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format); +- if ((format) && (info != NULL) && (!info->isinline) && +- (cur->last->type != HTML_TEXT_NODE) && +- (cur->last->type != HTML_ENTITY_REF_NODE) && +- (cur->children != cur->last) && +- (cur->name != NULL) && +- (cur->name[0] != 'p')) /* p, pre, param */ +- 
xmlOutputBufferWriteString(buf, "\n"); +- } +- xmlOutputBufferWriteString(buf, "</"); +- if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { +- xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); +- xmlOutputBufferWriteString(buf, ":"); +- } +- xmlOutputBufferWriteString(buf, (const char *)cur->name); +- xmlOutputBufferWriteString(buf, ">"); +- if ((format) && (info != NULL) && (!info->isinline) && +- (cur->next != NULL)) { +- if ((cur->next->type != HTML_TEXT_NODE) && +- (cur->next->type != HTML_ENTITY_REF_NODE) && +- (cur->parent != NULL) && +- (cur->parent->name != NULL) && +- (cur->parent->name[0] != 'p')) /* p, pre, param */ +- xmlOutputBufferWriteString(buf, "\n"); ++ xmlOutputBufferWriteString(buf, (const char *)cur->name); ++ if (cur->nsDef) ++ xmlNsListDumpOutput(buf, cur->nsDef); ++ attr = cur->properties; ++ while (attr != NULL) { ++ htmlAttrDumpOutput(buf, doc, attr, encoding); ++ attr = attr->next; ++ } ++ ++ if ((info != NULL) && (info->empty)) { ++ xmlOutputBufferWriteString(buf, ">"); ++ } else if (cur->children == NULL) { ++ if ((info != NULL) && (info->saveEndTag != 0) && ++ (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) && ++ (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) { ++ xmlOutputBufferWriteString(buf, ">"); ++ } else { ++ xmlOutputBufferWriteString(buf, "></"); ++ if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { ++ xmlOutputBufferWriteString(buf, ++ (const char *)cur->ns->prefix); ++ xmlOutputBufferWriteString(buf, ":"); ++ } ++ xmlOutputBufferWriteString(buf, (const char *)cur->name); ++ xmlOutputBufferWriteString(buf, ">"); ++ } ++ } else { ++ xmlOutputBufferWriteString(buf, ">"); ++ if ((format) && (info != NULL) && (!info->isinline) && ++ (cur->children->type != HTML_TEXT_NODE) && ++ (cur->children->type != HTML_ENTITY_REF_NODE) && ++ (cur->children != cur->last) && ++ (cur->name != NULL) && ++ (cur->name[0] != 'p')) /* p, pre, param */ ++ xmlOutputBufferWriteString(buf, "\n"); ++ cur = cur->children; ++ continue; ++ } ++ ++ if ((format) && 
(cur->next != NULL) && ++ (info != NULL) && (!info->isinline)) { ++ if ((cur->next->type != HTML_TEXT_NODE) && ++ (cur->next->type != HTML_ENTITY_REF_NODE) && ++ (cur->parent != NULL) && ++ (cur->parent->name != NULL) && ++ (cur->parent->name[0] != 'p')) /* p, pre, param */ ++ xmlOutputBufferWriteString(buf, "\n"); ++ } ++ ++ break; ++ ++ case XML_ATTRIBUTE_NODE: ++ htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding); ++ break; ++ ++ case HTML_TEXT_NODE: ++ if (cur->content == NULL) ++ break; ++ if (((cur->name == (const xmlChar *)xmlStringText) || ++ (cur->name != (const xmlChar *)xmlStringTextNoenc)) && ++ ((cur->parent == NULL) || ++ ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) && ++ (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) { ++ xmlChar *buffer; ++ ++ buffer = xmlEncodeEntitiesReentrant(doc, cur->content); ++ if (buffer != NULL) { ++ xmlOutputBufferWriteString(buf, (const char *)buffer); ++ xmlFree(buffer); ++ } ++ } else { ++ xmlOutputBufferWriteString(buf, (const char *)cur->content); ++ } ++ break; ++ ++ case HTML_COMMENT_NODE: ++ if (cur->content != NULL) { ++ xmlOutputBufferWriteString(buf, ""); ++ } ++ break; ++ ++ case HTML_PI_NODE: ++ if (cur->name != NULL) { ++ xmlOutputBufferWriteString(buf, "name); ++ if (cur->content != NULL) { ++ xmlOutputBufferWriteString(buf, " "); ++ xmlOutputBufferWriteString(buf, ++ (const char *)cur->content); ++ } ++ xmlOutputBufferWriteString(buf, ">"); ++ } ++ break; ++ ++ case HTML_ENTITY_REF_NODE: ++ xmlOutputBufferWriteString(buf, "&"); ++ xmlOutputBufferWriteString(buf, (const char *)cur->name); ++ xmlOutputBufferWriteString(buf, ";"); ++ break; ++ ++ case HTML_PRESERVE_NODE: ++ if (cur->content != NULL) { ++ xmlOutputBufferWriteString(buf, (const char *)cur->content); ++ } ++ break; ++ ++ default: ++ break; ++ } ++ ++ while (1) { ++ if (cur == root) ++ return; ++ if (cur->next != NULL) { ++ cur = cur->next; ++ break; ++ } ++ ++ cur = cur->parent; ++ ++ if ((cur->type == 
XML_HTML_DOCUMENT_NODE) || ++ (cur->type == XML_DOCUMENT_NODE)) { ++ xmlOutputBufferWriteString(buf, "\n"); ++ } else { ++ if ((format) && (cur->ns == NULL)) ++ info = htmlTagLookup(cur->name); ++ else ++ info = NULL; ++ ++ if ((format) && (info != NULL) && (!info->isinline) && ++ (cur->last->type != HTML_TEXT_NODE) && ++ (cur->last->type != HTML_ENTITY_REF_NODE) && ++ (cur->children != cur->last) && ++ (cur->name != NULL) && ++ (cur->name[0] != 'p')) /* p, pre, param */ ++ xmlOutputBufferWriteString(buf, "\n"); ++ ++ xmlOutputBufferWriteString(buf, "</"); ++ if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { ++ xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); ++ xmlOutputBufferWriteString(buf, ":"); ++ } ++ xmlOutputBufferWriteString(buf, (const char *)cur->name); ++ xmlOutputBufferWriteString(buf, ">"); ++ ++ if ((format) && (info != NULL) && (!info->isinline) && ++ (cur->next != NULL)) { ++ if ((cur->next->type != HTML_TEXT_NODE) && ++ (cur->next->type != HTML_ENTITY_REF_NODE) && ++ (cur->parent != NULL) && ++ (cur->parent->name != NULL) && ++ (cur->parent->name[0] != 'p')) /* p, pre, param */ ++ xmlOutputBufferWriteString(buf, "\n"); ++ } ++ } ++ } + } + } + +@@ -1020,26 +999,7 @@ htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + void + htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, + const char *encoding, int format) { +- int type; +- +- xmlInitParser(); +- +- if ((buf == NULL) || (cur == NULL)) +- return; +- +- /* +- * force to output the stuff as HTML, especially for entities +- */ +- type = cur->type; +- cur->type = XML_HTML_DOCUMENT_NODE; +- if (cur->intSubset != NULL) { +- htmlDtdDumpOutput(buf, cur, NULL); +- } +- if (cur->children != NULL) { +- htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format); +- } +- xmlOutputBufferWriteString(buf, "\n"); +- cur->type = (xmlElementType) type; ++ htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, encoding, format); + } + + /** +@@ -1053,7 +1013,7 @@ 
htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, + void + htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, + const char *encoding) { +- htmlDocContentDumpFormatOutput(buf, cur, encoding, 1); ++ htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, encoding, 1); + } + + /************************************************************************ +-- +1.8.3.1 + diff --git a/Make-xmlNodeDumpOutputInternal-non-recursive.patch b/Make-xmlNodeDumpOutputInternal-non-recursive.patch new file mode 100644 index 0000000000000000000000000000000000000000..af6a4f059d837603918ef44d0e30c011b2e42d4a --- /dev/null +++ b/Make-xmlNodeDumpOutputInternal-non-recursive.patch @@ -0,0 +1,550 @@ +From dc6f009280e6108fe25f4c4ce32e18fb69cf496e Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Tue, 28 Jul 2020 19:07:19 +0200 +Subject: [PATCH] Make xmlNodeDumpOutputInternal non-recursive + +Fixes stack overflow with deeply nested documents. +--- + xmlsave.c | 470 ++++++++++++++++++++++++++++++++------------------------------ + 1 file changed, 240 insertions(+), 230 deletions(-) + +diff --git a/xmlsave.c b/xmlsave.c +index cf32d69..2235c8f 100644 +--- a/xmlsave.c ++++ b/xmlsave.c +@@ -590,7 +590,6 @@ static int xmlSaveClearEncoding(xmlSaveCtxtPtr ctxt) { + static void + xhtmlNodeDumpOutput(xmlSaveCtxtPtr ctxt, xmlNodePtr cur); + #endif +-static void xmlNodeListDumpOutput(xmlSaveCtxtPtr ctxt, xmlNodePtr cur); + static void xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur); + void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur); + static int xmlDocContentDumpOutput(xmlSaveCtxtPtr ctxt, xmlDocPtr cur); +@@ -705,6 +704,7 @@ xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur) { + static void + xmlDtdDumpOutput(xmlSaveCtxtPtr ctxt, xmlDtdPtr dtd) { + xmlOutputBufferPtr buf; ++ xmlNodePtr cur; + int format, level; + + if (dtd == NULL) return; +@@ -742,7 +742,9 @@ xmlDtdDumpOutput(xmlSaveCtxtPtr ctxt, xmlDtdPtr dtd) { + level = 
ctxt->level; + ctxt->format = 0; + ctxt->level = -1; +- xmlNodeListDumpOutput(ctxt, dtd->children); ++ for (cur = dtd->children; cur != NULL; cur = cur->next) { ++ xmlNodeDumpOutputInternal(ctxt, cur); ++ } + ctxt->format = format; + ctxt->level = level; + xmlOutputBufferWrite(buf, 2, "]>"); +@@ -776,58 +778,9 @@ xmlAttrDumpOutput(xmlSaveCtxtPtr ctxt, xmlAttrPtr cur) { + xmlOutputBufferWrite(buf, 1, "\""); + } + +-/** +- * xmlAttrListDumpOutput: +- * @buf: the XML buffer output +- * @doc: the document +- * @cur: the first attribute pointer +- * @encoding: an optional encoding string +- * +- * Dump a list of XML attributes +- */ +-static void +-xmlAttrListDumpOutput(xmlSaveCtxtPtr ctxt, xmlAttrPtr cur) { +- if (cur == NULL) return; +- while (cur != NULL) { +- xmlAttrDumpOutput(ctxt, cur); +- cur = cur->next; +- } +-} +- +- +- +-/** +- * xmlNodeListDumpOutput: +- * @cur: the first node +- * +- * Dump an XML node list, recursive behaviour, children are printed too. +- */ +-static void +-xmlNodeListDumpOutput(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { +- xmlOutputBufferPtr buf; +- +- if (cur == NULL) return; +- buf = ctxt->buf; +- while (cur != NULL) { +- if ((ctxt->format == 1) && (xmlIndentTreeOutput) && +- ((cur->type == XML_ELEMENT_NODE) || +- (cur->type == XML_COMMENT_NODE) || +- (cur->type == XML_PI_NODE))) +- xmlOutputBufferWrite(buf, ctxt->indent_size * +- (ctxt->level > ctxt->indent_nr ? +- ctxt->indent_nr : ctxt->level), +- ctxt->indent); +- xmlNodeDumpOutputInternal(ctxt, cur); +- if (ctxt->format == 1) { +- xmlOutputBufferWrite(buf, 1, "\n"); +- } +- cur = cur->next; +- } +-} +- + #ifdef LIBXML_HTML_ENABLED + /** +- * xmlNodeDumpOutputInternal: ++ * htmlNodeDumpOutputInternal: + * @cur: the current node + * + * Dump an HTML node, recursive behaviour, children are printed too. 
+@@ -893,57 +846,111 @@ htmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + */ + static void + xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { +- int format; +- xmlNodePtr tmp; ++ int format = ctxt->format; ++ xmlNodePtr tmp, root, unformattedNode = NULL; ++ xmlAttrPtr attr; + xmlChar *start, *end; + xmlOutputBufferPtr buf; + + if (cur == NULL) return; + buf = ctxt->buf; +- if (cur->type == XML_XINCLUDE_START) +- return; +- if (cur->type == XML_XINCLUDE_END) +- return; +- if ((cur->type == XML_DOCUMENT_NODE) || +- (cur->type == XML_HTML_DOCUMENT_NODE)) { +- xmlDocContentDumpOutput(ctxt, (xmlDocPtr) cur); +- return; +- } +-#ifdef LIBXML_HTML_ENABLED +- if (ctxt->options & XML_SAVE_XHTML) { +- xhtmlNodeDumpOutput(ctxt, cur); +- return; +- } +- if (((cur->type != XML_NAMESPACE_DECL) && (cur->doc != NULL) && +- (cur->doc->type == XML_HTML_DOCUMENT_NODE) && +- ((ctxt->options & XML_SAVE_AS_XML) == 0)) || +- (ctxt->options & XML_SAVE_AS_HTML)) { +- htmlNodeDumpOutputInternal(ctxt, cur); +- return; +- } +-#endif +- if (cur->type == XML_DTD_NODE) { +- xmlDtdDumpOutput(ctxt, (xmlDtdPtr) cur); +- return; +- } +- if (cur->type == XML_DOCUMENT_FRAG_NODE) { +- xmlNodeListDumpOutput(ctxt, cur->children); +- return; +- } +- if (cur->type == XML_ELEMENT_DECL) { +- xmlBufDumpElementDecl(buf->buffer, (xmlElementPtr) cur); +- return; +- } +- if (cur->type == XML_ATTRIBUTE_DECL) { +- xmlBufDumpAttributeDecl(buf->buffer, (xmlAttributePtr) cur); +- return; +- } +- if (cur->type == XML_ENTITY_DECL) { +- xmlBufDumpEntityDecl(buf->buffer, (xmlEntityPtr) cur); +- return; +- } +- if (cur->type == XML_TEXT_NODE) { +- if (cur->content != NULL) { ++ ++ root = cur; ++ while (1) { ++ switch (cur->type) { ++ case XML_DOCUMENT_NODE: ++ case XML_HTML_DOCUMENT_NODE: ++ xmlDocContentDumpOutput(ctxt, (xmlDocPtr) cur); ++ break; ++ ++ case XML_DTD_NODE: ++ xmlDtdDumpOutput(ctxt, (xmlDtdPtr) cur); ++ break; ++ ++ case XML_DOCUMENT_FRAG_NODE: ++ if (cur->children != NULL) 
{ ++ cur = cur->children; ++ continue; ++ } ++ break; ++ ++ case XML_ELEMENT_DECL: ++ xmlBufDumpElementDecl(buf->buffer, (xmlElementPtr) cur); ++ break; ++ ++ case XML_ATTRIBUTE_DECL: ++ xmlBufDumpAttributeDecl(buf->buffer, (xmlAttributePtr) cur); ++ break; ++ ++ case XML_ENTITY_DECL: ++ xmlBufDumpEntityDecl(buf->buffer, (xmlEntityPtr) cur); ++ break; ++ ++ case XML_ELEMENT_NODE: ++ if ((cur != root) && (ctxt->format == 1) && (xmlIndentTreeOutput)) ++ xmlOutputBufferWrite(buf, ctxt->indent_size * ++ (ctxt->level > ctxt->indent_nr ? ++ ctxt->indent_nr : ctxt->level), ++ ctxt->indent); ++ ++ xmlOutputBufferWrite(buf, 1, "<"); ++ if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { ++ xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); ++ xmlOutputBufferWrite(buf, 1, ":"); ++ } ++ xmlOutputBufferWriteString(buf, (const char *)cur->name); ++ if (cur->nsDef) ++ xmlNsListDumpOutputCtxt(ctxt, cur->nsDef); ++ for (attr = cur->properties; attr != NULL; attr = attr->next) ++ xmlAttrDumpOutput(ctxt, attr); ++ ++ if (cur->children == NULL) { ++ if ((ctxt->options & XML_SAVE_NO_EMPTY) == 0) { ++ if (ctxt->format == 2) ++ xmlOutputBufferWriteWSNonSig(ctxt, 0); ++ xmlOutputBufferWrite(buf, 2, "/>"); ++ } else { ++ if (ctxt->format == 2) ++ xmlOutputBufferWriteWSNonSig(ctxt, 1); ++ xmlOutputBufferWrite(buf, 3, ">ns != NULL) && (cur->ns->prefix != NULL)) { ++ xmlOutputBufferWriteString(buf, ++ (const char *)cur->ns->prefix); ++ xmlOutputBufferWrite(buf, 1, ":"); ++ } ++ xmlOutputBufferWriteString(buf, (const char *)cur->name); ++ if (ctxt->format == 2) ++ xmlOutputBufferWriteWSNonSig(ctxt, 0); ++ xmlOutputBufferWrite(buf, 1, ">"); ++ } ++ } else { ++ if (ctxt->format == 1) { ++ tmp = cur->children; ++ while (tmp != NULL) { ++ if ((tmp->type == XML_TEXT_NODE) || ++ (tmp->type == XML_CDATA_SECTION_NODE) || ++ (tmp->type == XML_ENTITY_REF_NODE)) { ++ ctxt->format = 0; ++ unformattedNode = cur; ++ break; ++ } ++ tmp = tmp->next; ++ } ++ } ++ if (ctxt->format == 2) ++ 
xmlOutputBufferWriteWSNonSig(ctxt, 1); ++ xmlOutputBufferWrite(buf, 1, ">"); ++ if (ctxt->format == 1) xmlOutputBufferWrite(buf, 1, "\n"); ++ if (ctxt->level >= 0) ctxt->level++; ++ cur = cur->children; ++ continue; ++ } ++ ++ break; ++ ++ case XML_TEXT_NODE: ++ if (cur->content == NULL) ++ break; + if (cur->name != xmlStringTextNoenc) { + xmlOutputBufferWriteEscape(buf, cur->content, ctxt->escape); + } else { +@@ -952,139 +959,129 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + */ + xmlOutputBufferWriteString(buf, (const char *) cur->content); + } +- } ++ break; + +- return; +- } +- if (cur->type == XML_PI_NODE) { +- if (cur->content != NULL) { +- xmlOutputBufferWrite(buf, 2, "name); +- if (cur->content != NULL) { +- if (ctxt->format == 2) +- xmlOutputBufferWriteWSNonSig(ctxt, 0); +- else +- xmlOutputBufferWrite(buf, 1, " "); +- xmlOutputBufferWriteString(buf, (const char *)cur->content); +- } +- xmlOutputBufferWrite(buf, 2, "?>"); +- } else { +- xmlOutputBufferWrite(buf, 2, "name); +- if (ctxt->format == 2) +- xmlOutputBufferWriteWSNonSig(ctxt, 0); +- xmlOutputBufferWrite(buf, 2, "?>"); +- } +- return; +- } +- if (cur->type == XML_COMMENT_NODE) { +- if (cur->content != NULL) { +- xmlOutputBufferWrite(buf, 4, ""); +- } +- return; +- } +- if (cur->type == XML_ENTITY_REF_NODE) { +- xmlOutputBufferWrite(buf, 1, "&"); +- xmlOutputBufferWriteString(buf, (const char *)cur->name); +- xmlOutputBufferWrite(buf, 1, ";"); +- return; +- } +- if (cur->type == XML_CDATA_SECTION_NODE) { +- if (cur->content == NULL || *cur->content == '\0') { +- xmlOutputBufferWrite(buf, 12, ""); +- } else { +- start = end = cur->content; +- while (*end != '\0') { +- if ((*end == ']') && (*(end + 1) == ']') && +- (*(end + 2) == '>')) { +- end = end + 2; +- xmlOutputBufferWrite(buf, 9, ""); +- start = end; +- } +- end++; +- } +- if (start != end) { +- xmlOutputBufferWrite(buf, 9, ""); +- } +- } +- return; +- } +- if (cur->type == XML_ATTRIBUTE_NODE) { +- 
xmlAttrDumpOutput(ctxt, (xmlAttrPtr) cur); +- return; +- } +- if (cur->type == XML_NAMESPACE_DECL) { +- xmlNsDumpOutputCtxt(ctxt, (xmlNsPtr) cur); +- return; +- } ++ case XML_PI_NODE: ++ if ((cur != root) && (ctxt->format == 1) && (xmlIndentTreeOutput)) ++ xmlOutputBufferWrite(buf, ctxt->indent_size * ++ (ctxt->level > ctxt->indent_nr ? ++ ctxt->indent_nr : ctxt->level), ++ ctxt->indent); + +- format = ctxt->format; +- if (format == 1) { +- tmp = cur->children; +- while (tmp != NULL) { +- if ((tmp->type == XML_TEXT_NODE) || +- (tmp->type == XML_CDATA_SECTION_NODE) || +- (tmp->type == XML_ENTITY_REF_NODE)) { +- ctxt->format = 0; +- break; +- } +- tmp = tmp->next; +- } +- } +- xmlOutputBufferWrite(buf, 1, "<"); +- if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { +- xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); +- xmlOutputBufferWrite(buf, 1, ":"); +- } ++ if (cur->content != NULL) { ++ xmlOutputBufferWrite(buf, 2, "name); ++ if (cur->content != NULL) { ++ if (ctxt->format == 2) ++ xmlOutputBufferWriteWSNonSig(ctxt, 0); ++ else ++ xmlOutputBufferWrite(buf, 1, " "); ++ xmlOutputBufferWriteString(buf, ++ (const char *)cur->content); ++ } ++ xmlOutputBufferWrite(buf, 2, "?>"); ++ } else { ++ xmlOutputBufferWrite(buf, 2, "name); ++ if (ctxt->format == 2) ++ xmlOutputBufferWriteWSNonSig(ctxt, 0); ++ xmlOutputBufferWrite(buf, 2, "?>"); ++ } ++ break; + +- xmlOutputBufferWriteString(buf, (const char *)cur->name); +- if (cur->nsDef) +- xmlNsListDumpOutputCtxt(ctxt, cur->nsDef); +- if (cur->properties != NULL) +- xmlAttrListDumpOutput(ctxt, cur->properties); +- +- if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) && +- (cur->children == NULL) && ((ctxt->options & XML_SAVE_NO_EMPTY) == 0)) { +- if (ctxt->format == 2) +- xmlOutputBufferWriteWSNonSig(ctxt, 0); +- xmlOutputBufferWrite(buf, 2, "/>"); +- ctxt->format = format; +- return; +- } +- if (ctxt->format == 2) +- xmlOutputBufferWriteWSNonSig(ctxt, 1); +- xmlOutputBufferWrite(buf, 1, ">"); 
+- if ((cur->type != XML_ELEMENT_NODE) && (cur->content != NULL)) { +- xmlOutputBufferWriteEscape(buf, cur->content, ctxt->escape); +- } +- if (cur->children != NULL) { +- if (ctxt->format == 1) xmlOutputBufferWrite(buf, 1, "\n"); +- if (ctxt->level >= 0) ctxt->level++; +- xmlNodeListDumpOutput(ctxt, cur->children); +- if (ctxt->level > 0) ctxt->level--; +- if ((xmlIndentTreeOutput) && (ctxt->format == 1)) +- xmlOutputBufferWrite(buf, ctxt->indent_size * +- (ctxt->level > ctxt->indent_nr ? +- ctxt->indent_nr : ctxt->level), +- ctxt->indent); +- } +- xmlOutputBufferWrite(buf, 2, "ns != NULL) && (cur->ns->prefix != NULL)) { +- xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); +- xmlOutputBufferWrite(buf, 1, ":"); +- } ++ case XML_COMMENT_NODE: ++ if ((cur != root) && (ctxt->format == 1) && (xmlIndentTreeOutput)) ++ xmlOutputBufferWrite(buf, ctxt->indent_size * ++ (ctxt->level > ctxt->indent_nr ? ++ ctxt->indent_nr : ctxt->level), ++ ctxt->indent); + +- xmlOutputBufferWriteString(buf, (const char *)cur->name); +- if (ctxt->format == 2) +- xmlOutputBufferWriteWSNonSig(ctxt, 0); +- xmlOutputBufferWrite(buf, 1, ">"); +- ctxt->format = format; ++ if (cur->content != NULL) { ++ xmlOutputBufferWrite(buf, 4, ""); ++ } ++ break; ++ ++ case XML_ENTITY_REF_NODE: ++ xmlOutputBufferWrite(buf, 1, "&"); ++ xmlOutputBufferWriteString(buf, (const char *)cur->name); ++ xmlOutputBufferWrite(buf, 1, ";"); ++ break; ++ ++ case XML_CDATA_SECTION_NODE: ++ if (cur->content == NULL || *cur->content == '\0') { ++ xmlOutputBufferWrite(buf, 12, ""); ++ } else { ++ start = end = cur->content; ++ while (*end != '\0') { ++ if ((*end == ']') && (*(end + 1) == ']') && ++ (*(end + 2) == '>')) { ++ end = end + 2; ++ xmlOutputBufferWrite(buf, 9, ""); ++ start = end; ++ } ++ end++; ++ } ++ if (start != end) { ++ xmlOutputBufferWrite(buf, 9, ""); ++ } ++ } ++ break; ++ ++ case XML_ATTRIBUTE_NODE: ++ xmlAttrDumpOutput(ctxt, (xmlAttrPtr) cur); ++ break; ++ ++ case XML_NAMESPACE_DECL: ++ 
xmlNsDumpOutputCtxt(ctxt, (xmlNsPtr) cur); ++ break; ++ ++ default: ++ break; ++ } ++ ++ while (1) { ++ if (cur == root) ++ return; ++ if (ctxt->format == 1) { ++ xmlOutputBufferWrite(buf, 1, "\n"); ++ } ++ if (cur->next != NULL) { ++ cur = cur->next; ++ break; ++ } ++ ++ cur = cur->parent; ++ ++ if (ctxt->level > 0) ctxt->level--; ++ if ((xmlIndentTreeOutput) && (ctxt->format == 1)) ++ xmlOutputBufferWrite(buf, ctxt->indent_size * ++ (ctxt->level > ctxt->indent_nr ? ++ ctxt->indent_nr : ctxt->level), ++ ctxt->indent); ++ if (cur == unformattedNode) { ++ ctxt->format = format; ++ unformattedNode = NULL; ++ } ++ ++ xmlOutputBufferWrite(buf, 2, "ns != NULL) && (cur->ns->prefix != NULL)) { ++ xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); ++ xmlOutputBufferWrite(buf, 1, ":"); ++ } ++ ++ xmlOutputBufferWriteString(buf, (const char *)cur->name); ++ if (ctxt->format == 2) ++ xmlOutputBufferWriteWSNonSig(ctxt, 0); ++ xmlOutputBufferWrite(buf, 1, ">"); ++ } ++ } + } + + /** +@@ -1865,12 +1862,25 @@ xmlSaveDoc(xmlSaveCtxtPtr ctxt, xmlDocPtr doc) + * Returns the number of byte written or -1 in case of error + */ + long +-xmlSaveTree(xmlSaveCtxtPtr ctxt, xmlNodePtr node) ++xmlSaveTree(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) + { + long ret = 0; + +- if ((ctxt == NULL) || (node == NULL)) return(-1); +- xmlNodeDumpOutputInternal(ctxt, node); ++ if ((ctxt == NULL) || (cur == NULL)) return(-1); ++#ifdef LIBXML_HTML_ENABLED ++ if (ctxt->options & XML_SAVE_XHTML) { ++ xhtmlNodeDumpOutput(ctxt, cur); ++ return(ret); ++ } ++ if (((cur->type != XML_NAMESPACE_DECL) && (cur->doc != NULL) && ++ (cur->doc->type == XML_HTML_DOCUMENT_NODE) && ++ ((ctxt->options & XML_SAVE_AS_XML) == 0)) || ++ (ctxt->options & XML_SAVE_AS_HTML)) { ++ htmlNodeDumpOutputInternal(ctxt, cur); ++ return(ret); ++ } ++#endif ++ xmlNodeDumpOutputInternal(ctxt, cur); + return(ret); + } + +-- +1.8.3.1 + diff --git a/More-NodeDumpOutput-fixes.patch b/More-NodeDumpOutput-fixes.patch new file mode 100644 
index 0000000000000000000000000000000000000000..26fefc4357e4179226d4a39ffb235154350d962a --- /dev/null +++ b/More-NodeDumpOutput-fixes.patch @@ -0,0 +1,55 @@ +From 1a360c1c2ec950f478d55b31722ecf78f5698e97 Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Wed, 29 Jul 2020 00:39:15 +0200 +Subject: [PATCH] More *NodeDumpOutput fixes + +When leaving nodes, restrict more operations to XML_ELEMENT_NODEs. +--- + xmlsave.c | 44 ++++++++++++++++++++++---------------------- + 1 file changed, 22 insertions(+), 22 deletions(-) + +diff --git a/xmlsave.c b/xmlsave.c +index f2e0ea8..f1d40b9 100644 +--- a/xmlsave.c ++++ b/xmlsave.c +@@ -1058,14 +1058,14 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + + cur = cur->parent; + +- if (ctxt->level > 0) ctxt->level--; +- if ((xmlIndentTreeOutput) && (ctxt->format == 1)) +- xmlOutputBufferWrite(buf, ctxt->indent_size * +- (ctxt->level > ctxt->indent_nr ? +- ctxt->indent_nr : ctxt->level), +- ctxt->indent); +- + if (cur->type == XML_ELEMENT_NODE) { ++ if (ctxt->level > 0) ctxt->level--; ++ if ((xmlIndentTreeOutput) && (ctxt->format == 1)) ++ xmlOutputBufferWrite(buf, ctxt->indent_size * ++ (ctxt->level > ctxt->indent_nr ? 
++ ctxt->indent_nr : ctxt->level), ++ ctxt->indent); ++ + xmlOutputBufferWrite(buf, 2, "ns != NULL) && (cur->ns->prefix != NULL)) { + xmlOutputBufferWriteString(buf, +@@ -1077,11 +1077,11 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + if (ctxt->format == 2) + xmlOutputBufferWriteWSNonSig(ctxt, 0); + xmlOutputBufferWrite(buf, 1, ">"); +- } + +- if (cur == unformattedNode) { +- ctxt->format = format; +- unformattedNode = NULL; ++ if (cur == unformattedNode) { ++ ctxt->format = format; ++ unformattedNode = NULL; ++ } + } + } + } +-- +1.8.3.1 + diff --git a/Remove-dead-code-in-xinclude.c.patch b/Remove-dead-code-in-xinclude.c.patch new file mode 100644 index 0000000000000000000000000000000000000000..be1f3b2914235f7476c057de50a0926cc27da873 --- /dev/null +++ b/Remove-dead-code-in-xinclude.c.patch @@ -0,0 +1,103 @@ +From 74dcc10b556cc4d1088a2496f7e93f8a8040447e Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Mon, 17 Aug 2020 03:24:56 +0200 +Subject: [PATCH] Remove dead code in xinclude.c + +'doc' is checked for NULL in xmlXIncludeLoadDoc, so several code +paths can be eliminated. +--- + xinclude.c | 43 ++++++------------------------------------- + 1 file changed, 6 insertions(+), 37 deletions(-) + +diff --git a/xinclude.c b/xinclude.c +index 2423a93..36bdfae 100644 +--- a/xinclude.c ++++ b/xinclude.c +@@ -59,7 +59,6 @@ struct _xmlXIncludeRef { + xmlNodePtr inc; /* the included copy */ + int xml; /* xml or txt */ + int count; /* how many refs use that specific doc */ +- xmlXPathObjectPtr xptr; /* the xpointer if needed */ + int skip; /* skip in case of errors */ + int fallback; /* fallback was loaded */ + }; +@@ -211,8 +210,6 @@ xmlXIncludeFreeRef(xmlXIncludeRefPtr ref) { + xmlFree(ref->URI); + if (ref->fragment != NULL) + xmlFree(ref->fragment); +- if (ref->xptr != NULL) +- xmlXPathFreeObject(ref->xptr); + xmlFree(ref); + } + +@@ -1557,15 +1554,8 @@ loaded: + /* + * Add the top children list as the replacement copy. 
+ */ +- if (doc == NULL) +- { +- /* Hopefully a DTD declaration won't be copied from +- * the same document */ +- ctxt->incTab[nr]->inc = xmlCopyNodeList(ctxt->doc->children); +- } else { +- ctxt->incTab[nr]->inc = xmlXIncludeCopyNodeList(ctxt, ctxt->doc, +- doc, doc->children); +- } ++ ctxt->incTab[nr]->inc = xmlXIncludeCopyNodeList(ctxt, ctxt->doc, ++ doc, doc->children); + } + #ifdef LIBXML_XPTR_ENABLED + else { +@@ -1577,12 +1567,7 @@ loaded: + xmlXPathContextPtr xptrctxt; + xmlNodeSetPtr set; + +- if (doc == NULL) { +- xptrctxt = xmlXPtrNewContext(ctxt->doc, ctxt->incTab[nr]->ref, +- NULL); +- } else { +- xptrctxt = xmlXPtrNewContext(doc, NULL, NULL); +- } ++ xptrctxt = xmlXPtrNewContext(doc, NULL, NULL); + if (xptrctxt == NULL) { + xmlXIncludeErr(ctxt, ctxt->incTab[nr]->ref, + XML_XINCLUDE_XPTR_FAILED, +@@ -1686,14 +1671,9 @@ loaded: + } + } + } +- if (doc == NULL) { +- ctxt->incTab[nr]->xptr = xptr; +- ctxt->incTab[nr]->inc = NULL; +- } else { +- ctxt->incTab[nr]->inc = +- xmlXIncludeCopyXPointer(ctxt, ctxt->doc, doc, xptr); +- xmlXPathFreeObject(xptr); +- } ++ ctxt->incTab[nr]->inc = ++ xmlXIncludeCopyXPointer(ctxt, ctxt->doc, doc, xptr); ++ xmlXPathFreeObject(xptr); + xmlXPathFreeContext(xptrctxt); + xmlFree(fragment); + } +@@ -2212,17 +2192,6 @@ xmlXIncludeIncludeNode(xmlXIncludeCtxtPtr ctxt, int nr) { + if ((cur == NULL) || (cur->type == XML_NAMESPACE_DECL)) + return(-1); + +- /* +- * If we stored an XPointer a late computation may be needed +- */ +- if ((ctxt->incTab[nr]->inc == NULL) && +- (ctxt->incTab[nr]->xptr != NULL)) { +- ctxt->incTab[nr]->inc = +- xmlXIncludeCopyXPointer(ctxt, ctxt->doc, ctxt->doc, +- ctxt->incTab[nr]->xptr); +- xmlXPathFreeObject(ctxt->incTab[nr]->xptr); +- ctxt->incTab[nr]->xptr = NULL; +- } + list = ctxt->incTab[nr]->inc; + ctxt->incTab[nr]->inc = NULL; + +-- +1.8.3.1 + diff --git a/Remove-unused-encoding-parameter-of-HTML-output-func.patch b/Remove-unused-encoding-parameter-of-HTML-output-func.patch new file mode 100644 
index 0000000000000000000000000000000000000000..f5013ecaded6b3acc7af4296b0a87e64bfa5f7ec --- /dev/null +++ b/Remove-unused-encoding-parameter-of-HTML-output-func.patch @@ -0,0 +1,133 @@ +From e6495e47890afacfc3513a9161671e8d228ccc76 Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Sun, 7 Feb 2021 13:38:01 +0100 +Subject: [PATCH] Remove unused encoding parameter of HTML output functions + +The encoding string is unused. Encodings are set by way of the output +buffer. +--- + HTMLtree.c | 34 +++++++++++++++++----------------- + 1 file changed, 17 insertions(+), 17 deletions(-) + +diff --git a/HTMLtree.c b/HTMLtree.c +index 8d0c779..24434d4 100644 +--- a/HTMLtree.c ++++ b/HTMLtree.c +@@ -518,7 +518,7 @@ htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc, + buf = xmlOutputBufferCreateFile(out, handler); + if (buf == NULL) return(0); + +- htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format); ++ htmlNodeDumpFormatOutput(buf, doc, cur, NULL, format); + + ret = xmlOutputBufferClose(buf); + return(ret); +@@ -670,13 +670,11 @@ htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + * @buf: the HTML buffer output + * @doc: the document + * @cur: the attribute pointer +- * @encoding: the encoding string + * + * Dump an HTML attribute + */ + static void +-htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, +- const char *encoding ATTRIBUTE_UNUSED) { ++htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) { + xmlChar *value; + + /* +@@ -737,14 +735,15 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, + * @buf: the HTML buffer output + * @doc: the document + * @cur: the current node +- * @encoding: the encoding string ++ * @encoding: the encoding string (unused) + * @format: should formatting spaces been added + * + * Dump an HTML node, recursive behaviour,children are printed too. 
+ */ + void + htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, +- xmlNodePtr cur, const char *encoding, int format) { ++ xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED, ++ int format) { + xmlNodePtr root; + xmlAttrPtr attr; + const htmlElemDesc * info; +@@ -788,7 +787,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + xmlNsListDumpOutput(buf, cur->nsDef); + attr = cur->properties; + while (attr != NULL) { +- htmlAttrDumpOutput(buf, doc, attr, encoding); ++ htmlAttrDumpOutput(buf, doc, attr); + attr = attr->next; + } + +@@ -835,7 +834,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + break; + + case XML_ATTRIBUTE_NODE: +- htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding); ++ htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur); + break; + + case HTML_TEXT_NODE: +@@ -955,44 +954,45 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + * @buf: the HTML buffer output + * @doc: the document + * @cur: the current node +- * @encoding: the encoding string ++ * @encoding: the encoding string (unused) + * + * Dump an HTML node, recursive behaviour,children are printed too, + * and formatting returns/spaces are added. + */ + void + htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, +- xmlNodePtr cur, const char *encoding) { +- htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1); ++ xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED) { ++ htmlNodeDumpFormatOutput(buf, doc, cur, NULL, 1); + } + + /** + * htmlDocContentDumpFormatOutput: + * @buf: the HTML buffer output + * @cur: the document +- * @encoding: the encoding string ++ * @encoding: the encoding string (unused) + * @format: should formatting spaces been added + * + * Dump an HTML document. 
+ */ + void + htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, +- const char *encoding, int format) { +- htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, encoding, format); ++ const char *encoding ATTRIBUTE_UNUSED, ++ int format) { ++ htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, NULL, format); + } + + /** + * htmlDocContentDumpOutput: + * @buf: the HTML buffer output + * @cur: the document +- * @encoding: the encoding string ++ * @encoding: the encoding string (unused) + * + * Dump an HTML document. Formatting return/spaces are added. + */ + void + htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, +- const char *encoding) { +- htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, encoding, 1); ++ const char *encoding ATTRIBUTE_UNUSED) { ++ htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, NULL, 1); + } + + /************************************************************************ +-- +1.8.3.1 + diff --git a/Revert-Fix-quadratic-runtime-in-xi-fallback-processi.patch b/Revert-Fix-quadratic-runtime-in-xi-fallback-processi.patch new file mode 100644 index 0000000000000000000000000000000000000000..75939920ccfcc8c80a1176d3bd998dd92e2727ce --- /dev/null +++ b/Revert-Fix-quadratic-runtime-in-xi-fallback-processi.patch @@ -0,0 +1,67 @@ +From 19cae17f5a2acfbd5554d145bb87cd6bf2de244f Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Wed, 19 Aug 2020 13:07:28 +0200 +Subject: [PATCH] Revert "Fix quadratic runtime in xi:fallback processing" + +This reverts commit 27119ec33c9f6b9830efa1e0da0acfa353dfa55a. + +Not copying fallback children didn't fix up namespaces and could lead +to use-after-free errors. + +Found by OSS-Fuzz. 
+--- + xinclude.c | 23 ++++++++++++----------- + 1 file changed, 12 insertions(+), 11 deletions(-) + +diff --git a/xinclude.c b/xinclude.c +index 3c810ca..9024535 100644 +--- a/xinclude.c ++++ b/xinclude.c +@@ -1984,7 +1984,8 @@ xmlXIncludeLoadFallback(xmlXIncludeCtxtPtr ctxt, xmlNodePtr fallback, int nr) { + ret = -1; + xmlXIncludeFreeContext(newctxt); + +- ctxt->incTab[nr]->inc = fallback->children; ++ ctxt->incTab[nr]->inc = xmlDocCopyNodeList(ctxt->doc, ++ fallback->children); + } else { + ctxt->incTab[nr]->inc = NULL; + } +@@ -2240,6 +2241,12 @@ xmlXIncludeIncludeNode(xmlXIncludeCtxtPtr ctxt, int nr) { + if (ctxt->incTab[nr]->fallback) + xmlUnsetProp(cur, BAD_CAST "href"); + cur->type = XML_XINCLUDE_START; ++ /* Remove fallback children */ ++ for (child = cur->children; child != NULL; child = next) { ++ next = child->next; ++ xmlUnlinkNode(child); ++ xmlFreeNode(child); ++ } + end = xmlNewDocNode(cur->doc, cur->ns, cur->name, NULL); + if (end == NULL) { + xmlXIncludeErr(ctxt, ctxt->incTab[nr]->ref, +@@ -2255,17 +2262,11 @@ xmlXIncludeIncludeNode(xmlXIncludeCtxtPtr ctxt, int nr) { + * Add the list of nodes + */ + while (list != NULL) { +- next = list->next; +- xmlAddPrevSibling(end, list); +- list = next; +- } ++ cur = list; ++ list = list->next; + +- /* Remove fallback node */ +- for (child = cur->children; child != NULL; child = next) { +- next = child->next; +- xmlUnlinkNode(child); +- xmlFreeNode(child); +- } ++ xmlAddPrevSibling(end, cur); ++ } + } + + +-- +1.8.3.1 + diff --git a/Work-around-lxml-API-abuse.patch b/Work-around-lxml-API-abuse.patch new file mode 100644 index 0000000000000000000000000000000000000000..8bb91b443405021e2def8536f8b85b3441a4fe43 --- /dev/null +++ b/Work-around-lxml-API-abuse.patch @@ -0,0 +1,212 @@ +From 85b1792e37b131e7a51af98a37f92472e8de5f3f Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Tue, 18 May 2021 20:08:28 +0200 +Subject: [PATCH] Work around lxml API abuse + +Make xmlNodeDumpOutput and htmlNodeDumpFormatOutput 
work with corrupted +parent pointers. This used to work with the old recursive code but the +non-recursive rewrite required parent pointers to be set correctly. + +Unfortunately, lxml relies on the old behavior and passes subtrees with +a corrupted structure. Fall back to a recursive function call if an +invalid parent pointer is detected. + +Fixes #255. +--- + HTMLtree.c | 46 ++++++++++++++++++++++++++++------------------ + xmlsave.c | 31 +++++++++++++++++++++---------- + 2 files changed, 49 insertions(+), 28 deletions(-) + +diff --git a/HTMLtree.c b/HTMLtree.c +index 24434d4..bdd639c 100644 +--- a/HTMLtree.c ++++ b/HTMLtree.c +@@ -744,7 +744,7 @@ void + htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED, + int format) { +- xmlNodePtr root; ++ xmlNodePtr root, parent; + xmlAttrPtr attr; + const htmlElemDesc * info; + +@@ -755,6 +755,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + } + + root = cur; ++ parent = cur->parent; + while (1) { + switch (cur->type) { + case XML_HTML_DOCUMENT_NODE: +@@ -762,7 +763,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + if (((xmlDocPtr) cur)->intSubset != NULL) { + htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL); + } +- if (cur->children != NULL) { ++ /* Always validate cur->parent when descending. */ ++ if ((cur->parent == parent) && (cur->children != NULL)) { ++ parent = cur; + cur = cur->children; + continue; + } +@@ -770,6 +773,16 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + + case XML_ELEMENT_NODE: + /* ++ * Some users like lxml are known to pass nodes with a corrupted ++ * tree structure. Fall back to a recursive call to handle this ++ * case. ++ */ ++ if ((cur->parent != parent) && (cur->children != NULL)) { ++ htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format); ++ break; ++ } ++ ++ /* + * Get specific HTML info for that node. 
+ */ + if (cur->ns == NULL) +@@ -817,6 +830,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + (cur->name != NULL) && + (cur->name[0] != 'p')) /* p, pre, param */ + xmlOutputBufferWriteString(buf, "\n"); ++ parent = cur; + cur = cur->children; + continue; + } +@@ -825,9 +839,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + (info != NULL) && (!info->isinline)) { + if ((cur->next->type != HTML_TEXT_NODE) && + (cur->next->type != HTML_ENTITY_REF_NODE) && +- (cur->parent != NULL) && +- (cur->parent->name != NULL) && +- (cur->parent->name[0] != 'p')) /* p, pre, param */ ++ (parent != NULL) && ++ (parent->name != NULL) && ++ (parent->name[0] != 'p')) /* p, pre, param */ + xmlOutputBufferWriteString(buf, "\n"); + } + +@@ -842,9 +856,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + break; + if (((cur->name == (const xmlChar *)xmlStringText) || + (cur->name != (const xmlChar *)xmlStringTextNoenc)) && +- ((cur->parent == NULL) || +- ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) && +- (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) { ++ ((parent == NULL) || ++ ((xmlStrcasecmp(parent->name, BAD_CAST "script")) && ++ (xmlStrcasecmp(parent->name, BAD_CAST "style"))))) { + xmlChar *buffer; + + buffer = xmlEncodeEntitiesReentrant(doc, cur->content); +@@ -902,13 +916,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + break; + } + +- /* +- * The parent should never be NULL here but we want to handle +- * corrupted documents gracefully. +- */ +- if (cur->parent == NULL) +- return; +- cur = cur->parent; ++ cur = parent; ++ /* cur->parent was validated when descending. 
*/ ++ parent = cur->parent; + + if ((cur->type == XML_HTML_DOCUMENT_NODE) || + (cur->type == XML_DOCUMENT_NODE)) { +@@ -939,9 +949,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, + (cur->next != NULL)) { + if ((cur->next->type != HTML_TEXT_NODE) && + (cur->next->type != HTML_ENTITY_REF_NODE) && +- (cur->parent != NULL) && +- (cur->parent->name != NULL) && +- (cur->parent->name[0] != 'p')) /* p, pre, param */ ++ (parent != NULL) && ++ (parent->name != NULL) && ++ (parent->name[0] != 'p')) /* p, pre, param */ + xmlOutputBufferWriteString(buf, "\n"); + } + } +diff --git a/xmlsave.c b/xmlsave.c +index 61a4045..aedbd5e 100644 +--- a/xmlsave.c ++++ b/xmlsave.c +@@ -847,7 +847,7 @@ htmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + static void + xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + int format = ctxt->format; +- xmlNodePtr tmp, root, unformattedNode = NULL; ++ xmlNodePtr tmp, root, unformattedNode = NULL, parent; + xmlAttrPtr attr; + xmlChar *start, *end; + xmlOutputBufferPtr buf; +@@ -856,6 +856,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + buf = ctxt->buf; + + root = cur; ++ parent = cur->parent; + while (1) { + switch (cur->type) { + case XML_DOCUMENT_NODE: +@@ -868,7 +869,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + break; + + case XML_DOCUMENT_FRAG_NODE: +- if (cur->children != NULL) { ++ /* Always validate cur->parent when descending. */ ++ if ((cur->parent == parent) && (cur->children != NULL)) { ++ parent = cur; + cur = cur->children; + continue; + } +@@ -887,7 +890,18 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + break; + + case XML_ELEMENT_NODE: +- if ((cur != root) && (ctxt->format == 1) && (xmlIndentTreeOutput)) ++ /* ++ * Some users like lxml are known to pass nodes with a corrupted ++ * tree structure. Fall back to a recursive call to handle this ++ * case. 
++ */ ++ if ((cur->parent != parent) && (cur->children != NULL)) { ++ xmlNodeDumpOutputInternal(ctxt, cur); ++ break; ++ } ++ ++ if ((ctxt->level > 0) && (ctxt->format == 1) && ++ (xmlIndentTreeOutput)) + xmlOutputBufferWrite(buf, ctxt->indent_size * + (ctxt->level > ctxt->indent_nr ? + ctxt->indent_nr : ctxt->level), +@@ -942,6 +956,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + xmlOutputBufferWrite(buf, 1, ">"); + if (ctxt->format == 1) xmlOutputBufferWrite(buf, 1, "\n"); + if (ctxt->level >= 0) ctxt->level++; ++ parent = cur; + cur = cur->children; + continue; + } +@@ -1058,13 +1073,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { + break; + } + +- /* +- * The parent should never be NULL here but we want to handle +- * corrupted documents gracefully. +- */ +- if (cur->parent == NULL) +- return; +- cur = cur->parent; ++ cur = parent; ++ /* cur->parent was validated when descending. */ ++ parent = cur->parent; + + if (cur->type == XML_ELEMENT_NODE) { + if (ctxt->level > 0) ctxt->level--; +-- +1.8.3.1 + diff --git a/libxml2.spec b/libxml2.spec index 051b7a481e5d96d0e24cc9db6b072e71989aaa1e..295181553b4d48303b5a1964adc77cabfee0e57b 100644 --- a/libxml2.spec +++ b/libxml2.spec @@ -1,7 +1,7 @@ Summary: Library providing XML and HTML support Name: libxml2 Version: 2.9.10 -Release: 23 +Release: 24 License: MIT Group: Development/Libraries Source: ftp://xmlsoft.org/libxml2/libxml2-%{version}.tar.gz @@ -84,7 +84,16 @@ Patch72: Fix-use-after-free-with-xmllint-html-push.patch Patch73: Fix-xmlGetNodePath-with-invalid-node-types.patch Patch74: Stop-checking-attributes-for-UTF-8-validity.patch Patch75: CVE-2021-3541.patch - +Patch76: Fix-corner-case-with-empty-xi-fallback.patch +Patch77: Fix-quadratic-runtime-in-xi-fallback-processing.patch +Patch78: Fix-error-reporting-with-xi-fallback.patch +Patch79: Revert-Fix-quadratic-runtime-in-xi-fallback-processi.patch +Patch80: Remove-dead-code-in-xinclude.c.patch +Patch81: 
Fix-regression-introduced-with-commit-74dcc10b.patch +Patch82: Fix-regression-introduced-with-commit-d88df4b.patch +Patch83: Make-xmlNodeDumpOutputInternal-non-recursive.patch +Patch84: Fix-NodeDumpOutput-functions.patch +Patch85: Make-htmlNodeDumpFormatOutput-non-recursive.patch Patch86: Fix-memory-leaks-in-XPointer-string-range-function.patch Patch87: Fix-null-pointer-deref-in-xmlXPtrRangeInsideFunction.patch Patch88: Stop-using-maxParserDepth-in-xpath.c.patch @@ -101,6 +110,13 @@ Patch98: Fix-null-deref-in-xmlStringGetNodeList.patch Patch99: Fix-memory-leak-in-xmlParseElementMixedContentDecl.patch Patch100:Fix-slow-parsing-of-HTML-with-encoding-errors.patch +Patch101:More-NodeDumpOutput-fixes.patch +Patch102:Don-t-add-formatting-newlines-to-XInclude-nodes.patch +Patch103:Handle-dumps-of-corrupted-documents-more-gracefully.patch +Patch104:Remove-unused-encoding-parameter-of-HTML-output-func.patch +Patch105:Work-around-lxml-API-abuse.patch +Patch106:Fix-regression-in-xmlNodeDumpOutputInternal.patch + BuildRoot: %{_tmppath}/%{name}-%{version}-root BuildRequires: python2-devel BuildRequires: python3-devel @@ -292,6 +308,12 @@ rm -fr %{buildroot} %changelog +* Thu Dec 2 2021 panxiaohe - 2.9.10-24 +- Type:bugfix +- ID:NA +- SUG:NA +- DESC:backport upstream patches + * Sat Nov 27 2021 Wentao Fan - 2.9.10-23 - Type:bugfix - ID:NA