From a3d08d8657e3889791c925ad010994b6542ab200 Mon Sep 17 00:00:00 2001 From: panxiaohe Date: Thu, 11 Nov 2021 20:19:02 +0800 Subject: [PATCH] fix issues about SEGV, memleak, overflow, etc. --- Fix-SEGV-in-xmlSAXParseFileWithData.patch | 26 +++++ ...k-in-xmlParseElementMixedContentDecl.patch | 42 ++++++++ Fix-null-deref-in-xmlStringGetNodeList.patch | 30 ++++++ ...parsing-of-HTML-with-encoding-errors.patch | 100 ++++++++++++++++++ Fix-undefined-behavior-in-UTF16LEToUTF8.patch | 39 +++++++ ...eger-overflow-in-htmlParseTryOrFinis.patch | 79 ++++++++++++++ ...leak-in-xmlRegisterCharEncodingHandl.patch | 55 ++++++++++ libxml2.spec | 22 +++- 8 files changed, 392 insertions(+), 1 deletion(-) create mode 100644 Fix-SEGV-in-xmlSAXParseFileWithData.patch create mode 100644 Fix-memory-leak-in-xmlParseElementMixedContentDecl.patch create mode 100644 Fix-null-deref-in-xmlStringGetNodeList.patch create mode 100644 Fix-slow-parsing-of-HTML-with-encoding-errors.patch create mode 100644 Fix-undefined-behavior-in-UTF16LEToUTF8.patch create mode 100644 Fix-unsigned-integer-overflow-in-htmlParseTryOrFinis.patch create mode 100644 encoding-fix-memleak-in-xmlRegisterCharEncodingHandl.patch diff --git a/Fix-SEGV-in-xmlSAXParseFileWithData.patch b/Fix-SEGV-in-xmlSAXParseFileWithData.patch new file mode 100644 index 0000000..b65894b --- /dev/null +++ b/Fix-SEGV-in-xmlSAXParseFileWithData.patch @@ -0,0 +1,26 @@ +From 7929f05710134b9b243952019b6c14066cd3ac9e Mon Sep 17 00:00:00 2001 +From: yanjinjq +Date: Sun, 30 Aug 2020 10:34:01 +0000 +Subject: [PATCH] Fix SEGV in xmlSAXParseFileWithData + +Fixes #181. 
+--- + parser.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/parser.c b/parser.c +index be14c32..f779eb6 100644 +--- a/parser.c ++++ b/parser.c +@@ -14077,7 +14077,7 @@ xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename, + + if ((ctxt->wellFormed) || recovery) { + ret = ctxt->myDoc; +- if (ret != NULL) { ++ if ((ret != NULL) && (ctxt->input->buf != NULL)) { + if (ctxt->input->buf->compressed > 0) + ret->compression = 9; + else +-- +1.8.3.1 + diff --git a/Fix-memory-leak-in-xmlParseElementMixedContentDecl.patch b/Fix-memory-leak-in-xmlParseElementMixedContentDecl.patch new file mode 100644 index 0000000..29b59a2 --- /dev/null +++ b/Fix-memory-leak-in-xmlParseElementMixedContentDecl.patch @@ -0,0 +1,42 @@ +From 45da175c1431d69e74e05a115f0b14cc8c97d886 Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Fri, 18 Dec 2020 12:14:52 +0100 +Subject: [PATCH] Fix memory leak in xmlParseElementMixedContentDecl + +Free parsed content if malloc fails to avoid a memory leak. + +Found with libFuzzer. 
+--- + parser.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/parser.c b/parser.c +index 85494df..43b8835 100644 +--- a/parser.c ++++ b/parser.c +@@ -6082,14 +6082,20 @@ xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) { + NEXT; + if (elem == NULL) { + ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); +- if (ret == NULL) return(NULL); ++ if (ret == NULL) { ++ xmlFreeDocElementContent(ctxt->myDoc, cur); ++ return(NULL); ++ } + ret->c1 = cur; + if (cur != NULL) + cur->parent = ret; + cur = ret; + } else { + n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR); +- if (n == NULL) return(NULL); ++ if (n == NULL) { ++ xmlFreeDocElementContent(ctxt->myDoc, ret); ++ return(NULL); ++ } + n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT); + if (n->c1 != NULL) + n->c1->parent = n; +-- +1.8.3.1 + diff --git a/Fix-null-deref-in-xmlStringGetNodeList.patch b/Fix-null-deref-in-xmlStringGetNodeList.patch new file mode 100644 index 0000000..6e14174 --- /dev/null +++ b/Fix-null-deref-in-xmlStringGetNodeList.patch @@ -0,0 +1,30 @@ +From 1d73f07d67e32d8eaccd85bc46c5d277a1dc00c9 Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Fri, 18 Dec 2020 00:55:00 +0100 +Subject: [PATCH] Fix null deref in xmlStringGetNodeList + +Check for malloc failure to avoid null deref. + +Found with libFuzzer. 
+--- + tree.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/tree.c b/tree.c +index 64572d9..2130d55 100644 +--- a/tree.c ++++ b/tree.c +@@ -1649,6 +1649,10 @@ xmlStringGetNodeList(const xmlDoc *doc, const xmlChar *value) { + + if (!xmlBufIsEmpty(buf)) { + node = xmlNewDocText(doc, NULL); ++ if (node == NULL) { ++ xmlBufFree(buf); ++ return(NULL); ++ } + node->content = xmlBufDetach(buf); + + if (last == NULL) { +-- +1.8.3.1 + diff --git a/Fix-slow-parsing-of-HTML-with-encoding-errors.patch b/Fix-slow-parsing-of-HTML-with-encoding-errors.patch new file mode 100644 index 0000000..72be38b --- /dev/null +++ b/Fix-slow-parsing-of-HTML-with-encoding-errors.patch @@ -0,0 +1,100 @@ +From dcb80b92da0417bc5b3d97ab8a61381973f1711b Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Sat, 20 Feb 2021 20:30:43 +0100 +Subject: [PATCH] Fix slow parsing of HTML with encoding errors + +Under certain circumstances, the HTML parser would try to guess and +switch input encodings multiple times, leading to slow processing of +documents with encoding errors. The repeated scanning of the input +buffer when guessing encodings could even lead to quadratic behavior. + +The code htmlCurrentChar probably assumed that if there's an encoding +handler, it is guaranteed to produce valid UTF-8. This holds true in +general, but if the detected encoding was "UTF-8", the UTF8ToUTF8 +encoding handler simply invoked memcpy without checking for invalid +UTF-8. This still must be fixed, preferably by not using this handler +at all. + +Also leave a note that switching encodings twice seems impossible to +implement correctly. Add a check when handling UTF-8 encoding errors +in htmlCurrentChar to avoid this situation, even if encoders produce +invalid UTF-8. + +Found by OSS-Fuzz. 
+--- + HTMLparser.c | 18 ++++++++++++++++-- + encoding.c | 5 +++++ + parserInternals.c | 5 +++++ + 3 files changed, 26 insertions(+), 2 deletions(-) + +diff --git a/HTMLparser.c b/HTMLparser.c +index 14cc56f..c9a64c7 100644 +--- a/HTMLparser.c ++++ b/HTMLparser.c +@@ -457,7 +457,12 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) { + ctxt->input->encoding = guess; + handler = xmlFindCharEncodingHandler((const char *) guess); + if (handler != NULL) { +- xmlSwitchToEncoding(ctxt, handler); ++ /* ++ * Don't use UTF-8 encoder which isn't required and ++ * can produce invalid UTF-8. ++ */ ++ if (!xmlStrEqual(BAD_CAST handler->name, BAD_CAST "UTF-8")) ++ xmlSwitchToEncoding(ctxt, handler); + } else { + htmlParseErr(ctxt, XML_ERR_INVALID_ENCODING, + "Unsupported encoding %s", guess, NULL); +@@ -570,7 +575,16 @@ encoding_error: + BAD_CAST buffer, NULL); + } + +- ctxt->charset = XML_CHAR_ENCODING_8859_1; ++ /* ++ * Don't switch encodings twice. Note that if there's an encoder, we ++ * shouldn't receive invalid UTF-8 anyway. ++ * ++ * Note that if ctxt->input->buf == NULL, switching encodings is ++ * impossible, see Gitlab issue #34. ++ */ ++ if ((ctxt->input->buf != NULL) && ++ (ctxt->input->buf->encoder == NULL)) ++ xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1); + *len = 1; + return((int) *ctxt->input->cur); + } +diff --git a/encoding.c b/encoding.c +index d67c16d..cdff6ae 100644 +--- a/encoding.c ++++ b/encoding.c +@@ -373,6 +373,11 @@ UTF8ToUTF8(unsigned char* out, int *outlen, + if (len < 0) + return(-1); + ++ /* ++ * FIXME: Conversion functions must assure valid UTF-8, so we have ++ * to check for UTF-8 validity. Preferably, this converter shouldn't ++ * be used at all. 
++ */ + memcpy(out, inb, len); + + *outlen = len; +diff --git a/parserInternals.c b/parserInternals.c +index b0629ef..cbcfde0 100644 +--- a/parserInternals.c ++++ b/parserInternals.c +@@ -1153,6 +1153,11 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input, + * Note: this is a bit dangerous, but that's what it + * takes to use nearly compatible signature for different + * encodings. ++ * ++ * FIXME: Encoders might buffer partial byte sequences, so ++ * this probably can't work. We should return an error and ++ * make sure that callers never try to switch the encoding ++ * twice. + */ + xmlCharEncCloseFunc(input->buf->encoder); + input->buf->encoder = handler; +-- +1.8.3.1 + diff --git a/Fix-undefined-behavior-in-UTF16LEToUTF8.patch b/Fix-undefined-behavior-in-UTF16LEToUTF8.patch new file mode 100644 index 0000000..a5d0fa8 --- /dev/null +++ b/Fix-undefined-behavior-in-UTF16LEToUTF8.patch @@ -0,0 +1,39 @@ +From 2f9382033e4c398dd1c9aae4d24fa9f649fbf23d Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Mon, 15 Jun 2020 15:45:47 +0200 +Subject: [PATCH] Fix undefined behavior in UTF16LEToUTF8 + +Don't perform arithmetic on null pointer. + +Found with libFuzzer and UBSan. 
+--- + encoding.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/encoding.c b/encoding.c +index 8b6f349..1a6386a 100644 +--- a/encoding.c ++++ b/encoding.c +@@ -496,13 +496,18 @@ UTF16LEToUTF8(unsigned char* out, int *outlen, + { + unsigned char* outstart = out; + const unsigned char* processed = inb; +- unsigned char* outend = out + *outlen; ++ unsigned char* outend; + unsigned short* in = (unsigned short*) inb; + unsigned short* inend; + unsigned int c, d, inlen; + unsigned char *tmp; + int bits; + ++ if (*outlen == 0) { ++ *inlenb = 0; ++ return(0); ++ } ++ outend = out + *outlen; + if ((*inlenb % 2) == 1) + (*inlenb)--; + inlen = *inlenb / 2; +-- +1.8.3.1 + diff --git a/Fix-unsigned-integer-overflow-in-htmlParseTryOrFinis.patch b/Fix-unsigned-integer-overflow-in-htmlParseTryOrFinis.patch new file mode 100644 index 0000000..029f70f --- /dev/null +++ b/Fix-unsigned-integer-overflow-in-htmlParseTryOrFinis.patch @@ -0,0 +1,79 @@ +From 681f094e5bd1d0f6b38b27701d0d1bf1ca7a9a26 Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Mon, 15 Jun 2020 15:23:05 +0200 +Subject: [PATCH] Fix unsigned integer overflow in htmlParseTryOrFinish + +Cast to signed type before subtraction to avoid unsigned integer +overflow. Also use ptrdiff_t to avoid potential integer truncation. + +Found with libFuzzer and UBSan. 
+--- + HTMLparser.c | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) + +diff --git a/HTMLparser.c b/HTMLparser.c +index be7e14f..9ade663 100644 +--- a/HTMLparser.c ++++ b/HTMLparser.c +@@ -5339,7 +5339,7 @@ static int + htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { + int ret = 0; + htmlParserInputPtr in; +- int avail = 0; ++ ptrdiff_t avail = 0; + xmlChar cur, next; + + htmlParserNodeInfo node_info; +@@ -5404,7 +5404,8 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { + if (in->buf == NULL) + avail = in->length - (in->cur - in->base); + else +- avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); ++ avail = (ptrdiff_t)xmlBufUse(in->buf->buffer) - ++ (in->cur - in->base); + if ((avail == 0) && (terminate)) { + htmlAutoCloseOnEnd(ctxt); + if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { +@@ -5440,7 +5441,8 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { + if (in->buf == NULL) + avail = in->length - (in->cur - in->base); + else +- avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); ++ avail = (ptrdiff_t)xmlBufUse(in->buf->buffer) - ++ (in->cur - in->base); + } + if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) + ctxt->sax->setDocumentLocator(ctxt->userData, +@@ -5482,7 +5484,8 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { + if (in->buf == NULL) + avail = in->length - (in->cur - in->base); + else +- avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); ++ avail = (ptrdiff_t)xmlBufUse(in->buf->buffer) - ++ (in->cur - in->base); + /* + * no chars in buffer + */ +@@ -5555,7 +5558,8 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { + if (in->buf == NULL) + avail = in->length - (in->cur - in->base); + else +- avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); ++ avail = (ptrdiff_t)xmlBufUse(in->buf->buffer) - ++ (in->cur - in->base); + if (avail < 2) + goto done; + cur = in->cur[0]; +@@ -5596,7 +5600,8 @@ 
htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { + if (in->buf == NULL) + avail = in->length - (in->cur - in->base); + else +- avail = xmlBufUse(in->buf->buffer) - (in->cur - in->base); ++ avail = (ptrdiff_t)xmlBufUse(in->buf->buffer) - ++ (in->cur - in->base); + if (avail < 1) + goto done; + cur = in->cur[0]; +-- +1.8.3.1 + diff --git a/encoding-fix-memleak-in-xmlRegisterCharEncodingHandl.patch b/encoding-fix-memleak-in-xmlRegisterCharEncodingHandl.patch new file mode 100644 index 0000000..317de2c --- /dev/null +++ b/encoding-fix-memleak-in-xmlRegisterCharEncodingHandl.patch @@ -0,0 +1,55 @@ +From 649d02eaa419fa72ae6b131718a4ac77063d7a5a Mon Sep 17 00:00:00 2001 +From: Xiaoming Ni +Date: Mon, 7 Dec 2020 20:19:53 +0800 +Subject: [PATCH] encoding: fix memleak in xmlRegisterCharEncodingHandler() + +The return type of xmlRegisterCharEncodingHandler() is void. The invoker +cannot determine whether xmlRegisterCharEncodingHandler() is executed +successfully. When nbCharEncodingHandler >= MAX_ENCODING_HANDLERS, the +"handler" is not added to the array "handlers". As a result, the memory +of "handler" cannot be managed and released: memory leakage. + +So add "xmlFree(handler)" to fix memory leakage on the failure branch of +xmlRegisterCharEncodingHandler(). 
+ +Reported-by: wuqing +Signed-off-by: Xiaoming Ni +--- + encoding.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +diff --git a/encoding.c b/encoding.c +index 264f60b..d67c16d 100644 +--- a/encoding.c ++++ b/encoding.c +@@ -1488,16 +1488,25 @@ xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) { + if ((handler == NULL) || (handlers == NULL)) { + xmlEncodingErr(XML_I18N_NO_HANDLER, + "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL); +- return; ++ goto free_handler; + } + + if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) { + xmlEncodingErr(XML_I18N_EXCESS_HANDLER, + "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n", + "MAX_ENCODING_HANDLERS"); +- return; ++ goto free_handler; + } + handlers[nbCharEncodingHandler++] = handler; ++ return; ++ ++free_handler: ++ if (handler != NULL) { ++ if (handler->name != NULL) { ++ xmlFree(handler->name); ++ } ++ xmlFree(handler); ++ } + } + + /** +-- +1.8.3.1 + diff --git a/libxml2.spec b/libxml2.spec index 6661d06..47b1fed 100644 --- a/libxml2.spec +++ b/libxml2.spec @@ -1,7 +1,7 @@ Summary: Library providing XML and HTML support Name: libxml2 Version: 2.9.10 -Release: 21 +Release: 22 License: MIT Group: Development/Libraries Source: ftp://xmlsoft.org/libxml2/libxml2-%{version}.tar.gz @@ -103,6 +103,13 @@ Patch90: Fix-XPath-recursion-limit.patch Patch91: Fix-Null-deref-in-xmlSchemaGetComponentTargetNs.patch Patch92: Fix-memleaks-in-xmlXIncludeProcessFlags.patch Patch93: Fix-heap-use-after-free-in-xmlAddNextSibling-and-xmlAddChild.patch +Patch94: Fix-unsigned-integer-overflow-in-htmlParseTryOrFinis.patch +Patch95: Fix-undefined-behavior-in-UTF16LEToUTF8.patch +Patch96: Fix-SEGV-in-xmlSAXParseFileWithData.patch +Patch97: encoding-fix-memleak-in-xmlRegisterCharEncodingHandl.patch +Patch98: Fix-null-deref-in-xmlStringGetNodeList.patch +Patch99: Fix-memory-leak-in-xmlParseElementMixedContentDecl.patch 
+Patch100: Fix-slow-parsing-of-HTML-with-encoding-errors.patch BuildRoot: %{_tmppath}/%{name}-%{version}-root BuildRequires: python2-devel @@ -295,6 +302,19 @@ rm -fr %{buildroot} %changelog +* Thu Nov 11 2021 panxiaohe - 2.9.10-22 +- Type:bugfix +- ID:NA +- SUG:NA +- DESC:fix following issues: + fix unsigned integer overflow in htmlParseTryOrFinish + fix undefined behavior in UTF16LEToUTF8 + fix SEGV in xmlSAXParseFileWithData + encoding: fix memleak in xmlRegisterCharEncodingHandler() + fix null deref in xmlStringGetNodeList + fix memory leak in xmlParseElementMixedContentDecl + fix slow parsing of HTML with encoding errors + * Thu Nov 11 2021 panxiaohe - 2.9.10-21 - Type:bugfix - ID:NA -- Gitee