diff --git a/Fix-exponential-runtime-in-xmlFARecurseDeterminism.patch b/Fix-exponential-runtime-in-xmlFARecurseDeterminism.patch new file mode 100644 index 0000000000000000000000000000000000000000..eff70aca02390251e504bbbdd97930c12c71ff87 --- /dev/null +++ b/Fix-exponential-runtime-in-xmlFARecurseDeterminism.patch @@ -0,0 +1,81 @@ +From 68eadabd0055cba39c4ea1acfa8931d0d10a44e5 Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Sat, 11 Jul 2020 21:32:10 +0200 +Subject: [PATCH] Fix exponential runtime in xmlFARecurseDeterminism + +In order to prevent visiting a state twice, states must be marked as +visited for the whole duration of graph traversal because states might +be reached by different paths. Otherwise state graphs like the +following can lead to exponential runtime: + + ->O-->O-->O-->O-->O-> + \ / \ / \ / \ / + O O O O + +Reset the "visited" flag only after the graph was traversed. + +xmlFAComputesDeterminism still has massive performance problems when +handling fuzzed input. By design, it has quadratic time complexity in +the number of reachable states. Some issues might also stem from +redundant epsilon transitions. With this fix, fuzzing regexes with a +maximum length of 100 becomes feasible at least. + +Found with libFuzzer. +--- + xmlregexp.c | 26 +++++++++++++++++++++++++- + 1 file changed, 25 insertions(+), 1 deletion(-) + +diff --git a/xmlregexp.c b/xmlregexp.c +index dbf3bf2c..f971f0c8 100644 +--- a/xmlregexp.c ++++ b/xmlregexp.c +@@ -2658,7 +2658,6 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, + state->markd = XML_REGEXP_MARK_VISITED; + res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to], + to, atom); +- state->markd = 0; + if (res == 0) { + ret = 0; + /* t1->nd = 1; */ +@@ -2676,6 +2675,30 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, + return(ret); + } + ++/** ++ * xmlFAFinishRecurseDeterminism: ++ * @ctxt: a regexp parser context ++ * ++ * Reset flags after checking determinism. ++ */ ++static void ++xmlFAFinishRecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) { ++ int transnr, nbTrans; ++ ++ if (state == NULL) ++ return; ++ if (state->markd != XML_REGEXP_MARK_VISITED) ++ return; ++ state->markd = 0; ++ ++ nbTrans = state->nbTrans; ++ for (transnr = 0; transnr < nbTrans; transnr++) { ++ xmlRegTransPtr t1 = &state->trans[transnr]; ++ if ((t1->atom == NULL) && (t1->to >= 0)) ++ xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]); ++ } ++} ++ + /** + * xmlFAComputesDeterminism: + * @ctxt: a regexp parser context +@@ -2789,6 +2812,7 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) { + */ + ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to], + t2->to, t2->atom); ++ xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]); + /* don't shortcut the computation so all non deterministic + transition get marked down + if (ret == 0) +-- +2.23.0 + diff --git a/Limit-regexp-nesting-depth.patch b/Limit-regexp-nesting-depth.patch new file mode 100644 index 0000000000000000000000000000000000000000..78f0cadf2e8ef456dfd532d1ddbd3ceebc2ae052 --- /dev/null +++ b/Limit-regexp-nesting-depth.patch @@ -0,0 +1,50 @@ +From fc842f6eba81f3b630e1ff1ffea69c6f4dd66ccc Mon Sep 17 00:00:00 2001 +From: Nick Wellnhofer +Date: Mon, 6 Jul 2020 15:22:12 +0200 +Subject: [PATCH] Limit regexp nesting depth + +Enforce a maximum nesting depth of 50 for regular expressions. Avoids +stack overflows with deeply nested regexes. + +Found by OSS-Fuzz. +--- + xmlregexp.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/xmlregexp.c b/xmlregexp.c +index 687290e2..dbf3bf2c 100644 +--- a/xmlregexp.c ++++ b/xmlregexp.c +@@ -273,6 +273,8 @@ struct _xmlAutomata { + int determinist; + int negs; + int flags; ++ ++ int depth; + }; + + struct _xmlRegexp { +@@ -5330,6 +5332,10 @@ xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) { + xmlRegStatePtr start, oldend, start0; + + NEXT; ++ if (ctxt->depth >= 50) { ++ ERROR("xmlFAParseAtom: maximum nesting depth exceeded"); ++ return(-1); ++ } + /* + * this extra Epsilon transition is needed if we count with 0 allowed + * unfortunately this can't be known at that point +@@ -5341,7 +5347,9 @@ xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) { + oldend = ctxt->end; + ctxt->end = NULL; + ctxt->atom = NULL; ++ ctxt->depth++; + xmlFAParseRegExp(ctxt, 0); ++ ctxt->depth--; + if (CUR == ')') { + NEXT; + } else { +-- +2.23.0 + diff --git a/libxml2.spec b/libxml2.spec index 8fcffe918611a0be2e8b0c0d48a6f8f00e001aec..4ddc8bfe71a35e00b15d87e1df26abb14c7f47aa 100644 --- a/libxml2.spec +++ b/libxml2.spec @@ -1,7 +1,7 @@ Summary: Library providing XML and HTML support Name: libxml2 Version: 2.9.10 -Release: 4 +Release: 5 License: MIT Group: Development/Libraries Source: ftp://xmlsoft.org/libxml2/libxml2-%{version}.tar.gz @@ -29,6 +29,8 @@ Patch19: Never-expand-parameter-entities-in-text-declaration.patch Patch20: Fix-integer-overflow-in-xmlFAParseQuantExact.patch Patch21: Report-error-for-invalid-regexp-quantifiers.patch Patch22: Add-regexp-regression-tests.patch +Patch23: Limit-regexp-nesting-depth.patch +Patch24: Fix-exponential-runtime-in-xmlFARecurseDeterminism.patch BuildRoot: %{_tmppath}/%{name}-%{version}-root BuildRequires: python2-devel @@ -220,6 +222,10 @@ rm -fr %{buildroot} %changelog +* Wed Aug 12 2020 Liquor - 2.9.10-5 +- Limit regexp nesting depth +- Fix exponential runtime in xmlFARecurseDeterminism + * Mon Aug 3 2020 Liquor - 2.9.10-4 - Fix integer overflow in xmlFAParseQuantExact