diff --git a/dirs-fix-trivial-over-read-of-input-data.patch b/dirs-fix-trivial-over-read-of-input-data.patch new file mode 100644 index 0000000000000000000000000000000000000000..00b0c9e5476d5987a70e93daf9574ef25c4cb12c --- /dev/null +++ b/dirs-fix-trivial-over-read-of-input-data.patch @@ -0,0 +1,78 @@ +# HG changeset patch +# User Augie Fackler +# Date 1570565895 14400 +# Node ID 2a0774e9d2a8ea3b452c416307ed1fc006010bce +# Parent 843da18386d580779a30b7d103615181a309262c +dirs: fix trivial over-read of input data + +This code, introduced in 8c0a7eeda06d, was intentionally over-reading +an input string to avoid getting a shared string object for a one-byte +input. Unfortunately with an empty input (like in the case of a fuzzer +getting started) this was a trivial over-read and triggered an +AddressSanitizer failure. + +I went out of my way to make sure the code still does the +copy-avoidance tricks. I don't think this change will cost us much +performance since the one-character strings should be cached +aggressively anyway. + +Differential Revision: https://phab.mercurial-scm.org/D7030 + +diff -r 843da18386d5 -r 2a0774e9d2a8 mercurial/cext/dirs.c +--- a/mercurial/cext/dirs.c Sun Oct 06 23:36:52 2019 -0400 ++++ b/mercurial/cext/dirs.c Tue Oct 08 16:18:15 2019 -0400 +@@ -68,26 +68,41 @@ + while ((pos = _finddir(cpath, pos - 1)) != -1) { + PyObject *val; + +- /* It's likely that every prefix already has an entry +- in our dict. Try to avoid allocating and +- deallocating a string for each prefix we check. */ +- if (key != NULL) +- ((PyBytesObject *)key)->ob_shash = -1; +- else { +- /* Force Python to not reuse a small shared string. */ +- key = PyBytes_FromStringAndSize(cpath, +- pos < 2 ? 2 : pos); ++ if (pos < 2) { ++ key = PyBytes_FromStringAndSize(cpath, pos); + if (key == NULL) + goto bail; ++ } else { ++ /* It's likely that every prefix already has an entry ++ in our dict. Try to avoid allocating and ++ deallocating a string for each prefix we check. 
*/ ++ if (key != NULL) ++ ((PyBytesObject *)key)->ob_shash = -1; ++ else { ++ /* We know pos >= 2, so we won't get a small ++ * shared string. */ ++ key = PyBytes_FromStringAndSize(cpath, pos); ++ if (key == NULL) ++ goto bail; ++ } ++ /* Py_SIZE(o) refers to the ob_size member of ++ * the struct. Yes, assigning to what looks ++ * like a function seems wrong. */ ++ Py_SIZE(key) = pos; ++ ((PyBytesObject *)key)->ob_sval[pos] = '\0'; + } +- /* Py_SIZE(o) refers to the ob_size member of the struct. Yes, +- * assigning to what looks like a function seems wrong. */ +- Py_SIZE(key) = pos; +- ((PyBytesObject *)key)->ob_sval[pos] = '\0'; + + val = PyDict_GetItem(dirs, key); + if (val != NULL) { + PYLONG_VALUE(val) += 1; ++ if (pos < 2) { ++ /* This was a short string, so we ++ * probably got a small shared string ++ * we can't mutate on the next loop ++ * iteration. Clear it. ++ */ ++ Py_CLEAR(key); ++ } + break; + } + diff --git a/dirs-give-formatting-oversight-to-clang-format.patch b/dirs-give-formatting-oversight-to-clang-format.patch new file mode 100644 index 0000000000000000000000000000000000000000..d36457087c2b95f1ee5c6936794fb0e788bc3e44 --- /dev/null +++ b/dirs-give-formatting-oversight-to-clang-format.patch @@ -0,0 +1,98 @@ +# HG changeset patch +# User Augie Fackler +# Date 1571147645 14400 +# Node ID ea62d7b06c129be54aaf0cf389b6e14dfedf638b +# Parent be178b5d91c823cf91ed28f6f369b902d3e2cdec +dirs: give formatting oversight to clang-format + +Differential Revision: https://phab.mercurial-scm.org/D7104 + +diff -r be178b5d91c8 -r ea62d7b06c12 contrib/clang-format-ignorelist +--- a/contrib/clang-format-ignorelist Tue Oct 15 09:52:33 2019 -0400 ++++ b/contrib/clang-format-ignorelist Tue Oct 15 09:54:05 2019 -0400 +@@ -1,6 +1,5 @@ + # Files that just need to be migrated to the formatter. + # Do not add new files here! 
+-mercurial/cext/dirs.c + mercurial/cext/manifest.c + mercurial/cext/osutil.c + # Vendored code that we should never format: +diff -r be178b5d91c8 -r ea62d7b06c12 mercurial/cext/dirs.c +--- a/mercurial/cext/dirs.c Tue Oct 15 09:52:33 2019 -0400 ++++ b/mercurial/cext/dirs.c Tue Oct 15 09:54:05 2019 -0400 +@@ -42,7 +42,7 @@ + pos -= 1; + } + if (pos == -1) { +- return 0; ++ return 0; + } + + return pos; +@@ -56,13 +56,13 @@ + int ret = -1; + + /* This loop is super critical for performance. That's why we inline +- * access to Python structs instead of going through a supported API. +- * The implementation, therefore, is heavily dependent on CPython +- * implementation details. We also commit violations of the Python +- * "protocol" such as mutating immutable objects. But since we only +- * mutate objects created in this function or in other well-defined +- * locations, the references are known so these violations should go +- * unnoticed. */ ++ * access to Python structs instead of going through a supported API. ++ * The implementation, therefore, is heavily dependent on CPython ++ * implementation details. We also commit violations of the Python ++ * "protocol" such as mutating immutable objects. But since we only ++ * mutate objects created in this function or in other well-defined ++ * locations, the references are known so these violations should go ++ * unnoticed. 
*/ + while ((pos = _finddir(cpath, pos - 1)) != -1) { + PyObject *val; + +@@ -120,7 +120,7 @@ + val = PyDict_GetItem(dirs, key); + if (val == NULL) { + PyErr_SetString(PyExc_ValueError, +- "expected a value, found none"); ++ "expected a value, found none"); + goto bail; + } + +@@ -152,7 +152,7 @@ + if (skipchar) { + if (!dirstate_tuple_check(value)) { + PyErr_SetString(PyExc_TypeError, +- "expected a dirstate tuple"); ++ "expected a dirstate tuple"); + return -1; + } + if (((dirstateTupleObject *)value)->state == skipchar) +@@ -218,8 +218,8 @@ + ret = dirs_fromdict(dirs, source, skipchar); + else if (skipchar) + PyErr_SetString(PyExc_ValueError, +- "skip character is only supported " +- "with a dict source"); ++ "skip character is only supported " ++ "with a dict source"); + else + ret = dirs_fromiter(dirs, source); + +@@ -276,12 +276,12 @@ + static PySequenceMethods dirs_sequence_methods; + + static PyMethodDef dirs_methods[] = { +- {"addpath", (PyCFunction)dirs_addpath, METH_VARARGS, "add a path"}, +- {"delpath", (PyCFunction)dirs_delpath, METH_VARARGS, "remove a path"}, +- {NULL} /* Sentinel */ ++ {"addpath", (PyCFunction)dirs_addpath, METH_VARARGS, "add a path"}, ++ {"delpath", (PyCFunction)dirs_delpath, METH_VARARGS, "remove a path"}, ++ {NULL} /* Sentinel */ + }; + +-static PyTypeObject dirsType = { PyVarObject_HEAD_INIT(NULL, 0) }; ++static PyTypeObject dirsType = {PyVarObject_HEAD_INIT(NULL, 0)}; + + void dirs_module_init(PyObject *mod) + { diff --git a/dirs-reject-consecutive-slashes-in-paths.patch b/dirs-reject-consecutive-slashes-in-paths.patch new file mode 100644 index 0000000000000000000000000000000000000000..21b1615fbc613b6a7437c1dd32822728b64eafbc --- /dev/null +++ b/dirs-reject-consecutive-slashes-in-paths.patch @@ -0,0 +1,84 @@ +# HG changeset patch +# User Augie Fackler +# Date 1571354962 14400 +# Node ID 5d40317d42b7083b49467502549e25f144888cb3 +# Parent 3a463e5e470b40c275091a38b1a4464e36c0c5a4 +dirs: reject consecutive slashes in paths + +We 
shouldn't ever see those, and the fuzzer got really excited that if +it gives us a 65k string with 55k slashes in it we use a lot of RAM. + +This is a better fix than what I tried in D7105. It was suggested by +Yuya, and I verified it does in fact cause the fuzzer to not OOM. + +This is a revision of D7234, but with the missing set of an error +added. I added a unit test of the dirs behavior because I needed to +reason more carefully about the failure modes around consecutive +slashes. + +Differential Revision: https://phab.mercurial-scm.org/D7252 + +diff -r 3a463e5e470b -r 5d40317d42b7 mercurial/cext/dirs.c +--- a/mercurial/cext/dirs.c Tue Nov 05 22:56:12 2019 -0500 ++++ b/mercurial/cext/dirs.c Thu Oct 17 19:29:22 2019 -0400 +@@ -66,6 +66,14 @@ + while ((pos = _finddir(cpath, pos - 1)) != -1) { + PyObject *val; + ++ /* Sniff for trailing slashes, a marker of an invalid input. */ ++ if (pos > 0 && cpath[pos - 1] == '/') { ++ PyErr_SetString( ++ PyExc_ValueError, ++ "found invalid consecutive slashes in path"); ++ goto bail; ++ } ++ + key = PyBytes_FromStringAndSize(cpath, pos); + if (key == NULL) + goto bail; +diff -r 3a463e5e470b -r 5d40317d42b7 mercurial/util.py +--- a/mercurial/util.py Tue Nov 05 22:56:12 2019 -0500 ++++ b/mercurial/util.py Thu Oct 17 19:29:22 2019 -0400 +@@ -3515,6 +3515,10 @@ + def addpath(self, path): + dirs = self._dirs + for base in finddirs(path): ++ if base.endswith(b'/'): ++ raise ValueError( ++ "found invalid consecutive slashes in path: %r" % base ++ ) + if base in dirs: + dirs[base] += 1 + return +diff -r 3a463e5e470b -r 5d40317d42b7 tests/test-dirs.py +--- /dev/null Thu Jan 01 00:00:00 1970 +0000 ++++ b/tests/test-dirs.py Thu Oct 17 19:29:22 2019 -0400 +@@ -0,0 +1,27 @@ ++from __future__ import absolute_import ++ ++import unittest ++ ++import silenttestrunner ++ ++from mercurial import util ++ ++ ++class dirstests(unittest.TestCase): ++ def testdirs(self): ++ for case, want in [ ++ (b'a/a/a', [b'a', b'a/a', b'']), ++ 
(b'alpha/beta/gamma', [b'', b'alpha', b'alpha/beta']), ++ ]: ++ d = util.dirs({}) ++ d.addpath(case) ++ self.assertEqual(sorted(d), sorted(want)) ++ ++ def testinvalid(self): ++ with self.assertRaises(ValueError): ++ d = util.dirs({}) ++ d.addpath(b'a//b') ++ ++ ++if __name__ == '__main__': ++ silenttestrunner.main(__name__) + diff --git a/dirs-remove-mutable-string-optimization-at-all.patch b/dirs-remove-mutable-string-optimization-at-all.patch new file mode 100644 index 0000000000000000000000000000000000000000..93fb54e9d6662a2d8a8ef0b19bf9a5335a0b171c --- /dev/null +++ b/dirs-remove-mutable-string-optimization-at-all.patch @@ -0,0 +1,76 @@ +# HG changeset patch +# User Yuya Nishihara +# Date 1570964769 -32400 +# Node ID 9fa941faef94a18e493cd571246f8c1a8730bf35 +# Parent 0d609ed185ea3847bfd6a5ec89d1c8efa373fdbb +dirs: remove mutable string optimization at all + +As far as I can see, the optimization trick has been dead since 42e89b87ca79 +"dirs: speed up by storing number of direct children per dir". After +42e89b87ca79, the key variable is cleared to NULL at each iteration. + +diff -r 0d609ed185ea -r 9fa941faef94 mercurial/cext/dirs.c +--- a/mercurial/cext/dirs.c Tue Oct 15 12:14:44 2019 +0200 ++++ b/mercurial/cext/dirs.c Sun Oct 13 20:06:09 2019 +0900 +@@ -26,9 +26,6 @@ + * + * We modify Python integers for refcounting, but those integers are + * never visible to Python code. +- * +- * We mutate strings in-place, but leave them immutable once they can +- * be seen by Python code. + */ + typedef struct { + PyObject_HEAD +@@ -63,46 +60,18 @@ + * "protocol" such as mutating immutable objects. But since we only + * mutate objects created in this function or in other well-defined + * locations, the references are known so these violations should go +- * unnoticed. The code for adjusting the length of a PyBytesObject is +- * essentially a minimal version of _PyBytes_Resize. */ ++ * unnoticed. 
*/ + while ((pos = _finddir(cpath, pos - 1)) != -1) { + PyObject *val; + +- if (pos < 2) { +- key = PyBytes_FromStringAndSize(cpath, pos); +- if (key == NULL) +- goto bail; +- } else { +- /* It's likely that every prefix already has an entry +- in our dict. Try to avoid allocating and +- deallocating a string for each prefix we check. */ +- if (key != NULL) +- ((PyBytesObject *)key)->ob_shash = -1; +- else { +- /* We know pos >= 2, so we won't get a small +- * shared string. */ +- key = PyBytes_FromStringAndSize(cpath, pos); +- if (key == NULL) +- goto bail; +- } +- /* Py_SIZE(o) refers to the ob_size member of +- * the struct. Yes, assigning to what looks +- * like a function seems wrong. */ +- Py_SIZE(key) = pos; +- ((PyBytesObject *)key)->ob_sval[pos] = '\0'; +- } ++ key = PyBytes_FromStringAndSize(cpath, pos); ++ if (key == NULL) ++ goto bail; + + val = PyDict_GetItem(dirs, key); + if (val != NULL) { + PYLONG_VALUE(val) += 1; +- if (pos < 2) { +- /* This was a short string, so we +- * probably got a small shared string +- * we can't mutate on the next loop +- * iteration. Clear it. +- */ +- Py_CLEAR(key); +- } ++ Py_CLEAR(key); + break; + } + diff --git a/dirs-resolve-fuzzer-OOM-situation-by-disallowing-deep-directory-hierarchies.patch b/dirs-resolve-fuzzer-OOM-situation-by-disallowing-deep-directory-hierarchies.patch new file mode 100644 index 0000000000000000000000000000000000000000..d4a209cd5bf02510671594e5c6ca8408be7b2d2b --- /dev/null +++ b/dirs-resolve-fuzzer-OOM-situation-by-disallowing-deep-directory-hierarchies.patch @@ -0,0 +1,69 @@ +# HG changeset patch +# User Augie Fackler +# Date 1573571879 18000 +# Node ID 0796e266d26bdc4e116012bb1f8039ee76f2e9c3 +# Parent 38387f9e4d22056b5b75cb9918152447f739dd7d +dirs: resolve fuzzer OOM situation by disallowing deep directory hierarchies + +It seems like 2048 directories ought to be enough for any reasonable +use of Mercurial? 
+ +A previous version of this patch scanned for slashes before any allocations +occurred. That approach is slower than this in the happy path, but much faster +than this in the case that too many slashes are encountered. We may want to +revisit it in the future using memchr() so it'll be well-optimized by the libc +we're using. + +.. bc: + + Mercurial will now defend against OOMs by refusing to operate on + paths with 2048 or more components. This means that _extremely_ + deep path hierarchies will be rejected, but we anticipate nobody + is using hierarchies this deep. + +Differential Revision: https://phab.mercurial-scm.org/D7411 + +diff -r 38387f9e4d22 -r 0796e266d26b mercurial/cext/dirs.c +--- a/mercurial/cext/dirs.c Thu Nov 14 14:14:11 2019 -0800 ++++ b/mercurial/cext/dirs.c Tue Nov 12 10:17:59 2019 -0500 +@@ -9,6 +9,7 @@ + + #define PY_SSIZE_T_CLEAN + #include <Python.h> ++#include <string.h> + + #include "util.h" + +@@ -48,12 +49,19 @@ + return pos; + } + ++/* Mercurial will fail to run on directory hierarchies deeper than ++ * this constant, so we should try and keep this constant as big as ++ * possible. ++ */ ++#define MAX_DIRS_DEPTH 2048 ++ + static int _addpath(PyObject *dirs, PyObject *path) + { + const char *cpath = PyBytes_AS_STRING(path); + Py_ssize_t pos = PyBytes_GET_SIZE(path); + PyObject *key = NULL; + int ret = -1; ++ size_t num_slashes = 0; + + /* This loop is super critical for performance. That's why we inline + * access to Python structs instead of going through a supported API. +@@ -65,6 +73,12 @@ + * unnoticed. */ + while ((pos = _finddir(cpath, pos - 1)) != -1) { + PyObject *val; ++ ++num_slashes; ++ if (num_slashes > MAX_DIRS_DEPTH) { ++ PyErr_SetString(PyExc_ValueError, ++ "Directory hierarchy too deep."); ++ goto bail; ++ } + + /* Sniff for trailing slashes, a marker of an invalid input. 
*/ + if (pos > 0 && cpath[pos - 1] == '/') { diff --git a/dirs-tag-a-struct-as-not-being-formattable.patch b/dirs-tag-a-struct-as-not-being-formattable.patch new file mode 100644 index 0000000000000000000000000000000000000000..40fe329bacc4c0489e57468434c02f8c71c4c7bf --- /dev/null +++ b/dirs-tag-a-struct-as-not-being-formattable.patch @@ -0,0 +1,25 @@ +# HG changeset patch +# User Augie Fackler +# Date 1571147553 14400 +# Node ID be178b5d91c823cf91ed28f6f369b902d3e2cdec +# Parent 30570a056fa8396e6008e0ebb611ff3d7c020d22 +dirs: tag a struct as not being formattable + +Differential Revision: https://phab.mercurial-scm.org/D7103 + +diff -r 30570a056fa8 -r be178b5d91c8 mercurial/cext/dirs.c +--- a/mercurial/cext/dirs.c Wed Oct 02 14:38:34 2019 -0400 ++++ b/mercurial/cext/dirs.c Tue Oct 15 09:52:33 2019 -0400 +@@ -27,10 +27,12 @@ + * We modify Python integers for refcounting, but those integers are + * never visible to Python code. + */ ++/* clang-format off */ + typedef struct { + PyObject_HEAD + PyObject *dict; + } dirsObject; ++/* clang-format on */ + + static inline Py_ssize_t _finddir(const char *path, Py_ssize_t pos) + { diff --git a/mercurial.spec b/mercurial.spec index ee7453f3bf7bf93f1af930226bdb397939443dd6..04ec6d1e8efd27503a06554301c4ff6df9d8cd97 100644 --- a/mercurial.spec +++ b/mercurial.spec @@ -2,12 +2,19 @@ Name: mercurial Version: 5.1 -Release: 2 +Release: 3 Summary: Source control management tool License: GPLv2+ URL: http://www.selenic.com/mercurial/ Source0: http://www.selenic.com/mercurial/release/%{name}-%{version}.tar.gz +Patch0: dirs-fix-trivial-over-read-of-input-data.patch +Patch1: dirs-remove-mutable-string-optimization-at-all.patch +Patch2: dirs-tag-a-struct-as-not-being-formattable.patch +Patch3: dirs-give-formatting-oversight-to-clang-format.patch +Patch4: dirs-reject-consecutive-slashes-in-paths.patch +Patch5: dirs-resolve-fuzzer-OOM-situation-by-disallowing-deep-directory-hierarchies.patch + BuildRequires: gcc python2 python2-devel 
bash-completion emacs-nox emacs-el pkgconfig gettext python2-docutils Requires: python2 emacs-filesystem tk Provides: hg = %{version}-%{release} emacs-mercurial <= 3.4.1 emacs-mercurial-el <= 3.4.1 @@ -23,7 +30,7 @@ It efficiently handles projects of any size and offers an easy and intuitive int #Build sections %prep -%autosetup -n %{name}-%{version} +%autosetup -n %{name}-%{version} -p1 sed -i 's|python|python2|' %{_builddir}/%{name}-%{version}/Makefile %{_builddir}/%{name}-%{version}/doc/Makefile @@ -131,6 +138,12 @@ grep -v locale %{name}-base.files > %{name}-base-filtered.files %{_mandir}/man?/chg.*.gz %changelog +* Thu Apr 23 2020 openEuler Buildteam - 5.1-3 +- Type:bugfix +- ID:NA +- SUG:NA +- DESC:fix the problems detected by oss-fuzz + * Thu Jan 9 2020 JeanLeo - 5.1-2 - Type:bugfix - Id:NA