diff --git a/backport-gh-93065-Fix-HAMT-to-iterate-correctly-over-7-level-.patch b/backport-gh-93065-Fix-HAMT-to-iterate-correctly-over-7-level-.patch new file mode 100644 index 0000000000000000000000000000000000000000..6dfb71b5fcab6c42d9956c33bdf8a5d38371ae72 --- /dev/null +++ b/backport-gh-93065-Fix-HAMT-to-iterate-correctly-over-7-level-.patch @@ -0,0 +1,143 @@ +From 95c9c2b9cb2d3c1d29c8ce77f154de8bd5313dae Mon Sep 17 00:00:00 2001 +From: "Miss Islington (bot)" + <31488909+miss-islington@users.noreply.github.com> +Date: Tue, 24 May 2022 01:52:49 -0700 +Subject: [PATCH] gh-93065: Fix HAMT to iterate correctly over 7-level +deep + trees (GH-93066) (#93147) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Also while there, clarify a few things about why we reduce the hash to +32 bits. + +Co-authored-by: Eli Libman +Co-authored-by: Yury Selivanov +Co-authored-by: Ɓukasz Langa + +(cherry picked from commit c1f5c903a7e4ed27190488f4e33b00d3c3d952e5) + +--- + Include/internal/pycore_hamt.h | 14 +++++++++++++- + Lib/test/test_context.py | 35 ++++++++++++++++++++++++++++++++++ + Misc/ACKS | 1 + + Python/hamt.c | 14 +++++++++++--- + 4 files changed, 60 insertions(+), 4 deletions(-) + +diff --git a/Include/internal/pycore_hamt.h b/Include/internal/pycore_hamt.h +index aaf6559..357d966 100644 +--- a/Include/internal/pycore_hamt.h ++++ b/Include/internal/pycore_hamt.h +@@ -5,7 +5,19 @@ + # error "this header requires Py_BUILD_CORE define" + #endif + +-#define _Py_HAMT_MAX_TREE_DEPTH 7 ++ ++/* ++HAMT tree is shaped by hashes of keys. Every group of 5 bits of a hash denotes ++the exact position of the key in one level of the tree. Since we're using ++32 bit hashes, we can have at most 7 such levels. Although if there are ++two distinct keys with equal hashes, they will have to occupy the same ++cell in the 7th level of the tree -- so we'd put them in a "collision" node. ++Which brings the total possible tree depth to 8. 
Read more about the actual ++layout of the HAMT tree in `hamt.c`. ++ ++This constant is used to define a data structure for storing iteration state. ++*/ ++#define _Py_HAMT_MAX_TREE_DEPTH 8 + + + #define PyHamt_Check(o) Py_IS_TYPE(o, &_PyHamt_Type) +diff --git a/Lib/test/test_context.py b/Lib/test/test_context.py +index 2d8b63a..689e3d4 100644 +--- a/Lib/test/test_context.py ++++ b/Lib/test/test_context.py +@@ -533,6 +533,41 @@ class HamtTest(unittest.TestCase): + self.assertEqual(len(h4), 2) + self.assertEqual(len(h5), 3) + ++ def test_hamt_collision_3(self): ++ # Test that iteration works with the deepest tree possible. ++ # https://github.com/python/cpython/issues/93065 ++ ++ C = HashKey(0b10000000_00000000_00000000_00000000, 'C') ++ D = HashKey(0b10000000_00000000_00000000_00000000, 'D') ++ ++ E = HashKey(0b00000000_00000000_00000000_00000000, 'E') ++ ++ h = hamt() ++ h = h.set(C, 'C') ++ h = h.set(D, 'D') ++ h = h.set(E, 'E') ++ ++ # BitmapNode(size=2 count=1 bitmap=0b1): ++ # NULL: ++ # BitmapNode(size=2 count=1 bitmap=0b1): ++ # NULL: ++ # BitmapNode(size=2 count=1 bitmap=0b1): ++ # NULL: ++ # BitmapNode(size=2 count=1 bitmap=0b1): ++ # NULL: ++ # BitmapNode(size=2 count=1 bitmap=0b1): ++ # NULL: ++ # BitmapNode(size=2 count=1 bitmap=0b1): ++ # NULL: ++ # BitmapNode(size=4 count=2 bitmap=0b101): ++ # : 'E' ++ # NULL: ++ # CollisionNode(size=4 id=0x107a24520): ++ # : 'C' ++ # : 'D' ++ ++ self.assertEqual({k.name for k in h.keys()}, {'C', 'D', 'E'}) ++ + def test_hamt_stress(self): + COLLECTION_SIZE = 7000 + TEST_ITERS_EVERY = 647 +diff --git a/Misc/ACKS b/Misc/ACKS +index ac893ac..8699b98 100644 +--- a/Misc/ACKS ++++ b/Misc/ACKS +@@ -1031,6 +1031,7 @@ Robert Li + Xuanji Li + Zekun Li + Zheao Li ++Eli Libman + Dan Lidral-Porter + Robert van Liere + Ross Light +diff --git a/Python/hamt.c b/Python/hamt.c +index 8801c5e..3296109 100644 +--- a/Python/hamt.c ++++ b/Python/hamt.c +@@ -407,14 +407,22 @@ hamt_hash(PyObject *o) + return -1; + } + +- /* While it's
suboptimal to reduce Python's 64 bit hash to ++ /* While it's somewhat suboptimal to reduce Python's 64 bit hash to + 32 bits via XOR, it seems that the resulting hash function + is good enough (this is also how Long type is hashed in Java.) + Storing 10, 100, 1000 Python strings results in a relatively + shallow and uniform tree structure. + +- Please don't change this hashing algorithm, as there are many +- tests that test some exact tree shape to cover all code paths. ++ Also it's worth noting that it would be possible to adapt the tree ++ structure to 64 bit hashes, but that would increase memory pressure ++ and provide little to no performance benefits for collections with ++ fewer than billions of key/value pairs. ++ ++ Important: do not change this hash reducing function. There are many ++ tests that need an exact tree shape to cover all code paths and ++ we do that by specifying concrete values for test data's `__hash__`. ++ If this function is changed most of the regression tests would ++ become useless. + */ + int32_t xored = (int32_t)(hash & 0xffffffffl) ^ (int32_t)(hash >> 32); + return xored == -1 ? 
-2 : xored; +-- +2.33.0 + diff --git a/python3.spec b/python3.spec index 87f80f3f3ea3e01778192fd275a9c1fa519053d7..13d0b6865d34eafd7337ee9ff51c1852aa9b7165 100644 --- a/python3.spec +++ b/python3.spec @@ -3,7 +3,7 @@ Summary: Interpreter of the Python3 programming language URL: https://www.python.org/ Version: 3.9.9 -Release: 27 +Release: 28 License: Python-2.0 %global branchversion 3.9 @@ -109,6 +109,7 @@ Patch6015: backport-CVE-2007-4559.patch Patch6016: backport-CVE-2023-40217.patch Patch6017: backport-3.9-gh-104049-do-not-expose-on-disk-location-from-Si.patch Patch6018: backport-3.9-gh-99889-Fix-directory-traversal-security-flaw-i.patch +Patch6019: backport-gh-93065-Fix-HAMT-to-iterate-correctly-over-7-level-.patch Patch9000: add-the-sm3-method-for-obtaining-the-salt-value.patch Patch9001: python3-Add-sw64-architecture.patch @@ -216,6 +217,7 @@ rm -r Modules/expat %patch6016 -p1 %patch6017 -p1 %patch6018 -p1 +%patch6019 -p1 %patch9000 -p1 %patch9001 -p1 @@ -844,6 +846,12 @@ export BEP_GTDLIST="$BEP_GTDLIST_TMP" %{_mandir}/*/* %changelog +* Fri Feb 23 2024 xinsheng - 3.9.9-28 +- Type:bugfix +- CVE:NA +- SUG:NA +- DESC:Fix HAMT to iterate correctly over 7-level deep trees + * Fri Sep 22 renhongxun - 3.9.9-27 - Type:bugfix - CVE:NA