diff --git a/glibc.spec b/glibc.spec index 25f65721d993655f7bb2afd5eb668ff20c2df3cb..70d41bb26e75c2588cf128011560f8c8ccbf200a 100644 --- a/glibc.spec +++ b/glibc.spec @@ -66,7 +66,7 @@ ############################################################################## Name: glibc Version: 2.34 -Release: 69 +Release: 70 Summary: The GNU libc libraries License: %{all_license} URL: http://www.gnu.org/software/glibc/ @@ -201,6 +201,13 @@ Patch115: x86-Fallback-str-wcs-cmp-RTM-in-the-ncmp-overflow-ca.patch Patch116: x86-Test-wcscmp-RTM-in-the-wcsncmp-overflow-case-BZ-.patch Patch117: x86-Fix-TEST_NAME-to-make-it-a-string-in-tst-strncmp.patch Patch118: Add-PTRACE_GET_RSEQ_CONFIGURATION-from-Linux-5.13-to.patch +Patch119: malloc-hugepage-0001-malloc-Add-madvise-support-for-Transparent-Huge-Page.patch +Patch120: malloc-hugepage-0002-malloc-Add-THP-madvise-support-for-sbrk.patch +Patch121: malloc-hugepage-0003-malloc-Move-mmap-logic-to-its-own-function.patch +Patch122: malloc-hugepage-0004-malloc-Add-Huge-Page-support-for-mmap.patch +Patch123: malloc-hugepage-0005-malloc-Add-Huge-Page-support-to-arenas.patch +Patch124: malloc-hugepage-0006-malloc-Move-MORECORE-fallback-mmap-to-sysmalloc_mmap.patch +Patch125: malloc-hugepage-0007-malloc-Enable-huge-page-support-on-main-arena.patch Patch9000: turn-default-value-of-x86_rep_stosb_threshold_form_2K_to_1M.patch Patch9001: delete-no-hard-link-to-avoid-all_language-package-to.patch @@ -1293,6 +1300,15 @@ fi %endif %changelog +* Tue Mar 15 2022 Yang Yanchao - 2.34-70 +- malloc: Add madvise support for Transparent Huge Pages +- malloc: Add THP/madvise support for sbrk +- malloc: Move mmap logic to its own function +- malloc: Add Huge Page support for mmap +- malloc: Add Huge Page support to arenas +- malloc: Move MORECORE fallback mmap to sysmalloc_mmap_fallback +- malloc: Enable huge page support on main arena + * Sat Mar 12 2022 Yang Yanchao - 2.34-69 - malloc: use __get_nprocs replace __get_nprocs_sched. diff --git a/malloc-hugepage-0001-malloc-Add-madvise-support-for-Transparent-Huge-Page.patch b/malloc-hugepage-0001-malloc-Add-madvise-support-for-Transparent-Huge-Page.patch new file mode 100644 index 0000000000000000000000000000000000000000..1fc292a4ee3ab49f23cbb1dd86fb253123a732a0 --- /dev/null +++ b/malloc-hugepage-0001-malloc-Add-madvise-support-for-Transparent-Huge-Page.patch @@ -0,0 +1,532 @@ +From 5f6d8d97c69748180f0031dfa385aff75062c4d5 Mon Sep 17 00:00:00 2001 +From: Adhemerval Zanella +Date: Fri, 13 Aug 2021 08:36:29 -0300 +Subject: [PATCH 1/7] malloc: Add madvise support for Transparent Huge Pages + +Linux Transparent Huge Pages (THP) current supports three different +states: 'never', 'madvise', and 'always'. The 'never' is +self-explanatory and 'always' will enable THP for all anonymous +pages. However, 'madvise' is still the default for some system and +for such case THP will be only used if the memory range is explicity +advertise by the program through a madvise(MADV_HUGEPAGE) call. + +To enable it a new tunable is provided, 'glibc.malloc.hugetlb', +where setting to a value diffent than 0 enables the madvise call. + +This patch issues the madvise(MADV_HUGEPAGE) call after a successful +mmap() call at sysmalloc() with sizes larger than the default huge +page size. The madvise() call is disable is system does not support +THP or if it has the mode set to "never" and on Linux only support +one page size for THP, even if the architecture supports multiple +sizes. 
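(Illustrative sketch, not part of the upstream commit.) The mechanism described above reduces to hinting a freshly mmap'ed region with madvise(MADV_HUGEPAGE) once it spans at least one huge page. A minimal standalone C example follows; the fixed 2 MiB THP size is an assumption for illustration, whereas the patch reads the real value from /sys/kernel/mm/transparent_hugepage/hpage_pmd_size and only enables the hint in 'madvise' mode:

#include <stdlib.h>
#include <sys/mman.h>

/* Assumed THP size for illustration only; the patch obtains it at
   runtime from hpage_pmd_size.  */
static const size_t thp_pagesize = 2 * 1024 * 1024;

int
main (void)
{
  size_t size = 8 * 1024 * 1024;        /* Larger than one huge page.  */
  void *p = mmap (NULL, size, PROT_READ | PROT_WRITE,
                  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p == MAP_FAILED)
    return EXIT_FAILURE;
#ifdef MADV_HUGEPAGE
  /* Skip the hint for areas smaller than a huge page, mirroring
     madvise_thp in the patch.  */
  if (size >= thp_pagesize)
    madvise (p, size, MADV_HUGEPAGE);
#endif
  munmap (p, size);
  return EXIT_SUCCESS;
}
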
+ +To test is a new rule is added tests-malloc-hugetlb1, which run the +addes tests with the required GLIBC_TUNABLE setting. + +Checked on x86_64-linux-gnu. + +Reviewed-by: DJ Delorie +--- + NEWS | 5 ++ + Rules | 19 ++++++ + elf/dl-tunables.list | 5 ++ + elf/tst-rtld-list-tunables.exp | 1 + + malloc/Makefile | 16 +++++ + malloc/arena.c | 5 ++ + malloc/malloc-internal.h | 1 + + malloc/malloc.c | 47 ++++++++++++++ + manual/tunables.texi | 10 +++ + sysdeps/generic/Makefile | 8 +++ + sysdeps/generic/malloc-hugepages.c | 31 +++++++++ + sysdeps/generic/malloc-hugepages.h | 37 +++++++++++ + sysdeps/unix/sysv/linux/malloc-hugepages.c | 74 ++++++++++++++++++++++ + 13 files changed, 259 insertions(+) + create mode 100644 sysdeps/generic/malloc-hugepages.c + create mode 100644 sysdeps/generic/malloc-hugepages.h + create mode 100644 sysdeps/unix/sysv/linux/malloc-hugepages.c + +diff --git a/NEWS b/NEWS +index 2532565d77..3b94dd209c 100644 +--- a/NEWS ++++ b/NEWS +@@ -92,6 +92,11 @@ Major new features: + variables. The GNU C Library manual has details on integration of + Restartable Sequences. + ++* On Linux, a new tunable, glibc.malloc.hugetlb, can be used to ++ make malloc issue madvise plus MADV_HUGEPAGE on mmap and sbrk calls. ++ Setting this might improve performance with Transparent Huge Pages madvise ++ mode depending of the workload. ++ + Deprecated and removed features, and other changes affecting compatibility: + + * The function pthread_mutex_consistent_np has been deprecated; programs +diff --git a/Rules b/Rules +index b1137afe71..5f5d9ba4cc 100644 +--- a/Rules ++++ b/Rules +@@ -157,6 +157,7 @@ tests: $(tests:%=$(objpfx)%.out) $(tests-internal:%=$(objpfx)%.out) \ + $(tests-container:%=$(objpfx)%.out) \ + $(tests-mcheck:%=$(objpfx)%-mcheck.out) \ + $(tests-malloc-check:%=$(objpfx)%-malloc-check.out) \ ++ $(tests-malloc-hugetlb1:%=$(objpfx)%-malloc-hugetlb1.out) \ + $(tests-special) $(tests-printers-out) + xtests: tests $(xtests:%=$(objpfx)%.out) $(xtests-special) + endif +@@ -168,6 +169,7 @@ tests-expected = + else + tests-expected = $(tests) $(tests-internal) $(tests-printers) \ + $(tests-container) $(tests-malloc-check:%=%-malloc-check) \ ++ $(tests-malloc-hugetlb1:%=%-malloc-hugetlb1) \ + $(tests-mcheck:%=%-mcheck) + endif + tests: +@@ -196,6 +198,7 @@ binaries-pie-notests = + endif + binaries-mcheck-tests = $(tests-mcheck:%=%-mcheck) + binaries-malloc-check-tests = $(tests-malloc-check:%=%-malloc-check) ++binaries-malloc-hugetlb1-tests = $(tests-malloc-hugetlb1:%=%-malloc-hugetlb1) + else + binaries-all-notests = + binaries-all-tests = $(tests) $(tests-internal) $(xtests) $(test-srcs) +@@ -207,6 +210,7 @@ binaries-pie-tests = + binaries-pie-notests = + binaries-mcheck-tests = + binaries-malloc-check-tests = ++binaries-malloc-hugetlb1-tests = + endif + + binaries-pie = $(binaries-pie-tests) $(binaries-pie-notests) +@@ -247,6 +251,14 @@ $(addprefix $(objpfx),$(binaries-malloc-check-tests)): %-malloc-check: %.o \ + $(+link-tests) + endif + ++ifneq "$(strip $(binaries-malloc-hugetlb1-tests))" "" ++$(addprefix $(objpfx),$(binaries-malloc-hugetlb1-tests)): %-malloc-hugetlb1: %.o \ ++ $(link-extra-libs-tests) \ ++ $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \ ++ $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit) ++ $(+link-tests) ++endif ++ + ifneq "$(strip $(binaries-pie-tests))" "" + $(addprefix $(objpfx),$(binaries-pie-tests)): %: %.o \ + $(link-extra-libs-tests) \ +@@ -284,6 +296,13 @@ $(1)-malloc-check-ENV = MALLOC_CHECK_=3 \ + endef + $(foreach 
t,$(tests-malloc-check),$(eval $(call malloc-check-ENVS,$(t)))) + ++# All malloc-hugetlb1 tests will be run with GLIBC_TUNABLES=glibc.malloc.hugetlb=1 ++define malloc-hugetlb1-ENVS ++$(1)-malloc-hugetlb1-ENV += GLIBC_TUNABLES=glibc.malloc.hugetlb=1 ++endef ++$(foreach t,$(tests-malloc-hugetlb1),$(eval $(call malloc-hugetlb1-ENVS,$(t)))) ++ ++ + # mcheck tests need the debug DSO to support -lmcheck. + define mcheck-ENVS + $(1)-mcheck-ENV = LD_PRELOAD=$(common-objpfx)/malloc/libc_malloc_debug.so +diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list +index ffcd7f18d4..d1fd3f3e91 100644 +--- a/elf/dl-tunables.list ++++ b/elf/dl-tunables.list +@@ -92,6 +92,11 @@ glibc { + minval: 0 + security_level: SXID_IGNORE + } ++ hugetlb { ++ type: INT_32 ++ minval: 0 ++ maxval: 1 ++ } + } + cpu { + hwcap_mask { +diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp +index 44e4834cfb..d8e363f2c5 100644 +--- a/elf/tst-rtld-list-tunables.exp ++++ b/elf/tst-rtld-list-tunables.exp +@@ -1,6 +1,7 @@ + glibc.malloc.arena_max: 0x0 (min: 0x1, max: 0x[f]+) + glibc.malloc.arena_test: 0x0 (min: 0x1, max: 0x[f]+) + glibc.malloc.check: 0 (min: 0, max: 3) ++glibc.malloc.hugetlb: 0 (min: 0, max: 1) + glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647) + glibc.malloc.mmap_threshold: 0x0 (min: 0x0, max: 0x[f]+) + glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0x[f]+) +diff --git a/malloc/Makefile b/malloc/Makefile +index 63cd7c0734..0137595e17 100644 +--- a/malloc/Makefile ++++ b/malloc/Makefile +@@ -78,6 +78,22 @@ tests-exclude-malloc-check = tst-malloc-check tst-malloc-usable \ + tests-malloc-check = $(filter-out $(tests-exclude-malloc-check) \ + $(tests-static),$(tests)) + ++# Run all testes with GLIBC_TUNABLES=glibc.malloc.hugetlb=1 that check the ++# Transparent Huge Pages support. We need exclude some tests that define ++# the ENV vars. ++tests-exclude-hugetlb1 = \ ++ tst-compathooks-off \ ++ tst-compathooks-on \ ++ tst-interpose-nothread \ ++ tst-interpose-thread \ ++ tst-interpose-static-nothread \ ++ tst-interpose-static-thread \ ++ tst-malloc-usable \ ++ tst-malloc-usable-tunables \ ++ tst-mallocstate ++tests-malloc-hugetlb1 = \ ++ $(filter-out $(tests-exclude-hugetlb1), $(tests)) ++ + # -lmcheck needs __malloc_initialize_hook, which was deprecated in 2.24. + ifeq ($(have-GLIBC_2.23)$(build-shared),yesyes) + # Tests that don't play well with mcheck. They are either bugs in mcheck or +diff --git a/malloc/arena.c b/malloc/arena.c +index 78ef4cf18c..cd00c7bef4 100644 +--- a/malloc/arena.c ++++ b/malloc/arena.c +@@ -230,6 +230,7 @@ TUNABLE_CALLBACK_FNDECL (set_tcache_count, size_t) + TUNABLE_CALLBACK_FNDECL (set_tcache_unsorted_limit, size_t) + #endif + TUNABLE_CALLBACK_FNDECL (set_mxfast, size_t) ++TUNABLE_CALLBACK_FNDECL (set_hugetlb, int32_t) + #else + /* Initialization routine. 
*/ + #include +@@ -330,6 +331,7 @@ ptmalloc_init (void) + TUNABLE_CALLBACK (set_tcache_unsorted_limit)); + # endif + TUNABLE_GET (mxfast, size_t, TUNABLE_CALLBACK (set_mxfast)); ++ TUNABLE_GET (hugetlb, int32_t, TUNABLE_CALLBACK (set_hugetlb)); + #else + if (__glibc_likely (_environ != NULL)) + { +@@ -508,6 +510,9 @@ new_heap (size_t size, size_t top_pad) + __munmap (p2, HEAP_MAX_SIZE); + return 0; + } ++ ++ madvise_thp (p2, size); ++ + h = (heap_info *) p2; + h->size = size; + h->mprotect_size = size; +diff --git a/malloc/malloc-internal.h b/malloc/malloc-internal.h +index 0c7b5a183c..7493e34d86 100644 +--- a/malloc/malloc-internal.h ++++ b/malloc/malloc-internal.h +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + + /* Called in the parent process before a fork. */ + void __malloc_fork_lock_parent (void) attribute_hidden; +diff --git a/malloc/malloc.c b/malloc/malloc.c +index 095d97a3be..c75841b841 100644 +--- a/malloc/malloc.c ++++ b/malloc/malloc.c +@@ -1880,6 +1880,11 @@ struct malloc_par + INTERNAL_SIZE_T arena_test; + INTERNAL_SIZE_T arena_max; + ++#if HAVE_TUNABLES ++ /* Transparent Large Page support. */ ++ INTERNAL_SIZE_T thp_pagesize; ++#endif ++ + /* Memory map support */ + int n_mmaps; + int n_mmaps_max; +@@ -2008,6 +2013,20 @@ free_perturb (char *p, size_t n) + + #include + ++/* ----------- Routines dealing with transparent huge pages ----------- */ ++ ++static inline void ++madvise_thp (void *p, INTERNAL_SIZE_T size) ++{ ++#if HAVE_TUNABLES && defined (MADV_HUGEPAGE) ++ /* Do not consider areas smaller than a huge page or if the tunable is ++ not active. */ ++ if (mp_.thp_pagesize == 0 || size < mp_.thp_pagesize) ++ return; ++ __madvise (p, size, MADV_HUGEPAGE); ++#endif ++} ++ + /* ------------------- Support for multiple arenas -------------------- */ + #include "arena.c" + +@@ -2445,6 +2464,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av) + + if (mm != MAP_FAILED) + { ++ madvise_thp (mm, size); ++ + /* + The offset to the start of the mmapped region is stored + in the prev_size field of the chunk. This allows us to adjust +@@ -2606,6 +2627,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av) + if (size > 0) + { + brk = (char *) (MORECORE (size)); ++ if (brk != (char *) (MORECORE_FAILURE)) ++ madvise_thp (brk, size); + LIBC_PROBE (memory_sbrk_more, 2, brk, size); + } + +@@ -2637,6 +2660,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av) + + if (mbrk != MAP_FAILED) + { ++ madvise_thp (mbrk, size); ++ + /* We do not need, and cannot use, another sbrk call to find end */ + brk = mbrk; + snd_brk = brk + size; +@@ -2748,6 +2773,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av) + correction = 0; + snd_brk = (char *) (MORECORE (0)); + } ++ else ++ madvise_thp (snd_brk, correction); + } + + /* handle non-contiguous cases */ +@@ -2988,6 +3015,8 @@ mremap_chunk (mchunkptr p, size_t new_size) + if (cp == MAP_FAILED) + return 0; + ++ madvise_thp (cp, new_size); ++ + p = (mchunkptr) (cp + offset); + + assert (aligned_OK (chunk2mem (p))); +@@ -5316,6 +5345,24 @@ do_set_mxfast (size_t value) + return 0; + } + ++#if HAVE_TUNABLES ++static __always_inline int ++do_set_hugetlb (int32_t value) ++{ ++ if (value == 1) ++ { ++ enum malloc_thp_mode_t thp_mode = __malloc_thp_mode (); ++ /* ++ Only enable THP madvise usage if system does support it and ++ has 'madvise' mode. Otherwise the madvise() call is wasteful. 
++ */ ++ if (thp_mode == malloc_thp_mode_madvise) ++ mp_.thp_pagesize = __malloc_default_thp_pagesize (); ++ } ++ return 0; ++} ++#endif ++ + int + __libc_mallopt (int param_number, int value) + { +diff --git a/manual/tunables.texi b/manual/tunables.texi +index 28ff502990..9ca6e3f603 100644 +--- a/manual/tunables.texi ++++ b/manual/tunables.texi +@@ -270,6 +270,16 @@ pointer, so add 4 on 32-bit systems or 8 on 64-bit systems to the size + passed to @code{malloc} for the largest bin size to enable. + @end deftp + ++@deftp Tunable glibc.malloc.hugetlb ++This tunable controls the usage of Huge Pages on @code{malloc} calls. The ++default value is @code{0}, which disables any additional support on ++@code{malloc}. ++ ++Setting its value to @code{1} enables the use of @code{madvise} with ++@code{MADV_HUGEPAGE} after memory allocation with @code{mmap}. It is enabled ++only if the system supports Transparent Huge Page (currently only on Linux). ++@end deftp ++ + @node Dynamic Linking Tunables + @section Dynamic Linking Tunables + @cindex dynamic linking tunables +diff --git a/sysdeps/generic/Makefile b/sysdeps/generic/Makefile +index a209e85cc4..8eef83c94d 100644 +--- a/sysdeps/generic/Makefile ++++ b/sysdeps/generic/Makefile +@@ -27,3 +27,11 @@ sysdep_routines += framestate unwind-pe + shared-only-routines += framestate unwind-pe + endif + endif ++ ++ifeq ($(subdir),malloc) ++sysdep_malloc_debug_routines += malloc-hugepages ++endif ++ ++ifeq ($(subdir),misc) ++sysdep_routines += malloc-hugepages ++endif +diff --git a/sysdeps/generic/malloc-hugepages.c b/sysdeps/generic/malloc-hugepages.c +new file mode 100644 +index 0000000000..8fb459a263 +--- /dev/null ++++ b/sysdeps/generic/malloc-hugepages.c +@@ -0,0 +1,31 @@ ++/* Huge Page support. Generic implementation. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see . */ ++ ++#include ++ ++unsigned long int ++__malloc_default_thp_pagesize (void) ++{ ++ return 0; ++} ++ ++enum malloc_thp_mode_t ++__malloc_thp_mode (void) ++{ ++ return malloc_thp_mode_not_supported; ++} +diff --git a/sysdeps/generic/malloc-hugepages.h b/sysdeps/generic/malloc-hugepages.h +new file mode 100644 +index 0000000000..f5a442e328 +--- /dev/null ++++ b/sysdeps/generic/malloc-hugepages.h +@@ -0,0 +1,37 @@ ++/* Malloc huge page support. Generic implementation. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see . */ ++ ++#ifndef _MALLOC_HUGEPAGES_H ++#define _MALLOC_HUGEPAGES_H ++ ++#include ++ ++/* Return the default transparent huge page size. */ ++unsigned long int __malloc_default_thp_pagesize (void) attribute_hidden; ++ ++enum malloc_thp_mode_t ++{ ++ malloc_thp_mode_always, ++ malloc_thp_mode_madvise, ++ malloc_thp_mode_never, ++ malloc_thp_mode_not_supported ++}; ++ ++enum malloc_thp_mode_t __malloc_thp_mode (void) attribute_hidden; ++ ++#endif /* _MALLOC_HUGEPAGES_H */ +diff --git a/sysdeps/unix/sysv/linux/malloc-hugepages.c b/sysdeps/unix/sysv/linux/malloc-hugepages.c +new file mode 100644 +index 0000000000..7497e07260 +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/malloc-hugepages.c +@@ -0,0 +1,74 @@ ++/* Huge Page support. Linux implementation. ++ Copyright (C) 2021 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public License as ++ published by the Free Software Foundation; either version 2.1 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, see . 
*/ ++ ++#include ++#include ++#include ++ ++unsigned long int ++__malloc_default_thp_pagesize (void) ++{ ++ int fd = __open64_nocancel ( ++ "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", O_RDONLY); ++ if (fd == -1) ++ return 0; ++ ++ char str[INT_BUFSIZE_BOUND (unsigned long int)]; ++ ssize_t s = __read_nocancel (fd, str, sizeof (str)); ++ __close_nocancel (fd); ++ if (s < 0) ++ return 0; ++ ++ unsigned long int r = 0; ++ for (ssize_t i = 0; i < s; i++) ++ { ++ if (str[i] == '\n') ++ break; ++ r *= 10; ++ r += str[i] - '0'; ++ } ++ return r; ++} ++ ++enum malloc_thp_mode_t ++__malloc_thp_mode (void) ++{ ++ int fd = __open64_nocancel ("/sys/kernel/mm/transparent_hugepage/enabled", ++ O_RDONLY); ++ if (fd == -1) ++ return malloc_thp_mode_not_supported; ++ ++ static const char mode_always[] = "[always] madvise never\n"; ++ static const char mode_madvise[] = "always [madvise] never\n"; ++ static const char mode_never[] = "always madvise [never]\n"; ++ ++ char str[sizeof(mode_always)]; ++ ssize_t s = __read_nocancel (fd, str, sizeof (str)); ++ __close_nocancel (fd); ++ ++ if (s == sizeof (mode_always) - 1) ++ { ++ if (strcmp (str, mode_always) == 0) ++ return malloc_thp_mode_always; ++ else if (strcmp (str, mode_madvise) == 0) ++ return malloc_thp_mode_madvise; ++ else if (strcmp (str, mode_never) == 0) ++ return malloc_thp_mode_never; ++ } ++ return malloc_thp_mode_not_supported; ++} +-- +2.33.0 + diff --git a/malloc-hugepage-0002-malloc-Add-THP-madvise-support-for-sbrk.patch b/malloc-hugepage-0002-malloc-Add-THP-madvise-support-for-sbrk.patch new file mode 100644 index 0000000000000000000000000000000000000000..58f2abfd05cebe9a202e5025f449a0f57c0763f1 --- /dev/null +++ b/malloc-hugepage-0002-malloc-Add-THP-madvise-support-for-sbrk.patch @@ -0,0 +1,111 @@ +From 7478c9959ae409f7b3d63146943575d6ee745352 Mon Sep 17 00:00:00 2001 +From: Adhemerval Zanella +Date: Fri, 13 Aug 2021 10:06:04 -0300 +Subject: [PATCH 2/7] malloc: Add THP/madvise support for sbrk + +To increase effectiveness with Transparent Huge Page with madvise, the +large page size is use instead page size for sbrk increment for the +main arena. + +Checked on x86_64-linux-gnu. + +Reviewed-by: DJ Delorie +--- + include/libc-pointer-arith.h | 8 ++++++++ + malloc/malloc.c | 34 +++++++++++++++++++++++++++++----- + 2 files changed, 37 insertions(+), 5 deletions(-) + +diff --git a/include/libc-pointer-arith.h b/include/libc-pointer-arith.h +index 04ba537617..55dccc10ac 100644 +--- a/include/libc-pointer-arith.h ++++ b/include/libc-pointer-arith.h +@@ -60,4 +60,12 @@ + #define PTR_ALIGN_UP(base, size) \ + ((__typeof__ (base)) ALIGN_UP ((uintptr_t) (base), (size))) + ++/* Check if BASE is aligned on SIZE */ ++#define PTR_IS_ALIGNED(base, size) \ ++ ((((uintptr_t) (base)) & (size - 1)) == 0) ++ ++/* Returns the ptrdiff_t diference between P1 and P2. */ ++#define PTR_DIFF(p1, p2) \ ++ ((ptrdiff_t)((uintptr_t)(p1) - (uintptr_t)(p2))) ++ + #endif +diff --git a/malloc/malloc.c b/malloc/malloc.c +index c75841b841..57db4dd9a5 100644 +--- a/malloc/malloc.c ++++ b/malloc/malloc.c +@@ -2023,6 +2023,16 @@ madvise_thp (void *p, INTERNAL_SIZE_T size) + not active. */ + if (mp_.thp_pagesize == 0 || size < mp_.thp_pagesize) + return; ++ ++ /* Linux requires the input address to be page-aligned, and unaligned ++ inputs happens only for initial data segment. 
*/ ++ if (__glibc_unlikely (!PTR_IS_ALIGNED (p, GLRO (dl_pagesize)))) ++ { ++ void *q = PTR_ALIGN_DOWN (p, GLRO (dl_pagesize)); ++ size += PTR_DIFF (p, q); ++ p = q; ++ } ++ + __madvise (p, size, MADV_HUGEPAGE); + #endif + } +@@ -2609,14 +2619,25 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av) + size -= old_size; + + /* +- Round to a multiple of page size. ++ Round to a multiple of page size or huge page size. + If MORECORE is not contiguous, this ensures that we only call it + with whole-page arguments. And if MORECORE is contiguous and + this is not first time through, this preserves page-alignment of + previous calls. Otherwise, we correct to page-align below. + */ + +- size = ALIGN_UP (size, pagesize); ++#if HAVE_TUNABLES && defined (MADV_HUGEPAGE) ++ /* Defined in brk.c. */ ++ extern void *__curbrk; ++ if (__glibc_unlikely (mp_.thp_pagesize != 0)) ++ { ++ uintptr_t top = ALIGN_UP ((uintptr_t) __curbrk + size, ++ mp_.thp_pagesize); ++ size = top - (uintptr_t) __curbrk; ++ } ++ else ++#endif ++ size = ALIGN_UP (size, GLRO(dl_pagesize)); + + /* + Don't try to call MORECORE if argument is so big as to appear +@@ -2899,10 +2920,8 @@ systrim (size_t pad, mstate av) + long released; /* Amount actually released */ + char *current_brk; /* address returned by pre-check sbrk call */ + char *new_brk; /* address returned by post-check sbrk call */ +- size_t pagesize; + long top_area; + +- pagesize = GLRO (dl_pagesize); + top_size = chunksize (av->top); + + top_area = top_size - MINSIZE - 1; +@@ -2910,7 +2929,12 @@ systrim (size_t pad, mstate av) + return 0; + + /* Release in pagesize units and round down to the nearest page. */ +- extra = ALIGN_DOWN(top_area - pad, pagesize); ++#if HAVE_TUNABLES && defined (MADV_HUGEPAGE) ++ if (__glibc_unlikely (mp_.thp_pagesize != 0)) ++ extra = ALIGN_DOWN (top_area - pad, mp_.thp_pagesize); ++ else ++#endif ++ extra = ALIGN_DOWN (top_area - pad, GLRO(dl_pagesize)); + + if (extra == 0) + return 0; +-- +2.33.0 + diff --git a/malloc-hugepage-0003-malloc-Move-mmap-logic-to-its-own-function.patch b/malloc-hugepage-0003-malloc-Move-mmap-logic-to-its-own-function.patch new file mode 100644 index 0000000000000000000000000000000000000000..5b1768bff898c3ff6d04ce9f0e5cdf1bbebe0d43 --- /dev/null +++ b/malloc-hugepage-0003-malloc-Move-mmap-logic-to-its-own-function.patch @@ -0,0 +1,205 @@ +From 6cc3ccc67e0dda654fc839377af2818a296f0007 Mon Sep 17 00:00:00 2001 +From: Adhemerval Zanella +Date: Mon, 16 Aug 2021 11:14:20 -0300 +Subject: [PATCH 3/7] malloc: Move mmap logic to its own function + +So it can be used with different pagesize and flags. + +Reviewed-by: DJ Delorie +--- + malloc/malloc.c | 164 ++++++++++++++++++++++++++---------------------- + 1 file changed, 88 insertions(+), 76 deletions(-) + +diff --git a/malloc/malloc.c b/malloc/malloc.c +index 57db4dd9a5..6b6ec53db1 100644 +--- a/malloc/malloc.c ++++ b/malloc/malloc.c +@@ -2412,6 +2412,85 @@ do_check_malloc_state (mstate av) + be extended or replaced. + */ + ++static void * ++sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av) ++{ ++ long int size; ++ ++ /* ++ Round up size to nearest page. For mmapped chunks, the overhead is one ++ SIZE_SZ unit larger than for normal chunks, because there is no ++ following chunk whose prev_size field could be used. ++ ++ See the front_misalign handling below, for glibc there is no need for ++ further alignments unless we have have high alignment. 
++ */ ++ if (MALLOC_ALIGNMENT == CHUNK_HDR_SZ) ++ size = ALIGN_UP (nb + SIZE_SZ, pagesize); ++ else ++ size = ALIGN_UP (nb + SIZE_SZ + MALLOC_ALIGN_MASK, pagesize); ++ ++ /* Don't try if size wraps around 0. */ ++ if ((unsigned long) (size) <= (unsigned long) (nb)) ++ return MAP_FAILED; ++ ++ char *mm = (char *) MMAP (0, size, ++ mtag_mmap_flags | PROT_READ | PROT_WRITE, ++ extra_flags); ++ if (mm == MAP_FAILED) ++ return mm; ++ ++ madvise_thp (mm, size); ++ ++ /* ++ The offset to the start of the mmapped region is stored in the prev_size ++ field of the chunk. This allows us to adjust returned start address to ++ meet alignment requirements here and in memalign(), and still be able to ++ compute proper address argument for later munmap in free() and realloc(). ++ */ ++ ++ INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */ ++ ++ if (MALLOC_ALIGNMENT == CHUNK_HDR_SZ) ++ { ++ /* For glibc, chunk2mem increases the address by CHUNK_HDR_SZ and ++ MALLOC_ALIGN_MASK is CHUNK_HDR_SZ-1. Each mmap'ed area is page ++ aligned and therefore definitely MALLOC_ALIGN_MASK-aligned. */ ++ assert (((INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK) == 0); ++ front_misalign = 0; ++ } ++ else ++ front_misalign = (INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK; ++ ++ mchunkptr p; /* the allocated/returned chunk */ ++ ++ if (front_misalign > 0) ++ { ++ ptrdiff_t correction = MALLOC_ALIGNMENT - front_misalign; ++ p = (mchunkptr) (mm + correction); ++ set_prev_size (p, correction); ++ set_head (p, (size - correction) | IS_MMAPPED); ++ } ++ else ++ { ++ p = (mchunkptr) mm; ++ set_prev_size (p, 0); ++ set_head (p, size | IS_MMAPPED); ++ } ++ ++ /* update statistics */ ++ int new = atomic_exchange_and_add (&mp_.n_mmaps, 1) + 1; ++ atomic_max (&mp_.max_n_mmaps, new); ++ ++ unsigned long sum; ++ sum = atomic_exchange_and_add (&mp_.mmapped_mem, size) + size; ++ atomic_max (&mp_.max_mmapped_mem, sum); ++ ++ check_chunk (av, p); ++ ++ return chunk2mem (p); ++} ++ + static void * + sysmalloc (INTERNAL_SIZE_T nb, mstate av) + { +@@ -2449,81 +2528,10 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av) + || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold) + && (mp_.n_mmaps < mp_.n_mmaps_max))) + { +- char *mm; /* return value from mmap call*/ +- +- try_mmap: +- /* +- Round up size to nearest page. For mmapped chunks, the overhead +- is one SIZE_SZ unit larger than for normal chunks, because there +- is no following chunk whose prev_size field could be used. +- +- See the front_misalign handling below, for glibc there is no +- need for further alignments unless we have have high alignment. +- */ +- if (MALLOC_ALIGNMENT == CHUNK_HDR_SZ) +- size = ALIGN_UP (nb + SIZE_SZ, pagesize); +- else +- size = ALIGN_UP (nb + SIZE_SZ + MALLOC_ALIGN_MASK, pagesize); ++ char *mm = sysmalloc_mmap (nb, pagesize, 0, av); ++ if (mm != MAP_FAILED) ++ return mm; + tried_mmap = true; +- +- /* Don't try if size wraps around 0 */ +- if ((unsigned long) (size) > (unsigned long) (nb)) +- { +- mm = (char *) (MMAP (0, size, +- mtag_mmap_flags | PROT_READ | PROT_WRITE, 0)); +- +- if (mm != MAP_FAILED) +- { +- madvise_thp (mm, size); +- +- /* +- The offset to the start of the mmapped region is stored +- in the prev_size field of the chunk. This allows us to adjust +- returned start address to meet alignment requirements here +- and in memalign(), and still be able to compute proper +- address argument for later munmap in free() and realloc(). 
+- */ +- +- if (MALLOC_ALIGNMENT == CHUNK_HDR_SZ) +- { +- /* For glibc, chunk2mem increases the address by +- CHUNK_HDR_SZ and MALLOC_ALIGN_MASK is +- CHUNK_HDR_SZ-1. Each mmap'ed area is page +- aligned and therefore definitely +- MALLOC_ALIGN_MASK-aligned. */ +- assert (((INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK) == 0); +- front_misalign = 0; +- } +- else +- front_misalign = (INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK; +- if (front_misalign > 0) +- { +- correction = MALLOC_ALIGNMENT - front_misalign; +- p = (mchunkptr) (mm + correction); +- set_prev_size (p, correction); +- set_head (p, (size - correction) | IS_MMAPPED); +- } +- else +- { +- p = (mchunkptr) mm; +- set_prev_size (p, 0); +- set_head (p, size | IS_MMAPPED); +- } +- +- /* update statistics */ +- +- int new = atomic_exchange_and_add (&mp_.n_mmaps, 1) + 1; +- atomic_max (&mp_.max_n_mmaps, new); +- +- unsigned long sum; +- sum = atomic_exchange_and_add (&mp_.mmapped_mem, size) + size; +- atomic_max (&mp_.max_mmapped_mem, sum); +- +- check_chunk (av, p); +- +- return chunk2mem (p); +- } +- } + } + + /* There are no usable arenas and mmap also failed. */ +@@ -2600,8 +2608,12 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av) + } + } + else if (!tried_mmap) +- /* We can at least try to use to mmap memory. */ +- goto try_mmap; ++ { ++ /* We can at least try to use to mmap memory. */ ++ char *mm = sysmalloc_mmap (nb, pagesize, 0, av); ++ if (mm != MAP_FAILED) ++ return mm; ++ } + } + else /* av == main_arena */ + +-- +2.33.0 + diff --git a/malloc-hugepage-0004-malloc-Add-Huge-Page-support-for-mmap.patch b/malloc-hugepage-0004-malloc-Add-Huge-Page-support-for-mmap.patch new file mode 100644 index 0000000000000000000000000000000000000000..1969a1fe84a3a2e3c21137958b7e3238da9e5255 --- /dev/null +++ b/malloc-hugepage-0004-malloc-Add-Huge-Page-support-for-mmap.patch @@ -0,0 +1,476 @@ +From 98d5fcb8d099a1a868e032c89891c395a2f365c5 Mon Sep 17 00:00:00 2001 +From: Adhemerval Zanella +Date: Mon, 16 Aug 2021 15:08:27 -0300 +Subject: [PATCH 4/7] malloc: Add Huge Page support for mmap + +With the morecore hook removed, there is not easy way to provide huge +pages support on with glibc allocator without resorting to transparent +huge pages. And some users and programs do prefer to use the huge pages +directly instead of THP for multiple reasons: no splitting, re-merging +by the VM, no TLB shootdowns for running processes, fast allocation +from the reserve pool, no competition with the rest of the processes +unlike THP, no swapping all, etc. + +This patch extends the 'glibc.malloc.hugetlb' tunable: the value +'2' means to use huge pages directly with the system default size, +while a positive value means and specific page size that is matched +against the supported ones by the system. + +Currently only memory allocated on sysmalloc() is handled, the arenas +still uses the default system page size. + +To test is a new rule is added tests-malloc-hugetlb2, which run the +addes tests with the required GLIBC_TUNABLE setting. On systems without +a reserved huge pages pool, is just stress the mmap(MAP_HUGETLB) +allocation failure. To improve test coverage it is required to create +a pool with some allocated pages. + +Checked on x86_64-linux-gnu. 
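(Illustrative sketch, not upstream text.) Requesting huge pages directly, as the glibc.malloc.hugetlb=2 mode does, boils down to encoding the chosen page size into the mmap flags, which is what the patch's hugepage_flags helper computes. A minimal example, assuming a Linux system with 2 MiB huge pages reserved in the pool; the patch instead matches the requested size against /sys/kernel/mm/hugepages/:

#define _GNU_SOURCE
#include <stdlib.h>
#include <sys/mman.h>

int
main (void)
{
  /* Assumed page size for illustration.  */
  size_t hp_pagesize = 2 * 1024 * 1024;
  int hp_flags = MAP_HUGETLB
                 | (__builtin_ctzll (hp_pagesize) << MAP_HUGE_SHIFT);

  void *p = mmap (NULL, hp_pagesize, PROT_READ | PROT_WRITE,
                  MAP_PRIVATE | MAP_ANONYMOUS | hp_flags, -1, 0);
  if (p == MAP_FAILED)
    return EXIT_FAILURE;   /* Empty reserved pool or no MAP_HUGETLB support.  */
  munmap (p, hp_pagesize);
  return EXIT_SUCCESS;
}
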
+ +Reviewed-by: DJ Delorie +--- + NEWS | 8 +- + Rules | 17 +++ + elf/dl-tunables.list | 3 +- + elf/tst-rtld-list-tunables.exp | 2 +- + malloc/Makefile | 8 +- + malloc/arena.c | 4 +- + malloc/malloc.c | 31 ++++- + manual/tunables.texi | 7 ++ + sysdeps/generic/malloc-hugepages.c | 8 ++ + sysdeps/generic/malloc-hugepages.h | 7 ++ + sysdeps/unix/sysv/linux/malloc-hugepages.c | 127 +++++++++++++++++++++ + 11 files changed, 207 insertions(+), 15 deletions(-) + +diff --git a/NEWS b/NEWS +index 3b94dd209c..c7200cd4e8 100644 +--- a/NEWS ++++ b/NEWS +@@ -93,9 +93,11 @@ Major new features: + Restartable Sequences. + + * On Linux, a new tunable, glibc.malloc.hugetlb, can be used to +- make malloc issue madvise plus MADV_HUGEPAGE on mmap and sbrk calls. +- Setting this might improve performance with Transparent Huge Pages madvise +- mode depending of the workload. ++ either make malloc issue madvise plus MADV_HUGEPAGE on mmap and sbrk ++ or to use huge pages directly with mmap calls with the MAP_HUGETLB ++ flags). The former can improve performance when Transparent Huge Pages ++ is set to 'madvise' mode while the latter uses the system reserved ++ huge pages. + + Deprecated and removed features, and other changes affecting compatibility: + +diff --git a/Rules b/Rules +index 5f5d9ba4cc..be34982daa 100644 +--- a/Rules ++++ b/Rules +@@ -158,6 +158,7 @@ tests: $(tests:%=$(objpfx)%.out) $(tests-internal:%=$(objpfx)%.out) \ + $(tests-mcheck:%=$(objpfx)%-mcheck.out) \ + $(tests-malloc-check:%=$(objpfx)%-malloc-check.out) \ + $(tests-malloc-hugetlb1:%=$(objpfx)%-malloc-hugetlb1.out) \ ++ $(tests-malloc-hugetlb2:%=$(objpfx)%-malloc-hugetlb2.out) \ + $(tests-special) $(tests-printers-out) + xtests: tests $(xtests:%=$(objpfx)%.out) $(xtests-special) + endif +@@ -170,6 +171,7 @@ else + tests-expected = $(tests) $(tests-internal) $(tests-printers) \ + $(tests-container) $(tests-malloc-check:%=%-malloc-check) \ + $(tests-malloc-hugetlb1:%=%-malloc-hugetlb1) \ ++ $(tests-malloc-hugetlb2:%=%-malloc-hugetlb2) \ + $(tests-mcheck:%=%-mcheck) + endif + tests: +@@ -199,6 +201,7 @@ endif + binaries-mcheck-tests = $(tests-mcheck:%=%-mcheck) + binaries-malloc-check-tests = $(tests-malloc-check:%=%-malloc-check) + binaries-malloc-hugetlb1-tests = $(tests-malloc-hugetlb1:%=%-malloc-hugetlb1) ++binaries-malloc-hugetlb2-tests = $(tests-malloc-hugetlb2:%=%-malloc-hugetlb2) + else + binaries-all-notests = + binaries-all-tests = $(tests) $(tests-internal) $(xtests) $(test-srcs) +@@ -211,6 +214,7 @@ binaries-pie-notests = + binaries-mcheck-tests = + binaries-malloc-check-tests = + binaries-malloc-hugetlb1-tests = ++binaries-malloc-hugetlb2-tests = + endif + + binaries-pie = $(binaries-pie-tests) $(binaries-pie-notests) +@@ -259,6 +263,14 @@ $(addprefix $(objpfx),$(binaries-malloc-hugetlb1-tests)): %-malloc-hugetlb1: %.o + $(+link-tests) + endif + ++ifneq "$(strip $(binaries-malloc-hugetlb2-tests))" "" ++$(addprefix $(objpfx),$(binaries-malloc-hugetlb2-tests)): %-malloc-hugetlb2: %.o \ ++ $(link-extra-libs-tests) \ ++ $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \ ++ $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit) ++ $(+link-tests) ++endif ++ + ifneq "$(strip $(binaries-pie-tests))" "" + $(addprefix $(objpfx),$(binaries-pie-tests)): %: %.o \ + $(link-extra-libs-tests) \ +@@ -302,6 +314,11 @@ $(1)-malloc-hugetlb1-ENV += GLIBC_TUNABLES=glibc.malloc.hugetlb=1 + endef + $(foreach t,$(tests-malloc-hugetlb1),$(eval $(call malloc-hugetlb1-ENVS,$(t)))) + ++# All malloc-hugetlb2 tests will be run with 
GLIBC_TUNABLE=glibc.malloc.hugetlb=2 ++define malloc-hugetlb2-ENVS ++$(1)-malloc-hugetlb2-ENV += GLIBC_TUNABLES=glibc.malloc.hugetlb=2 ++endef ++$(foreach t,$(tests-malloc-hugetlb2),$(eval $(call malloc-hugetlb2-ENVS,$(t)))) + + # mcheck tests need the debug DSO to support -lmcheck. + define mcheck-ENVS +diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list +index d1fd3f3e91..845d521a43 100644 +--- a/elf/dl-tunables.list ++++ b/elf/dl-tunables.list +@@ -93,9 +93,8 @@ glibc { + security_level: SXID_IGNORE + } + hugetlb { +- type: INT_32 ++ type: SIZE_T + minval: 0 +- maxval: 1 + } + } + cpu { +diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp +index d8e363f2c5..cdfdb56a94 100644 +--- a/elf/tst-rtld-list-tunables.exp ++++ b/elf/tst-rtld-list-tunables.exp +@@ -1,7 +1,7 @@ + glibc.malloc.arena_max: 0x0 (min: 0x1, max: 0x[f]+) + glibc.malloc.arena_test: 0x0 (min: 0x1, max: 0x[f]+) + glibc.malloc.check: 0 (min: 0, max: 3) +-glibc.malloc.hugetlb: 0 (min: 0, max: 1) ++glibc.malloc.hugetlb: 0x0 (min: 0x0, max: 0x[f]+) + glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647) + glibc.malloc.mmap_threshold: 0x0 (min: 0x0, max: 0x[f]+) + glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0x[f]+) +diff --git a/malloc/Makefile b/malloc/Makefile +index 0137595e17..e9a6666d22 100644 +--- a/malloc/Makefile ++++ b/malloc/Makefile +@@ -78,9 +78,9 @@ tests-exclude-malloc-check = tst-malloc-check tst-malloc-usable \ + tests-malloc-check = $(filter-out $(tests-exclude-malloc-check) \ + $(tests-static),$(tests)) + +-# Run all testes with GLIBC_TUNABLES=glibc.malloc.hugetlb=1 that check the +-# Transparent Huge Pages support. We need exclude some tests that define +-# the ENV vars. ++# Run all tests with GLIBC_TUNABLES=glibc.malloc.hugetlb={1,2} which check ++# the Transparent Huge Pages support (1) or automatic huge page support (2). ++# We need exclude some tests that define the ENV vars. + tests-exclude-hugetlb1 = \ + tst-compathooks-off \ + tst-compathooks-on \ +@@ -93,6 +93,8 @@ tests-exclude-hugetlb1 = \ + tst-mallocstate + tests-malloc-hugetlb1 = \ + $(filter-out $(tests-exclude-hugetlb1), $(tests)) ++tests-malloc-hugetlb2 = \ ++ $(filter-out $(tests-exclude-hugetlb1), $(tests)) + + # -lmcheck needs __malloc_initialize_hook, which was deprecated in 2.24. + ifeq ($(have-GLIBC_2.23)$(build-shared),yesyes) +diff --git a/malloc/arena.c b/malloc/arena.c +index cd00c7bef4..9a6e1af2bd 100644 +--- a/malloc/arena.c ++++ b/malloc/arena.c +@@ -230,7 +230,7 @@ TUNABLE_CALLBACK_FNDECL (set_tcache_count, size_t) + TUNABLE_CALLBACK_FNDECL (set_tcache_unsorted_limit, size_t) + #endif + TUNABLE_CALLBACK_FNDECL (set_mxfast, size_t) +-TUNABLE_CALLBACK_FNDECL (set_hugetlb, int32_t) ++TUNABLE_CALLBACK_FNDECL (set_hugetlb, size_t) + #else + /* Initialization routine. */ + #include +@@ -331,7 +331,7 @@ ptmalloc_init (void) + TUNABLE_CALLBACK (set_tcache_unsorted_limit)); + # endif + TUNABLE_GET (mxfast, size_t, TUNABLE_CALLBACK (set_mxfast)); +- TUNABLE_GET (hugetlb, int32_t, TUNABLE_CALLBACK (set_hugetlb)); ++ TUNABLE_GET (hugetlb, size_t, TUNABLE_CALLBACK (set_hugetlb)); + #else + if (__glibc_likely (_environ != NULL)) + { +diff --git a/malloc/malloc.c b/malloc/malloc.c +index 6b6ec53db1..75efdc2ee7 100644 +--- a/malloc/malloc.c ++++ b/malloc/malloc.c +@@ -1883,6 +1883,10 @@ struct malloc_par + #if HAVE_TUNABLES + /* Transparent Large Page support. */ + INTERNAL_SIZE_T thp_pagesize; ++ /* A value different than 0 means to align mmap allocation to hp_pagesize ++ add hp_flags on flags. 
*/ ++ INTERNAL_SIZE_T hp_pagesize; ++ int hp_flags; + #endif + + /* Memory map support */ +@@ -2440,7 +2444,10 @@ sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av) + if (mm == MAP_FAILED) + return mm; + +- madvise_thp (mm, size); ++#ifdef MAP_HUGETLB ++ if (!(extra_flags & MAP_HUGETLB)) ++ madvise_thp (mm, size); ++#endif + + /* + The offset to the start of the mmapped region is stored in the prev_size +@@ -2528,7 +2535,18 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av) + || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold) + && (mp_.n_mmaps < mp_.n_mmaps_max))) + { +- char *mm = sysmalloc_mmap (nb, pagesize, 0, av); ++ char *mm; ++#if HAVE_TUNABLES ++ if (mp_.hp_pagesize > 0 && nb >= mp_.hp_pagesize) ++ { ++ /* There is no need to isse the THP madvise call if Huge Pages are ++ used directly. */ ++ mm = sysmalloc_mmap (nb, mp_.hp_pagesize, mp_.hp_flags, av); ++ if (mm != MAP_FAILED) ++ return mm; ++ } ++#endif ++ mm = sysmalloc_mmap (nb, pagesize, 0, av); + if (mm != MAP_FAILED) + return mm; + tried_mmap = true; +@@ -2609,7 +2627,9 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av) + } + else if (!tried_mmap) + { +- /* We can at least try to use to mmap memory. */ ++ /* We can at least try to use to mmap memory. If new_heap fails ++ it is unlikely that trying to allocate huge pages will ++ succeed. */ + char *mm = sysmalloc_mmap (nb, pagesize, 0, av); + if (mm != MAP_FAILED) + return mm; +@@ -5383,7 +5403,7 @@ do_set_mxfast (size_t value) + + #if HAVE_TUNABLES + static __always_inline int +-do_set_hugetlb (int32_t value) ++do_set_hugetlb (size_t value) + { + if (value == 1) + { +@@ -5395,6 +5415,9 @@ do_set_hugetlb (int32_t value) + if (thp_mode == malloc_thp_mode_madvise) + mp_.thp_pagesize = __malloc_default_thp_pagesize (); + } ++ else if (value >= 2) ++ __malloc_hugepage_config (value == 2 ? 0 : value, &mp_.hp_pagesize, ++ &mp_.hp_flags); + return 0; + } + #endif +diff --git a/manual/tunables.texi b/manual/tunables.texi +index 9ca6e3f603..58a47b2e9b 100644 +--- a/manual/tunables.texi ++++ b/manual/tunables.texi +@@ -278,6 +278,13 @@ default value is @code{0}, which disables any additional support on + Setting its value to @code{1} enables the use of @code{madvise} with + @code{MADV_HUGEPAGE} after memory allocation with @code{mmap}. It is enabled + only if the system supports Transparent Huge Page (currently only on Linux). ++ ++Setting its value to @code{2} enables the use of Huge Page directly with ++@code{mmap} with the use of @code{MAP_HUGETLB} flag. The huge page size ++to use will be the default one provided by the system. A value larger than ++@code{2} specifies huge page size, which will be matched against the system ++supported ones. If provided value is invalid, @code{MAP_HUGETLB} will not ++be used. + @end deftp + + @node Dynamic Linking Tunables +diff --git a/sysdeps/generic/malloc-hugepages.c b/sysdeps/generic/malloc-hugepages.c +index 8fb459a263..946284a33c 100644 +--- a/sysdeps/generic/malloc-hugepages.c ++++ b/sysdeps/generic/malloc-hugepages.c +@@ -29,3 +29,11 @@ __malloc_thp_mode (void) + { + return malloc_thp_mode_not_supported; + } ++ ++/* Return the default transparent huge page size. 
*/ ++void ++__malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags) ++{ ++ *pagesize = 0; ++ *flags = 0; ++} +diff --git a/sysdeps/generic/malloc-hugepages.h b/sysdeps/generic/malloc-hugepages.h +index f5a442e328..75cda3796a 100644 +--- a/sysdeps/generic/malloc-hugepages.h ++++ b/sysdeps/generic/malloc-hugepages.h +@@ -34,4 +34,11 @@ enum malloc_thp_mode_t + + enum malloc_thp_mode_t __malloc_thp_mode (void) attribute_hidden; + ++/* Return the supported huge page size from the REQUESTED sizes on PAGESIZE ++ along with the required extra mmap flags on FLAGS, Requesting the value ++ of 0 returns the default huge page size, otherwise the value will be ++ matched against the sizes supported by the system. */ ++void __malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags) ++ attribute_hidden; ++ + #endif /* _MALLOC_HUGEPAGES_H */ +diff --git a/sysdeps/unix/sysv/linux/malloc-hugepages.c b/sysdeps/unix/sysv/linux/malloc-hugepages.c +index 7497e07260..0e05291d61 100644 +--- a/sysdeps/unix/sysv/linux/malloc-hugepages.c ++++ b/sysdeps/unix/sysv/linux/malloc-hugepages.c +@@ -17,8 +17,10 @@ + not, see . */ + + #include ++#include + #include + #include ++#include + + unsigned long int + __malloc_default_thp_pagesize (void) +@@ -72,3 +74,128 @@ __malloc_thp_mode (void) + } + return malloc_thp_mode_not_supported; + } ++ ++static size_t ++malloc_default_hugepage_size (void) ++{ ++ int fd = __open64_nocancel ("/proc/meminfo", O_RDONLY); ++ if (fd == -1) ++ return 0; ++ ++ size_t hpsize = 0; ++ ++ char buf[512]; ++ off64_t off = 0; ++ while (1) ++ { ++ ssize_t r = __pread64_nocancel (fd, buf, sizeof (buf) - 1, off); ++ if (r < 0) ++ break; ++ buf[r] = '\0'; ++ ++ /* If the tag is not found, read the last line again. */ ++ const char *s = strstr (buf, "Hugepagesize:"); ++ if (s == NULL) ++ { ++ char *nl = strrchr (buf, '\n'); ++ if (nl == NULL) ++ break; ++ off += (nl + 1) - buf; ++ continue; ++ } ++ ++ /* The default huge page size is in the form: ++ Hugepagesize: NUMBER kB */ ++ s += sizeof ("Hugepagesize: ") - 1; ++ for (int i = 0; (s[i] >= '0' && s[i] <= '9') || s[i] == ' '; i++) ++ { ++ if (s[i] == ' ') ++ continue; ++ hpsize *= 10; ++ hpsize += s[i] - '0'; ++ } ++ hpsize *= 1024; ++ break; ++ } ++ ++ __close_nocancel (fd); ++ ++ return hpsize; ++} ++ ++static inline int ++hugepage_flags (size_t pagesize) ++{ ++ return MAP_HUGETLB | (__builtin_ctzll (pagesize) << MAP_HUGE_SHIFT); ++} ++ ++void ++__malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags) ++{ ++ *pagesize = 0; ++ *flags = 0; ++ ++ if (requested == 0) ++ { ++ *pagesize = malloc_default_hugepage_size (); ++ if (*pagesize != 0) ++ *flags = hugepage_flags (*pagesize); ++ return; ++ } ++ ++ /* Each entry represents a supported huge page in the form of: ++ hugepages-kB. */ ++ int dirfd = __open64_nocancel ("/sys/kernel/mm/hugepages", ++ O_RDONLY | O_DIRECTORY, 0); ++ if (dirfd == -1) ++ return; ++ ++ char buffer[1024]; ++ while (true) ++ { ++#if !IS_IN(libc) ++# define __getdents64 getdents64 ++#endif ++ ssize_t ret = __getdents64 (dirfd, buffer, sizeof (buffer)); ++ if (ret == -1) ++ break; ++ else if (ret == 0) ++ break; ++ ++ bool found = false; ++ char *begin = buffer, *end = buffer + ret; ++ while (begin != end) ++ { ++ unsigned short int d_reclen; ++ memcpy (&d_reclen, begin + offsetof (struct dirent64, d_reclen), ++ sizeof (d_reclen)); ++ const char *dname = begin + offsetof (struct dirent64, d_name); ++ begin += d_reclen; ++ ++ if (dname[0] == '.' 
++ || strncmp (dname, "hugepages-", sizeof ("hugepages-") - 1) != 0) ++ continue; ++ ++ size_t hpsize = 0; ++ const char *sizestr = dname + sizeof ("hugepages-") - 1; ++ for (int i = 0; sizestr[i] >= '0' && sizestr[i] <= '9'; i++) ++ { ++ hpsize *= 10; ++ hpsize += sizestr[i] - '0'; ++ } ++ hpsize *= 1024; ++ ++ if (hpsize == requested) ++ { ++ *pagesize = hpsize; ++ *flags = hugepage_flags (*pagesize); ++ found = true; ++ break; ++ } ++ } ++ if (found) ++ break; ++ } ++ ++ __close_nocancel (dirfd); ++} +-- +2.33.0 + diff --git a/malloc-hugepage-0005-malloc-Add-Huge-Page-support-to-arenas.patch b/malloc-hugepage-0005-malloc-Add-Huge-Page-support-to-arenas.patch new file mode 100644 index 0000000000000000000000000000000000000000..f759588b0dc8cf464b9b2d5ede21e2e13bb8115a --- /dev/null +++ b/malloc-hugepage-0005-malloc-Add-Huge-Page-support-to-arenas.patch @@ -0,0 +1,338 @@ +From c1beb51d08d3d7ec935b0a2419b4c6fad91d1969 Mon Sep 17 00:00:00 2001 +From: Adhemerval Zanella +Date: Fri, 20 Aug 2021 13:22:35 -0300 +Subject: [PATCH 5/7] malloc: Add Huge Page support to arenas + +It is enabled as default for glibc.malloc.hugetlb set to 2 or higher. +It also uses a non configurable minimum value and maximum value, +currently set respectively to 1 and 4 selected huge page size. + +The arena allocation with huge pages does not use MAP_NORESERVE. As +indicate by kernel internal documentation [1], the flag might trigger +a SIGBUS on soft page faults if at memory access there is no left +pages in the pool. + +On systems without a reserved huge pages pool, is just stress the +mmap(MAP_HUGETLB) allocation failure. To improve test coverage it is +required to create a pool with some allocated pages. + +Checked on x86_64-linux-gnu with no reserved pages, 10 reserved pages +(which trigger mmap(MAP_HUGETBL) failures) and with 256 reserved pages +(which does not trigger mmap(MAP_HUGETLB) failures). + +[1] https://www.kernel.org/doc/html/v4.18/vm/hugetlbfs_reserv.html#resv-map-modifications + +Reviewed-by: DJ Delorie +--- + malloc/Makefile | 7 ++- + malloc/arena.c | 134 +++++++++++++++++++++++++++++++++--------------- + malloc/malloc.c | 2 +- + 3 files changed, 99 insertions(+), 44 deletions(-) + +diff --git a/malloc/Makefile b/malloc/Makefile +index e9a6666d22..451eb84612 100644 +--- a/malloc/Makefile ++++ b/malloc/Makefile +@@ -91,10 +91,15 @@ tests-exclude-hugetlb1 = \ + tst-malloc-usable \ + tst-malloc-usable-tunables \ + tst-mallocstate ++# The tst-free-errno relies on the used malloc page size to mmap an ++# overlapping region. ++tests-exclude-hugetlb2 = \ ++ $(tests-exclude-hugetlb1) \ ++ tst-free-errno + tests-malloc-hugetlb1 = \ + $(filter-out $(tests-exclude-hugetlb1), $(tests)) + tests-malloc-hugetlb2 = \ +- $(filter-out $(tests-exclude-hugetlb1), $(tests)) ++ $(filter-out $(tests-exclude-hugetlb2), $(tests)) + + # -lmcheck needs __malloc_initialize_hook, which was deprecated in 2.24. + ifeq ($(have-GLIBC_2.23)$(build-shared),yesyes) +diff --git a/malloc/arena.c b/malloc/arena.c +index 9a6e1af2bd..e1852f8597 100644 +--- a/malloc/arena.c ++++ b/malloc/arena.c +@@ -41,6 +41,29 @@ + mmap threshold, so that requests with a size just below that + threshold can be fulfilled without creating too many heaps. */ + ++/* When huge pages are used to create new arenas, the maximum and minumum ++ size are based on the runtime defined huge page size. */ ++ ++static inline size_t ++heap_min_size (void) ++{ ++#if HAVE_TUNABLES ++ return mp_.hp_pagesize == 0 ? 
HEAP_MIN_SIZE : mp_.hp_pagesize; ++#else ++ return HEAP_MIN_SIZE; ++#endif ++} ++ ++static inline size_t ++heap_max_size (void) ++{ ++#if HAVE_TUNABLES ++ return mp_.hp_pagesize == 0 ? HEAP_MAX_SIZE : mp_.hp_pagesize * 4; ++#else ++ return HEAP_MAX_SIZE; ++#endif ++} ++ + /***************************************************************************/ + + #define top(ar_ptr) ((ar_ptr)->top) +@@ -56,10 +79,11 @@ typedef struct _heap_info + size_t size; /* Current size in bytes. */ + size_t mprotect_size; /* Size in bytes that has been mprotected + PROT_READ|PROT_WRITE. */ ++ size_t pagesize; /* Page size used when allocating the arena. */ + /* Make sure the following data is properly aligned, particularly + that sizeof (heap_info) + 2 * SIZE_SZ is a multiple of + MALLOC_ALIGNMENT. */ +- char pad[-6 * SIZE_SZ & MALLOC_ALIGN_MASK]; ++ char pad[-3 * SIZE_SZ & MALLOC_ALIGN_MASK]; + } heap_info; + + /* Get a compile-time error if the heap_info padding is not correct +@@ -125,10 +149,18 @@ static bool __malloc_initialized = false; + + /* find the heap and corresponding arena for a given ptr */ + +-#define heap_for_ptr(ptr) \ +- ((heap_info *) ((unsigned long) (ptr) & ~(HEAP_MAX_SIZE - 1))) +-#define arena_for_chunk(ptr) \ +- (chunk_main_arena (ptr) ? &main_arena : heap_for_ptr (ptr)->ar_ptr) ++static inline heap_info * ++heap_for_ptr (void *ptr) ++{ ++ size_t max_size = heap_max_size (); ++ return PTR_ALIGN_DOWN (ptr, max_size); ++} ++ ++static inline struct malloc_state * ++arena_for_chunk (mchunkptr ptr) ++{ ++ return chunk_main_arena (ptr) ? &main_arena : heap_for_ptr (ptr)->ar_ptr; ++} + + + /**************************************************************************/ +@@ -443,71 +475,72 @@ static char *aligned_heap_area; + of the page size. */ + + static heap_info * +-new_heap (size_t size, size_t top_pad) ++alloc_new_heap (size_t size, size_t top_pad, size_t pagesize, ++ int mmap_flags) + { +- size_t pagesize = GLRO (dl_pagesize); + char *p1, *p2; + unsigned long ul; + heap_info *h; ++ size_t min_size = heap_min_size (); ++ size_t max_size = heap_max_size (); + +- if (size + top_pad < HEAP_MIN_SIZE) +- size = HEAP_MIN_SIZE; +- else if (size + top_pad <= HEAP_MAX_SIZE) ++ if (size + top_pad < min_size) ++ size = min_size; ++ else if (size + top_pad <= max_size) + size += top_pad; +- else if (size > HEAP_MAX_SIZE) ++ else if (size > max_size) + return 0; + else +- size = HEAP_MAX_SIZE; ++ size = max_size; + size = ALIGN_UP (size, pagesize); + +- /* A memory region aligned to a multiple of HEAP_MAX_SIZE is needed. ++ /* A memory region aligned to a multiple of max_size is needed. + No swap space needs to be reserved for the following large + mapping (on Linux, this is the case for all non-writable mappings + anyway). 
*/ + p2 = MAP_FAILED; + if (aligned_heap_area) + { +- p2 = (char *) MMAP (aligned_heap_area, HEAP_MAX_SIZE, PROT_NONE, +- MAP_NORESERVE); ++ p2 = (char *) MMAP (aligned_heap_area, max_size, PROT_NONE, mmap_flags); + aligned_heap_area = NULL; +- if (p2 != MAP_FAILED && ((unsigned long) p2 & (HEAP_MAX_SIZE - 1))) ++ if (p2 != MAP_FAILED && ((unsigned long) p2 & (max_size - 1))) + { +- __munmap (p2, HEAP_MAX_SIZE); ++ __munmap (p2, max_size); + p2 = MAP_FAILED; + } + } + if (p2 == MAP_FAILED) + { +- p1 = (char *) MMAP (0, HEAP_MAX_SIZE << 1, PROT_NONE, MAP_NORESERVE); ++ p1 = (char *) MMAP (0, max_size << 1, PROT_NONE, mmap_flags); + if (p1 != MAP_FAILED) + { +- p2 = (char *) (((unsigned long) p1 + (HEAP_MAX_SIZE - 1)) +- & ~(HEAP_MAX_SIZE - 1)); ++ p2 = (char *) (((unsigned long) p1 + (max_size - 1)) ++ & ~(max_size - 1)); + ul = p2 - p1; + if (ul) + __munmap (p1, ul); + else +- aligned_heap_area = p2 + HEAP_MAX_SIZE; +- __munmap (p2 + HEAP_MAX_SIZE, HEAP_MAX_SIZE - ul); ++ aligned_heap_area = p2 + max_size; ++ __munmap (p2 + max_size, max_size - ul); + } + else + { +- /* Try to take the chance that an allocation of only HEAP_MAX_SIZE ++ /* Try to take the chance that an allocation of only max_size + is already aligned. */ +- p2 = (char *) MMAP (0, HEAP_MAX_SIZE, PROT_NONE, MAP_NORESERVE); ++ p2 = (char *) MMAP (0, max_size, PROT_NONE, mmap_flags); + if (p2 == MAP_FAILED) + return 0; + +- if ((unsigned long) p2 & (HEAP_MAX_SIZE - 1)) ++ if ((unsigned long) p2 & (max_size - 1)) + { +- __munmap (p2, HEAP_MAX_SIZE); ++ __munmap (p2, max_size); + return 0; + } + } + } + if (__mprotect (p2, size, mtag_mmap_flags | PROT_READ | PROT_WRITE) != 0) + { +- __munmap (p2, HEAP_MAX_SIZE); ++ __munmap (p2, max_size); + return 0; + } + +@@ -516,22 +549,42 @@ new_heap (size_t size, size_t top_pad) + h = (heap_info *) p2; + h->size = size; + h->mprotect_size = size; ++ h->pagesize = pagesize; + LIBC_PROBE (memory_heap_new, 2, h, h->size); + return h; + } + ++static heap_info * ++new_heap (size_t size, size_t top_pad) ++{ ++#if HAVE_TUNABLES ++ if (__glibc_unlikely (mp_.hp_pagesize != 0)) ++ { ++ /* MAP_NORESERVE is not used for huge pages because some kernel may ++ not reserve the mmap region and a subsequent access may trigger ++ a SIGBUS if there is no free pages in the pool. */ ++ heap_info *h = alloc_new_heap (size, top_pad, mp_.hp_pagesize, ++ mp_.hp_flags); ++ if (h != NULL) ++ return h; ++ } ++#endif ++ return alloc_new_heap (size, top_pad, GLRO (dl_pagesize), MAP_NORESERVE); ++} ++ + /* Grow a heap. size is automatically rounded up to a + multiple of the page size. */ + + static int + grow_heap (heap_info *h, long diff) + { +- size_t pagesize = GLRO (dl_pagesize); ++ size_t pagesize = h->pagesize; ++ size_t max_size = heap_max_size (); + long new_size; + + diff = ALIGN_UP (diff, pagesize); + new_size = (long) h->size + diff; +- if ((unsigned long) new_size > (unsigned long) HEAP_MAX_SIZE) ++ if ((unsigned long) new_size > (unsigned long) max_size) + return -1; + + if ((unsigned long) new_size > h->mprotect_size) +@@ -581,21 +634,14 @@ shrink_heap (heap_info *h, long diff) + + /* Delete a heap. 
*/ + +-#define delete_heap(heap) \ +- do { \ +- if ((char *) (heap) + HEAP_MAX_SIZE == aligned_heap_area) \ +- aligned_heap_area = NULL; \ +- __munmap ((char *) (heap), HEAP_MAX_SIZE); \ +- } while (0) +- + static int + heap_trim (heap_info *heap, size_t pad) + { + mstate ar_ptr = heap->ar_ptr; +- unsigned long pagesz = GLRO (dl_pagesize); + mchunkptr top_chunk = top (ar_ptr), p; + heap_info *prev_heap; + long new_size, top_size, top_area, extra, prev_size, misalign; ++ size_t max_size = heap_max_size (); + + /* Can this heap go away completely? */ + while (top_chunk == chunk_at_offset (heap, sizeof (*heap))) +@@ -612,19 +658,23 @@ heap_trim (heap_info *heap, size_t pad) + assert (new_size > 0 && new_size < (long) (2 * MINSIZE)); + if (!prev_inuse (p)) + new_size += prev_size (p); +- assert (new_size > 0 && new_size < HEAP_MAX_SIZE); +- if (new_size + (HEAP_MAX_SIZE - prev_heap->size) < pad + MINSIZE + pagesz) ++ assert (new_size > 0 && new_size < max_size); ++ if (new_size + (max_size - prev_heap->size) < pad + MINSIZE ++ + heap->pagesize) + break; + ar_ptr->system_mem -= heap->size; + LIBC_PROBE (memory_heap_free, 2, heap, heap->size); +- delete_heap (heap); ++ if ((char *) heap + max_size == aligned_heap_area) ++ aligned_heap_area = NULL; ++ __munmap (heap, max_size); + heap = prev_heap; + if (!prev_inuse (p)) /* consolidate backward */ + { + p = prev_chunk (p); + unlink_chunk (ar_ptr, p); + } +- assert (((unsigned long) ((char *) p + new_size) & (pagesz - 1)) == 0); ++ assert (((unsigned long) ((char *) p + new_size) & (heap->pagesize - 1)) ++ == 0); + assert (((char *) p + new_size) == ((char *) heap + heap->size)); + top (ar_ptr) = top_chunk = p; + set_head (top_chunk, new_size | PREV_INUSE); +@@ -644,7 +694,7 @@ heap_trim (heap_info *heap, size_t pad) + return 0; + + /* Release in pagesize units and round down to the nearest page. */ +- extra = ALIGN_DOWN(top_area - pad, pagesz); ++ extra = ALIGN_DOWN(top_area - pad, heap->pagesize); + if (extra == 0) + return 0; + +diff --git a/malloc/malloc.c b/malloc/malloc.c +index 75efdc2ee7..1698d45d1e 100644 +--- a/malloc/malloc.c ++++ b/malloc/malloc.c +@@ -5302,7 +5302,7 @@ static __always_inline int + do_set_mmap_threshold (size_t value) + { + /* Forbid setting the threshold too high. */ +- if (value <= HEAP_MAX_SIZE / 2) ++ if (value <= heap_max_size () / 2) + { + LIBC_PROBE (memory_mallopt_mmap_threshold, 3, value, mp_.mmap_threshold, + mp_.no_dyn_threshold); +-- +2.33.0 + diff --git a/malloc-hugepage-0006-malloc-Move-MORECORE-fallback-mmap-to-sysmalloc_mmap.patch b/malloc-hugepage-0006-malloc-Move-MORECORE-fallback-mmap-to-sysmalloc_mmap.patch new file mode 100644 index 0000000000000000000000000000000000000000..afc5650dab234c6ac8adf184654b694a35c5e51d --- /dev/null +++ b/malloc-hugepage-0006-malloc-Move-MORECORE-fallback-mmap-to-sysmalloc_mmap.patch @@ -0,0 +1,119 @@ +From 0849eed45daabf30a02c153695041597d6d43b2d Mon Sep 17 00:00:00 2001 +From: Adhemerval Zanella +Date: Mon, 30 Aug 2021 10:56:55 -0300 +Subject: [PATCH 6/7] malloc: Move MORECORE fallback mmap to + sysmalloc_mmap_fallback + +So it can be used on hugepage code as well. 
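(Illustrative sketch, not upstream text.) The fallback this refactoring centralizes is the classic one: extend the break with sbrk and, when that fails, fall back to a fresh anonymous mmap. A rough standalone approximation; the real sysmalloc_mmap_fallback additionally rounds the size and marks the sbrk region as non-contiguous:

#include <stdlib.h>
#include <unistd.h>
#include <sys/mman.h>

/* Rough approximation of the MORECORE-then-mmap fallback.  */
static void *
get_memory (size_t size)
{
  void *brk = sbrk (size);
  if (brk != (void *) -1)
    return brk;
  void *p = mmap (NULL, size, PROT_READ | PROT_WRITE,
                  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  return p == MAP_FAILED ? NULL : p;
}

int
main (void)
{
  return get_memory (1 << 20) != NULL ? EXIT_SUCCESS : EXIT_FAILURE;
}
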
+ +Reviewed-by: DJ Delorie +--- + malloc/malloc.c | 85 ++++++++++++++++++++++++++++++------------------- + 1 file changed, 53 insertions(+), 32 deletions(-) + +diff --git a/malloc/malloc.c b/malloc/malloc.c +index 1698d45d1e..32050be4cc 100644 +--- a/malloc/malloc.c ++++ b/malloc/malloc.c +@@ -2498,6 +2498,51 @@ sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av) + return chunk2mem (p); + } + ++/* ++ Allocate memory using mmap() based on S and NB requested size, aligning to ++ PAGESIZE if required. The EXTRA_FLAGS is used on mmap() call. If the call ++ succeedes S is updated with the allocated size. This is used as a fallback ++ if MORECORE fails. ++ */ ++static void * ++sysmalloc_mmap_fallback (long int *s, INTERNAL_SIZE_T nb, ++ INTERNAL_SIZE_T old_size, size_t minsize, ++ size_t pagesize, int extra_flags, mstate av) ++{ ++ long int size = *s; ++ ++ /* Cannot merge with old top, so add its size back in */ ++ if (contiguous (av)) ++ size = ALIGN_UP (size + old_size, pagesize); ++ ++ /* If we are relying on mmap as backup, then use larger units */ ++ if ((unsigned long) (size) < minsize) ++ size = minsize; ++ ++ /* Don't try if size wraps around 0 */ ++ if ((unsigned long) (size) <= (unsigned long) (nb)) ++ return MORECORE_FAILURE; ++ ++ char *mbrk = (char *) (MMAP (0, size, ++ mtag_mmap_flags | PROT_READ | PROT_WRITE, ++ extra_flags)); ++ if (mbrk == MAP_FAILED) ++ return MAP_FAILED; ++ ++#ifdef MAP_HUGETLB ++ if (!(extra_flags & MAP_HUGETLB)) ++ madvise_thp (mbrk, size); ++#endif ++ ++ /* Record that we no longer have a contiguous sbrk region. After the first ++ time mmap is used as backup, we do not ever rely on contiguous space ++ since this could incorrectly bridge regions. */ ++ set_noncontiguous (av); ++ ++ *s = size; ++ return mbrk; ++} ++ + static void * + sysmalloc (INTERNAL_SIZE_T nb, mstate av) + { +@@ -2696,38 +2741,14 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av) + segregated mmap region. + */ + +- /* Cannot merge with old top, so add its size back in */ +- if (contiguous (av)) +- size = ALIGN_UP (size + old_size, pagesize); +- +- /* If we are relying on mmap as backup, then use larger units */ +- if ((unsigned long) (size) < (unsigned long) (MMAP_AS_MORECORE_SIZE)) +- size = MMAP_AS_MORECORE_SIZE; +- +- /* Don't try if size wraps around 0 */ +- if ((unsigned long) (size) > (unsigned long) (nb)) +- { +- char *mbrk = (char *) (MMAP (0, size, +- mtag_mmap_flags | PROT_READ | PROT_WRITE, +- 0)); +- +- if (mbrk != MAP_FAILED) +- { +- madvise_thp (mbrk, size); +- +- /* We do not need, and cannot use, another sbrk call to find end */ +- brk = mbrk; +- snd_brk = brk + size; +- +- /* +- Record that we no longer have a contiguous sbrk region. +- After the first time mmap is used as backup, we do not +- ever rely on contiguous space since this could incorrectly +- bridge regions. 
+- */ +- set_noncontiguous (av); +- } +- } ++ char *mbrk = sysmalloc_mmap_fallback (&size, nb, old_size, pagesize, ++ MMAP_AS_MORECORE_SIZE, 0, av); ++ if (mbrk != MAP_FAILED) ++ { ++ /* We do not need, and cannot use, another sbrk call to find end */ ++ brk = mbrk; ++ snd_brk = brk + size; ++ } + } + + if (brk != (char *) (MORECORE_FAILURE)) +-- +2.33.0 + diff --git a/malloc-hugepage-0007-malloc-Enable-huge-page-support-on-main-arena.patch b/malloc-hugepage-0007-malloc-Enable-huge-page-support-on-main-arena.patch new file mode 100644 index 0000000000000000000000000000000000000000..fa5c9db43787173aa477ffdf193abc2385c9692f --- /dev/null +++ b/malloc-hugepage-0007-malloc-Enable-huge-page-support-on-main-arena.patch @@ -0,0 +1,86 @@ +From 0f982c182760bd7689769ee7590df592d0a132c0 Mon Sep 17 00:00:00 2001 +From: Adhemerval Zanella +Date: Mon, 30 Aug 2021 14:01:00 -0300 +Subject: [PATCH 7/7] malloc: Enable huge page support on main arena + +This patch adds support huge page support on main arena allocation, +enable with tunable glibc.malloc.hugetlb=2. The patch essentially +disable the __glibc_morecore() sbrk() call (similar when memory +tag does when sbrk() call does not support it) and fallback to +default page size if the memory allocation fails. + +Checked on x86_64-linux-gnu. + +Reviewed-by: DJ Delorie +--- + malloc/arena.c | 4 ++++ + malloc/malloc.c | 12 ++++++++++-- + malloc/morecore.c | 4 ---- + 3 files changed, 14 insertions(+), 6 deletions(-) + +diff --git a/malloc/arena.c b/malloc/arena.c +index e1852f8597..3ed4ef3f05 100644 +--- a/malloc/arena.c ++++ b/malloc/arena.c +@@ -364,6 +364,10 @@ ptmalloc_init (void) + # endif + TUNABLE_GET (mxfast, size_t, TUNABLE_CALLBACK (set_mxfast)); + TUNABLE_GET (hugetlb, size_t, TUNABLE_CALLBACK (set_hugetlb)); ++ if (mp_.hp_pagesize > 0) ++ /* Force mmap for main arena instead of sbrk, so hugepages are explicitly ++ used. */ ++ __always_fail_morecore = true; + #else + if (__glibc_likely (_environ != NULL)) + { +diff --git a/malloc/malloc.c b/malloc/malloc.c +index 32050be4cc..b67f2c84ee 100644 +--- a/malloc/malloc.c ++++ b/malloc/malloc.c +@@ -2741,8 +2741,16 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av) + segregated mmap region. + */ + +- char *mbrk = sysmalloc_mmap_fallback (&size, nb, old_size, pagesize, +- MMAP_AS_MORECORE_SIZE, 0, av); ++ char *mbrk = MAP_FAILED; ++#if HAVE_TUNABLES ++ if (mp_.hp_pagesize > 0) ++ mbrk = sysmalloc_mmap_fallback (&size, nb, old_size, ++ mp_.hp_pagesize, mp_.hp_pagesize, ++ mp_.hp_flags, av); ++#endif ++ if (mbrk == MAP_FAILED) ++ mbrk = sysmalloc_mmap_fallback (&size, nb, old_size, pagesize, ++ MMAP_AS_MORECORE_SIZE, 0, av); + if (mbrk != MAP_FAILED) + { + /* We do not need, and cannot use, another sbrk call to find end */ +diff --git a/malloc/morecore.c b/malloc/morecore.c +index 8168ef158c..004cd3ead4 100644 +--- a/malloc/morecore.c ++++ b/malloc/morecore.c +@@ -15,9 +15,7 @@ + License along with the GNU C Library; if not, see + . */ + +-#if defined(SHARED) || defined(USE_MTAG) + static bool __always_fail_morecore = false; +-#endif + + /* Allocate INCREMENT more bytes of data space, + and return the start of data space, or NULL on errors. +@@ -25,10 +23,8 @@ static bool __always_fail_morecore = false; + void * + __glibc_morecore (ptrdiff_t increment) + { +-#if defined(SHARED) || defined(USE_MTAG) + if (__always_fail_morecore) + return NULL; +-#endif + + void *result = (void *) __sbrk (increment); + if (result == (void *) -1) +-- +2.33.0 +
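The last patch in the series changes how the main arena obtains memory: per its commit message, setting the tunable glibc.malloc.hugetlb=2 disables the __glibc_morecore() sbrk() path and routes main-arena growth through the huge-page mmap fallback, reverting to the default page size if that allocation fails. A small standalone program such as the sketch below can be used to observe the effect once the patched glibc is installed. Everything in the sketch is an illustrative assumption rather than part of the patches (the file name tst-hugetlb-demo, the block count and size, and the use of /proc/<pid>/smaps_rollup as the observation point); the only real interface it relies on is the GLIBC_TUNABLES environment variable that glibc uses to set tunables.

/* Illustrative sketch only -- not part of the glibc patches above.
   Allocate enough small blocks to grow the main arena, then print the
   huge-page counters the kernel reports for this process.  */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int
main (void)
{
  enum { NBLOCKS = 1024, BLOCKSIZE = 64 * 1024 };   /* ~64 MiB in total.  */
  static void *blocks[NBLOCKS];

  for (int i = 0; i < NBLOCKS; i++)
    {
      /* 64 KiB is below the default mmap threshold, so these requests are
         served from the main arena rather than by individual mmap calls.  */
      blocks[i] = malloc (BLOCKSIZE);
      if (blocks[i] == NULL)
        {
          perror ("malloc");
          return 1;
        }
      /* Touch the memory so the kernel actually backs it with pages.  */
      memset (blocks[i], 0x55, BLOCKSIZE);
    }

  /* Show how much of this process's address space is backed by huge
     pages (AnonHugePages for THP, *_Hugetlb for explicit huge pages).  */
  char cmd[80];
  snprintf (cmd, sizeof cmd,
            "grep -E 'AnonHugePages|Hugetlb' /proc/%d/smaps_rollup",
            (int) getpid ());
  if (system (cmd) == -1)
    perror ("system");

  for (int i = 0; i < NBLOCKS; i++)
    free (blocks[i]);
  return 0;
}

Running the program once normally and once as GLIBC_TUNABLES=glibc.malloc.hugetlb=2 ./tst-hugetlb-demo should show different huge-page counters on a system with huge pages reserved in the kernel pool; on a system without a reserved pool the huge-page mmap fails and the allocator simply falls back to the default page size, as described in the commit message of patch 7.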