diff --git a/add-libtcmalloc_2m.so-in-gperftools-libs-rpm-package.patch b/add-libtcmalloc_2m.so-in-gperftools-libs-rpm-package.patch new file mode 100644 index 0000000000000000000000000000000000000000..7276c0b1d6c7d68aa039be6342ffe059780085f4 --- /dev/null +++ b/add-libtcmalloc_2m.so-in-gperftools-libs-rpm-package.patch @@ -0,0 +1,114 @@ +From 92ae2027b9e9985f9f3ac90a007c9df452ea9cad Mon Sep 17 00:00:00 2001 +From: liubo +Date: Sat, 13 Apr 2024 03:23:10 +0800 +Subject: [PATCH] add libtcmalloc_2m.so in gperftools-libs rpm package + +In the Ceph scenario, enabling tcmalloc huge pages can +reduce the TLB miss rate and improve performance. + +However, tcmalloc does not support huge page release. Therefore, +release logic needs to be added. In this way, when +the tcmalloc huge page is used, the memory can be +released back to the OS, preventing memory overuse. + +The libtcmalloc_2m.so file is added to tcmalloc to support +hugetlb and services that require 2 MB tcmalloc. + +The native tcmalloc.so file is not affected. + +Signed-off-by: liubo +--- + Makefile.am | 10 ++++++++++ + src/common.h | 13 +++++++++++-- + src/span.h | 4 ++-- + src/system-alloc.cc | 2 +- + 4 files changed, 24 insertions(+), 5 deletions(-) + +diff --git a/Makefile.am b/Makefile.am +index 82be544..2443e80 100644 +--- a/Makefile.am ++++ b/Makefile.am +@@ -967,6 +967,16 @@ libtcmalloc_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS) \ + libtcmalloc_la_LDFLAGS = $(PTHREAD_CFLAGS) -version-info @TCMALLOC_SO_VERSION@ + libtcmalloc_la_LIBADD = libtcmalloc_internal.la libmaybe_threads.la $(PTHREAD_LIBS) + ++# add libtcmalloc_2m.so, use 2m hugetlb for tcmalloc page alloc. 
++lib_LTLIBRARIES += libtcmalloc_2m.la ++libtcmalloc_2m_la_SOURCES = $(TCMALLOC_CC) $(TCMALLOC_INCLUDES) \ ++ $(HEAP_CHECKER_SOURCES) $(libtcmalloc_internal_la_SOURCES) ++libtcmalloc_2m_la_CXXFLAGS = $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS) \ ++ $(MAYBE_NO_HEAP_CHECK) $(EMERGENCY_MALLOC_DEFINE) -DTCMALLOC_PAGE_SIZE_2M ++libtcmalloc_2m_la_LDFLAGS = $(PTHREAD_CFLAGS) -version-info @TCMALLOC_SO_VERSION@ ++libtcmalloc_2m_la_LIBADD = libstacktrace.la libmaybe_threads.la $(PTHREAD_LIBS) ++ ++ + # same as above with without -DNDEBUG + noinst_LTLIBRARIES += libtcmalloc_internal_with_asserts.la + libtcmalloc_internal_with_asserts_la_SOURCES = $(libtcmalloc_internal_la_SOURCES) +diff --git a/src/common.h b/src/common.h +index caa3e4a..687b2c6 100644 +--- a/src/common.h ++++ b/src/common.h +@@ -72,8 +72,10 @@ static const size_t kMinAlign = 16; + // the thread cache allowance to avoid passing more free ranges to and from + // central lists. Also, larger pages are less likely to get freed. + // These two factors cause a bounded increase in memory use. +-#if defined(TCMALLOC_PAGE_SIZE_SHIFT) ++#if defined(TCMALLOC_PAGE_SIZE_SHIFT) && !defined(TCMALLOC_PAGE_SIZE_2M) + static const size_t kPageShift = TCMALLOC_PAGE_SIZE_SHIFT; ++#elif defined(TCMALLOC_PAGE_SIZE_2M) ++static const size_t kPageShift = 21; + #else + static const size_t kPageShift = 13; + #endif +@@ -83,11 +85,18 @@ static const size_t kClassSizesMax = 128; + static const size_t kMaxThreadCacheSize = 4 << 20; + + static const size_t kPageSize = 1 << kPageShift; ++#if defined(TCMALLOC_PAGE_SIZE_2M) ++static const size_t kMaxSize = 2 * 1024 * 1024; ++#else + static const size_t kMaxSize = 256 * 1024; ++#endif + static const size_t kAlignment = 8; + // For all span-lengths <= kMaxPages we keep an exact-size list in PageHeap. 
++#if defined(TCMALLOC_PAGE_SIZE_2M) ++static const size_t kMaxPages = 1 << (21 - kPageShift); ++#else + static const size_t kMaxPages = 1 << (20 - kPageShift); +- ++#endif + // Default bound on the total amount of thread caches. + #ifdef TCMALLOC_SMALL_BUT_SLOW + // Make the overall thread cache no bigger than that of a single thread +diff --git a/src/span.h b/src/span.h +index 7068893..9c89edc 100644 +--- a/src/span.h ++++ b/src/span.h +@@ -80,8 +80,8 @@ struct Span { + // iterator which lifetime is controlled explicitly. + char span_iter_space[sizeof(SpanSet::iterator)]; + }; +- unsigned int refcount : 16; // Number of non-free objects +- unsigned int sizeclass : 8; // Size-class for small objects (or 0) ++ unsigned int refcount; // Number of non-free objects ++ unsigned int sizeclass; // Size-class for small objects (or 0) + unsigned int location : 2; // Is the span on a freelist, and if so, which? + unsigned int sample : 1; // Sampled object? + bool has_span_iter : 1; // Iff span_iter_space has valid +diff --git a/src/system-alloc.cc b/src/system-alloc.cc +index 439ec69..b1bb7c9 100644 +--- a/src/system-alloc.cc ++++ b/src/system-alloc.cc +@@ -548,7 +548,7 @@ bool TCMalloc_SystemRelease(void* start, size_t length) { + result = ret != MAP_FAILED; + #else + int ret = madvise(reinterpret_cast<char*>(new_start), +- new_end - new_start, MADV_FREE); ++ new_end - new_start, MADV_DONTNEED); + + result = ret != -1; + #endif +-- +2.23.0 + diff --git a/gperftools.spec b/gperftools.spec index a5974a36ad583245b6b1aea08e49858dff7dd001..ed5bb69c95a74e297aafca2b5f4e9b185ac0ac92 100644 --- a/gperftools.spec +++ b/gperftools.spec @@ -1,6 +1,6 @@ Name: gperftools Version: 2.10 -Release: 2 +Release: 3 Summary: high-performance malloc and performance analysis tools License: BSD-3-Clause @@ -16,6 +16,7 @@ Patch9003: avoid-exceed-int-range.patch Patch9004: skip-tcm_asserts_unittest.patch Patch9005: Continue-to-release-span-until-the-end-of-one-round.patch Patch9006: 
gperftools-2.10-sw.patch +Patch9007: add-libtcmalloc_2m.so-in-gperftools-libs-rpm-package.patch BuildRequires: autoconf automake gcc-c++ @@ -118,6 +119,9 @@ LD_LIBRARY_PATH=./.libs make check %{_mandir}/man1/*.1.gz %changelog +* Fri Apr 12 2024 liubo - 2.10-3 +- add libtcmalloc_2m.so in gperftools-libs rpm package + * Mon May 15 2023 yangchenguang - 2.10-2 - fix loongarch64 build error and sw_64 build error