From 09088923ee8d893dd2f09bd60a2ca246e3b45cea Mon Sep 17 00:00:00 2001 From: Xie jiamei Date: Thu, 12 Jun 2025 18:20:22 +0800 Subject: [PATCH] Revert the commit which uses total l3cache for non_temporal_threshold in intel cpu The reverted commit can make non_temporal_threshold very high in some cases, so the nt branch is never entered. The master branch doesn't have this change, so revert it. Signed-off-by: Xie jiamei --- glibc.spec | 62 ++++++++-------- ...l-l3cache-for-non_temporal_threshold.patch | 74 ------------------- 2 files changed, 32 insertions(+), 104 deletions(-) delete mode 100644 x86-use-total-l3cache-for-non_temporal_threshold.patch diff --git a/glibc.spec b/glibc.spec index 4973018..40742f9 100644 --- a/glibc.spec +++ b/glibc.spec @@ -66,7 +66,7 @@ ############################################################################## Name: glibc Version: 2.34 -Release: 150 +Release: 151 Summary: The GNU libc libraries License: %{all_license} URL: http://www.gnu.org/software/glibc/ @@ -300,7 +300,7 @@ Patch213: backport-elf-ldconfig-should-skip-temporary-files-created-by-.patch Patch214: backport-ldconfig-Fixes-for-skipping-temporary-files.patch Patch9000: turn-default-value-of-x86_rep_stosb_threshold_form_2K_to_1M.patch -Patch9001: delete-no-hard-link-to-avoid-all_language-package-to.patch +Patch9001: delete-no-hard-link-to-avoid-all_language-package-to.patch Patch9002: 0001-add-base-files-for-libphtread-condition-family.patch Patch9003: 0002-add-header-files-for-libphtread_2_17_so.patch Patch9004: 0003-add-build-script-and-files-of-libpthread_2_17_so.patch @@ -320,34 +320,33 @@ Patch9017: 0001-elf-dynamic-linker-load-shared-object-use-hugepage-a.patch Patch9018: 0002-elf-ld.so-add-testcase-for-ld.so-load-shared-object-.patch Patch9019: 0003-elf-ld.so-use-special-mmap-for-hugepage-to-get-symbo.patch Patch9020: malloc-use-__get_nprocs-replace-__get_nprocs_sched.patch -Patch9021: x86-use-total-l3cache-for-non_temporal_threshold.patch -Patch9022: 
login-Add-back-libutil-as-an-empty-library.patch -Patch9023: malloc-Fix-malloc-debug-for-2.35-onwards.patch -Patch9024: LoongArch-Port.patch -Patch9025: 1_6-LoongArch-Optimize-string-functions-memcpy-memmove.patch -Patch9026: 2_6-LoongArch-Optimize-string-functions-strchr-strchrnul.patch -Patch9027: 3_6-LoongArch-Optimize-string-function-memset.patch -Patch9028: 4_6-LoongArch-Optimize-string-functions-strcmp-strncmp.patch -Patch9029: 5_6-LoongArch-Optimize-string-function-strcpy.patch -Patch9030: 6_6-LoongArch-Optimize-string-functions-strlen-strnlen.patch -Patch9031: math-Fix-asin-and-acos-invalid-exception-with-old-gc.patch -Patch9032: LoongArch-Fix-ptr-mangling-demangling-and-SHMLBA.patch -Patch9033: LoongArch-Add-static-PIE-support.patch -Patch9034: LoongArch-Fix-the-condition-to-use-PC-relative-addre.patch -Patch9035: LoongArch-Further-refine-the-condition-to-enable-sta.patch -Patch9036: add-pthread_cond_clockwait-GLIBC_2_28.patch -Patch9037: 0001-ld.so-support-ld.so-mmap-hugetlb-hugepage-according-.patch -Patch9038: 0002-elf-ld.so-keep-compatible-with-the-original-policy-o.patch -Patch9039: 0003-elf-ld.so-remove-_mmap_hole-when-ld.so-mmap-PT_LOAD-.patch -Patch9040: elf-ld.so-add-MAP_NORESERVE-flag-for-the-first-mmap-2MB-contig.patch -Patch9041: elf-ld.so-prohibit-multiple-i-options-and-do-not-allow-i-speci.patch -Patch9042: elf-ld.so-Consider-maybe-existing-hole-between-PT_LO.patch -Patch9043: add-GB18030-2022-charmap-BZ-30243.patch -Patch9044: add-Wl-z-noseparate-code-for-so.patch -Patch9045: fix-Segmentation-fault-in-nss-module.patch -Patch9046: fix_nss_database_check_reload_and_get_memleak.patch -Patch9047: inet-fix-warn-unused-result.patch -Patch9048: LoongArch-Add-missing-relocation-type-in-elf.h.patch +Patch9021: login-Add-back-libutil-as-an-empty-library.patch +Patch9022: malloc-Fix-malloc-debug-for-2.35-onwards.patch +Patch9023: LoongArch-Port.patch +Patch9024: 1_6-LoongArch-Optimize-string-functions-memcpy-memmove.patch +Patch9025: 
2_6-LoongArch-Optimize-string-functions-strchr-strchrnul.patch +Patch9026: 3_6-LoongArch-Optimize-string-function-memset.patch +Patch9027: 4_6-LoongArch-Optimize-string-functions-strcmp-strncmp.patch +Patch9028: 5_6-LoongArch-Optimize-string-function-strcpy.patch +Patch9029: 6_6-LoongArch-Optimize-string-functions-strlen-strnlen.patch +Patch9030: math-Fix-asin-and-acos-invalid-exception-with-old-gc.patch +Patch9031: LoongArch-Fix-ptr-mangling-demangling-and-SHMLBA.patch +Patch9032: LoongArch-Add-static-PIE-support.patch +Patch9033: LoongArch-Fix-the-condition-to-use-PC-relative-addre.patch +Patch9034: LoongArch-Further-refine-the-condition-to-enable-sta.patch +Patch9035: add-pthread_cond_clockwait-GLIBC_2_28.patch +Patch9036: 0001-ld.so-support-ld.so-mmap-hugetlb-hugepage-according-.patch +Patch9037: 0002-elf-ld.so-keep-compatible-with-the-original-policy-o.patch +Patch9038: 0003-elf-ld.so-remove-_mmap_hole-when-ld.so-mmap-PT_LOAD-.patch +Patch9039: elf-ld.so-add-MAP_NORESERVE-flag-for-the-first-mmap-2MB-contig.patch +Patch9040: elf-ld.so-prohibit-multiple-i-options-and-do-not-allow-i-speci.patch +Patch9041: elf-ld.so-Consider-maybe-existing-hole-between-PT_LO.patch +Patch9042: add-GB18030-2022-charmap-BZ-30243.patch +Patch9043: add-Wl-z-noseparate-code-for-so.patch +Patch9044: fix-Segmentation-fault-in-nss-module.patch +Patch9045: fix_nss_database_check_reload_and_get_memleak.patch +Patch9046: inet-fix-warn-unused-result.patch +Patch9047: LoongArch-Add-missing-relocation-type-in-elf.h.patch Provides: ldconfig rtld(GNU_HASH) bundled(gnulib) @@ -1513,6 +1512,9 @@ fi %endif %changelog +* Thu Jun 12 2025 Xie jiamei - 2.34-151 +- revert the patch(x86: use total l3cache for non_temporal_threshold in intel cpu) + * Fri May 10 2024 shixuantong - 2.34-150 - Type:bugfix - ID: diff --git a/x86-use-total-l3cache-for-non_temporal_threshold.patch b/x86-use-total-l3cache-for-non_temporal_threshold.patch deleted file mode 100644 index a5eb85c..0000000 --- 
a/x86-use-total-l3cache-for-non_temporal_threshold.patch +++ /dev/null @@ -1,74 +0,0 @@ -From af0606f5d626b92d6e59da3a797548e9daab5580 Mon Sep 17 00:00:00 2001 -From: Qingqing Li -Date: Sat, 25 Jun 2022 15:36:44 +0800 -Subject: [PATCH] x86: use total l3cache for non_temporal_threshold - -Below glibc upstream patch modified the default behavoir for large size of memcpy, -such as 1M~10M. revert it and use GLIBC_TUNABLES="glibc.cpu.x86_non_temporal_threshold=xxx" -to tune the application when needed. - -d3c57027470b78dba79c6d931e4e409b1fecfc80 -Author: Patrick McGehearty -Date: Mon Sep 28 20:11:28 2020 +0000 - - Reversing calculation of __x86_shared_non_temporal_threshold - - The __x86_shared_non_temporal_threshold determines when memcpy on x86 - uses non_temporal stores to avoid pushing other data out of the last - level cache. - uses non_temporal stores to avoid pushing other data out of the last - level cache. - - This patch proposes to revert the calculation change made by H.J. Lu's - patch of June 2, 2017. - - H.J. Lu's patch selected a threshold suitable for a single thread - getting maximum performance. It was tuned using the single threaded - large memcpy micro benchmark on an 8 core processor. The last change - changes the threshold from using 3/4 of one thread's share of the - cache to using 3/4 of the entire cache of a multi-threaded system - before switching to non-temporal stores. Multi-threaded systems with - more than a few threads are server-class and typically have many - active threads. If one thread consumes 3/4 of the available cache for - all threads, it will cause other active threads to have data removed - from the cache. Two examples show the range of the effect. John - McCalpin's widely parallel Stream benchmark, which runs in parallel - and fetches data sequentially, saw a 20% slowdown with this patch on - an internal system test of 128 threads. This regression was discovered - when comparing OL8 performance to OL7. 
An example that compares - normal stores to non-temporal stores may be found at - https://vgatherps.github.io/2018-09-02-nontemporal/. A simple test - shows performance loss of 400 to 500% due to a failure to use - nontemporal stores. These performance losses are most likely to occur - when the system load is heaviest and good performance is critical. - - The tunable x86_non_temporal_threshold can be used to override the - default for the knowledgable user who really wants maximum cache - allocation to a single thread in a multi-threaded system. - The manual entry for the tunable has been expanded to provide - more information about its purpose. - - modified: sysdeps/x86/cacheinfo.c - modified: manual/tunables.texi ---- - sysdeps/x86/dl-cacheinfo.h | 4 ++++++ - 1 file changed, 4 insertions(+) - -diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h -index e6c94dfd..c5e8deb3 100644 ---- a/sysdeps/x86/dl-cacheinfo.h -+++ b/sysdeps/x86/dl-cacheinfo.h -@@ -924,6 +924,10 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) - if (tunable_size != 0) - shared = tunable_size; - -+ /* keep x86 to use the same non_temporal_threshold like glibc2.28 */ -+ if (threads != 0) -+ non_temporal_threshold *= threads; -+ - tunable_size = TUNABLE_GET (x86_non_temporal_threshold, long int, NULL); - /* NB: Ignore the default value 0. */ - if (tunable_size != 0) --- -2.30.0 - -- Gitee