diff --git a/Apply-the-Makefile-sorting-fix.patch b/Apply-the-Makefile-sorting-fix.patch new file mode 100644 index 0000000000000000000000000000000000000000..70efa969646c269c1d8c9f10cbc953a461384fbd --- /dev/null +++ b/Apply-the-Makefile-sorting-fix.patch @@ -0,0 +1,156 @@ +From a477851fd2b5f86bc981d4e293ced4d837797e24 Mon Sep 17 00:00:00 2001 +From: "H.J. Lu" +Date: Thu, 15 Feb 2024 11:19:56 -0800 +Subject: [PATCH] Apply the Makefile sorting fix + +Apply the Makefile sorting fix generated by sort-makefile-lines.py. + +(cherry picked from commit ef7f4b1fef67430a8f3cfc77fa6aada2add851d7) +--- + sysdeps/x86/Makefile | 6 +++--- + sysdeps/x86_64/Makefile | 10 +++++----- + sysdeps/x86_64/fpu/multiarch/Makefile | 14 +++++++------- + sysdeps/x86_64/multiarch/Makefile | 4 ++-- + 4 files changed, 17 insertions(+), 17 deletions(-) + +diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile +index f1f00825a6..02ecfbf146 100644 +--- a/sysdeps/x86/Makefile ++++ b/sysdeps/x86/Makefile +@@ -15,17 +15,17 @@ CFLAGS-dl-get-cpu-features.os += $(rtld-early-cflags) + CFLAGS-get-cpuid-feature-leaf.o += $(no-stack-protector) + + tests += \ +- tst-get-cpu-features \ +- tst-get-cpu-features-static \ + tst-cpu-features-cpuinfo \ + tst-cpu-features-cpuinfo-static \ + tst-cpu-features-supports \ + tst-cpu-features-supports-static \ ++ tst-get-cpu-features \ ++ tst-get-cpu-features-static \ + # tests + tests-static += \ +- tst-get-cpu-features-static \ + tst-cpu-features-cpuinfo-static \ + tst-cpu-features-supports-static \ ++ tst-get-cpu-features-static \ + # tests-static + ifeq (yes,$(have-ifunc)) + ifeq (yes,$(have-gcc-ifunc)) +diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile +index 36be2f6d56..75c5029b36 100644 +--- a/sysdeps/x86_64/Makefile ++++ b/sysdeps/x86_64/Makefile +@@ -224,6 +224,10 @@ sysdep-dl-routines += dl-cet + + tests += \ + tst-cet-legacy-1 \ ++ tst-cet-legacy-10 \ ++ tst-cet-legacy-10-static \ ++ tst-cet-legacy-10a \ ++ tst-cet-legacy-10a-static \ + 
tst-cet-legacy-1a \ + tst-cet-legacy-2 \ + tst-cet-legacy-2a \ +@@ -235,15 +239,11 @@ tests += \ + tst-cet-legacy-8 \ + tst-cet-legacy-9 \ + tst-cet-legacy-9-static \ +- tst-cet-legacy-10 \ +- tst-cet-legacy-10-static \ +- tst-cet-legacy-10a \ +- tst-cet-legacy-10a-static \ + # tests + tests-static += \ +- tst-cet-legacy-9-static \ + tst-cet-legacy-10-static \ + tst-cet-legacy-10a-static \ ++ tst-cet-legacy-9-static \ + # tests-static + tst-cet-legacy-1a-ARGS = -- $(host-test-program-cmd) + +diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile +index ea81753b70..e1a490dd98 100644 +--- a/sysdeps/x86_64/fpu/multiarch/Makefile ++++ b/sysdeps/x86_64/fpu/multiarch/Makefile +@@ -4,10 +4,10 @@ libm-sysdep_routines += \ + s_ceilf-c \ + s_floor-c \ + s_floorf-c \ +- s_rint-c \ +- s_rintf-c \ + s_nearbyint-c \ + s_nearbyintf-c \ ++ s_rint-c \ ++ s_rintf-c \ + s_roundeven-c \ + s_roundevenf-c \ + s_trunc-c \ +@@ -21,10 +21,10 @@ libm-sysdep_routines += \ + s_floorf-sse4_1 \ + s_nearbyint-sse4_1 \ + s_nearbyintf-sse4_1 \ +- s_roundeven-sse4_1 \ +- s_roundevenf-sse4_1 \ + s_rint-sse4_1 \ + s_rintf-sse4_1 \ ++ s_roundeven-sse4_1 \ ++ s_roundevenf-sse4_1 \ + s_trunc-sse4_1 \ + s_truncf-sse4_1 \ + # libm-sysdep_routines +@@ -84,12 +84,12 @@ CFLAGS-s_cosf-fma.c = -mfma -mavx2 + CFLAGS-s_sincosf-fma.c = -mfma -mavx2 + + libm-sysdep_routines += \ ++ e_asin-fma4 \ ++ e_atan2-fma4 \ + e_exp-fma4 \ + e_log-fma4 \ + e_pow-fma4 \ +- e_asin-fma4 \ + s_atan-fma4 \ +- e_atan2-fma4 \ + s_sin-fma4 \ + s_sincos-fma4 \ + s_tan-fma4 \ +@@ -106,10 +106,10 @@ CFLAGS-s_tan-fma4.c = -mfma4 + CFLAGS-s_sincos-fma4.c = -mfma4 + + libm-sysdep_routines += \ ++ e_atan2-avx \ + e_exp-avx \ + e_log-avx \ + s_atan-avx \ +- e_atan2-avx \ + s_sin-avx \ + s_sincos-avx \ + s_tan-avx \ +diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile +index e1e894c963..d3d2270394 100644 +--- a/sysdeps/x86_64/multiarch/Makefile ++++ 
b/sysdeps/x86_64/multiarch/Makefile +@@ -4,8 +4,8 @@ sysdep_routines += \ + memchr-avx2 \ + memchr-avx2-rtm \ + memchr-evex \ +- memchr-evex512 \ + memchr-evex-rtm \ ++ memchr-evex512 \ + memchr-sse2 \ + memcmp-avx2-movbe \ + memcmp-avx2-movbe-rtm \ +@@ -37,8 +37,8 @@ sysdep_routines += \ + rawmemchr-avx2 \ + rawmemchr-avx2-rtm \ + rawmemchr-evex \ +- rawmemchr-evex512 \ + rawmemchr-evex-rtm \ ++ rawmemchr-evex512 \ + rawmemchr-sse2 \ + stpcpy-avx2 \ + stpcpy-avx2-rtm \ +-- +2.27.0 + diff --git a/glibc.spec b/glibc.spec index a6b7446b34e3e6ebef8d15151cb5c48844d9b55a..583b11b03c558ab5e0aaa70197325259e7c266e8 100644 --- a/glibc.spec +++ b/glibc.spec @@ -67,7 +67,7 @@ ############################################################################## Name: glibc Version: 2.38 -Release: 66 +Release: 67 Summary: The GNU libc libraries License: %{all_license} URL: http://www.gnu.org/software/glibc/ @@ -351,6 +351,14 @@ Patch261: x32-cet-Support-shadow-stack-during-startup-for-Linu.patch Patch262: x86-Update-_dl_tlsdesc_dynamic-to-preserve-caller-sa.patch Patch263: x86-64-Update-_dl_tlsdesc_dynamic-to-preserve-AMX-re.patch Patch264: x86-64-Allocate-state-buffer-space-for-RDI-RSI-and-R.patch +Patch265: Apply-the-Makefile-sorting-fix.patch +Patch266: x86-Use-separate-variable-for-TLSDESC-XSAVE-XSAVEC-s.patch +Patch267: x86-Link-tst-gnu2-tls2-x86-noxsave-c-xsavec-with-lib.patch +Patch268: x86-Optimize-xstate-size-calculation.patch +Patch269: i386-Update-___tls_get_addr-to-preserve-vector-regis.patch +Patch270: x86-64-Add-GLIBC_ABI_GNU2_TLS-version-BZ-33129.patch +Patch271: i386-Also-add-GLIBC_ABI_GNU2_TLS-version-BZ-33129.patch +Patch272: i386-Add-GLIBC_ABI_GNU_TLS-version-BZ-33221.patch #openEuler patch list Patch9000: turn-default-value-of-x86_rep_stosb_threshold_form_2K_to_1M.patch @@ -1578,6 +1586,16 @@ fi %endif %changelog +* Fri Aug 29 2025 Qingqing Li - 2.38-67 +- i386: Add GLIBC_ABI_GNU_TLS version [BZ #33221] +- i386: Also add GLIBC_ABI_GNU2_TLS version [BZ #33129] +- x86-64: Add
GLIBC_ABI_GNU2_TLS version [BZ #33129] +- i386: Update ___tls_get_addr to preserve vector registers +- x86: Optimize xstate size calculation +- x86: Link tst-gnu2-tls2-x86-noxsave{,c,xsavec} with libpthread +- x86: Use separate variable for TLSDESC XSAVE/XSAVEC state size (bug 32810) +- Apply the Makefile sorting fix + * Wed Aug 20 2025 Qingqing Li - 2.38-66 - sync from glibc upstream 2.38 branch diff --git a/i386-Add-GLIBC_ABI_GNU_TLS-version-BZ-33221.patch b/i386-Add-GLIBC_ABI_GNU_TLS-version-BZ-33221.patch new file mode 100644 index 0000000000000000000000000000000000000000..d7d94f048b969b6ec0ddd4ec0c2cf617328c635a --- /dev/null +++ b/i386-Add-GLIBC_ABI_GNU_TLS-version-BZ-33221.patch @@ -0,0 +1,66 @@ +From a921ae4701f3b4b804ab7bc8f9e7a1e59001b083 Mon Sep 17 00:00:00 2001 +From: "H.J. Lu" +Date: Mon, 28 Jul 2025 12:16:11 -0700 +Subject: [PATCH] i386: Add GLIBC_ABI_GNU_TLS version [BZ #33221] + +On i386, programs and shared libraries with __thread usage may fail +silently at run-time against glibc without the TLS run-time fix for: + +https://sourceware.org/bugzilla/show_bug.cgi?id=32996 + +Add GLIBC_ABI_GNU_TLS version to indicate that glibc has the working +GNU TLS run-time. Linker can add the GLIBC_ABI_GNU_TLS version to +binaries which depend on the working TLS run-time so that such programs +and shared libraries will fail to load and run at run-time against +libc.so without the GLIBC_ABI_GNU_TLS version, instead of fail silently +at random. + +This fixes BZ #33221. + +Signed-off-by: H.J.
Lu +Reviewed-by: Sam James +(cherry picked from commit ed1b7a5a489ab555a27fad9c101ebe2e1c1ba881) +--- + sysdeps/i386/Makefile | 9 +++++++++ + sysdeps/i386/Versions | 5 +++++ + 2 files changed, 14 insertions(+) + +diff --git a/sysdeps/i386/Makefile b/sysdeps/i386/Makefile +index ee6470d78e..c0c017b899 100644 +--- a/sysdeps/i386/Makefile ++++ b/sysdeps/i386/Makefile +@@ -60,6 +60,15 @@ $(objpfx)tst-ld-sse-use.out: ../sysdeps/i386/tst-ld-sse-use.sh $(objpfx)ld.so + @echo "Checking ld.so for SSE register use. This will take a few seconds..." + $(BASH) $< $(objpfx) '$(NM)' '$(OBJDUMP)' '$(READELF)' > $@; \ + $(evaluate-test) ++ ++tests-special += $(objpfx)check-gnu-tls.out ++ ++$(objpfx)check-gnu-tls.out: $(common-objpfx)libc.so ++ LC_ALL=C $(READELF) -V -W $< \ ++ | sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \ ++ | grep GLIBC_ABI_GNU_TLS > $@; \ ++ $(evaluate-test) ++generated += check-gnu-tls.out + else + CFLAGS-.os += $(if $(filter rtld-%.os,$(@F)), $(rtld-CFLAGS)) + endif +diff --git a/sysdeps/i386/Versions b/sysdeps/i386/Versions +index 36e23b466a..9c84c8ef04 100644 +--- a/sysdeps/i386/Versions ++++ b/sysdeps/i386/Versions +@@ -28,6 +28,11 @@ libc { + GLIBC_2.13 { + __fentry__; + } ++ GLIBC_ABI_GNU_TLS { ++ # This symbol is used only for empty version map and will be removed ++ # by scripts/versions.awk. ++ __placeholder_only_for_empty_version_map; ++ } + } + libm { + GLIBC_2.1 { +-- +2.27.0 + diff --git a/i386-Also-add-GLIBC_ABI_GNU2_TLS-version-BZ-33129.patch b/i386-Also-add-GLIBC_ABI_GNU2_TLS-version-BZ-33129.patch new file mode 100644 index 0000000000000000000000000000000000000000..21e2bb2b7c1e22abac016c4232a2573f8e83ca18 --- /dev/null +++ b/i386-Also-add-GLIBC_ABI_GNU2_TLS-version-BZ-33129.patch @@ -0,0 +1,98 @@ +From 0769ad2cdd670d89c75eb6ac1cbc25f5c2d92ce5 Mon Sep 17 00:00:00 2001 +From: "H.J. 
Lu" +Date: Mon, 18 Aug 2025 09:06:48 -0700 +Subject: [PATCH] i386: Also add GLIBC_ABI_GNU2_TLS version [BZ #33129] + +Since the GNU2 TLS run-time bug: + +https://sourceware.org/bugzilla/show_bug.cgi?id=31372 + +affects both i386 and x86-64, also add GLIBC_ABI_GNU2_TLS version to i386 +to indicate the working GNU2 TLS run-time. For x86-64, the additional +GNU2 TLS run-time bug fix is needed for + +https://sourceware.org/bugzilla/show_bug.cgi?id=31501 + +Signed-off-by: H.J. Lu +Reviewed-by: Sam James +(cherry picked from commit bd4628f3f18ac312408782eea450429c6f044860) +--- + sysdeps/x86/Makefile | 9 +++++++++ + sysdeps/x86/Versions | 5 +++++ + sysdeps/x86_64/Makefile | 9 --------- + sysdeps/x86_64/Versions | 5 ----- + 4 files changed, 14 insertions(+), 14 deletions(-) + +diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile +index d39c2e8968..0ab42b14ba 100644 +--- a/sysdeps/x86/Makefile ++++ b/sysdeps/x86/Makefile +@@ -125,6 +125,15 @@ LDFLAGS-tst-tls23 += -rdynamic + tst-tls23-mod.so-no-z-defs = yes + + $(objpfx)tst-tls23-mod.so: $(libsupport) ++ ++tests-special += $(objpfx)check-gnu2-tls.out ++ ++$(objpfx)check-gnu2-tls.out: $(common-objpfx)libc.so ++ LC_ALL=C $(READELF) -V -W $< \ ++ | sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \ ++ | grep GLIBC_ABI_GNU2_TLS > $@; \ ++ $(evaluate-test) ++generated += check-gnu2-tls.out + endif + + ifeq ($(subdir),math) +diff --git a/sysdeps/x86/Versions b/sysdeps/x86/Versions +index 4b10c4b5d7..e8dcfccbe4 100644 +--- a/sysdeps/x86/Versions ++++ b/sysdeps/x86/Versions +@@ -7,4 +7,9 @@ libc { + GLIBC_2.33 { + __x86_get_cpuid_feature_leaf; + } ++ GLIBC_ABI_GNU2_TLS { ++ # This symbol is used only for empty version map and will be removed ++ # by scripts/versions.awk. 
++ __placeholder_only_for_empty_version_map; ++ } + } +diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile +index 191cf7f5b0..d83109b632 100644 +--- a/sysdeps/x86_64/Makefile ++++ b/sysdeps/x86_64/Makefile +@@ -180,15 +180,6 @@ $(objpfx)check-dt-x86-64-plt.out: $(common-objpfx)libc.so + | grep GLIBC_ABI_DT_X86_64_PLT > $@; \ + $(evaluate-test) + generated += check-dt-x86-64-plt.out +- +-tests-special += $(objpfx)check-gnu2-tls.out +- +-$(objpfx)check-gnu2-tls.out: $(common-objpfx)libc.so +- LC_ALL=C $(READELF) -V -W $< \ +- | sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \ +- | grep GLIBC_ABI_GNU2_TLS > $@; \ +- $(evaluate-test) +-generated += check-gnu2-tls.out + endif # $(subdir) == elf + + ifeq ($(subdir),csu) +diff --git a/sysdeps/x86_64/Versions b/sysdeps/x86_64/Versions +index 0a759029e5..6a989ad3b3 100644 +--- a/sysdeps/x86_64/Versions ++++ b/sysdeps/x86_64/Versions +@@ -5,11 +5,6 @@ libc { + GLIBC_2.13 { + __fentry__; + } +- GLIBC_ABI_GNU2_TLS { +- # This symbol is used only for empty version map and will be removed +- # by scripts/versions.awk. +- __placeholder_only_for_empty_version_map; +- } + GLIBC_ABI_DT_X86_64_PLT { + # This symbol is used only for empty version map and will be removed + # by scripts/versions.awk. +-- +2.27.0 + diff --git a/i386-Update-___tls_get_addr-to-preserve-vector-regis.patch b/i386-Update-___tls_get_addr-to-preserve-vector-regis.patch new file mode 100644 index 0000000000000000000000000000000000000000..fb7b9703e5a90a48b0300d9ca9f839705d07eef9 --- /dev/null +++ b/i386-Update-___tls_get_addr-to-preserve-vector-regis.patch @@ -0,0 +1,1188 @@ +From 1ea16a207ce05e90d4d24460b394452f64ae8f37 Mon Sep 17 00:00:00 2001 +From: "H.J. 
Lu" +Date: Mon, 9 Jun 2025 05:22:10 +0800 +Subject: [PATCH] i386: Update ___tls_get_addr to preserve vector registers + +Compiler generates the following instruction sequence for dynamic TLS +access: + + leal tls_var@tlsgd(,%ebx,1), %eax + call ___tls_get_addr@PLT + +CALL instruction is transparent to compiler which assumes all registers, +except for EFLAGS, AX, CX, and DX, are unchanged after CALL. But +___tls_get_addr is a normal function which doesn't preserve any vector +registers. + +1. Rename the generic __tls_get_addr function to ___tls_get_addr_internal. +2. Change ___tls_get_addr to a wrapper function with implementations for +FNSAVE, FXSAVE, XSAVE and XSAVEC to save and restore all vector registers. +3. dl-tlsdesc-dynamic.h has: + +_dl_tlsdesc_dynamic: + /* Like all TLS resolvers, preserve call-clobbered registers. + We need two scratch regs anyway. */ + subl $32, %esp + cfi_adjust_cfa_offset (32) + +It is wrong to use + + movl %ebx, -28(%esp) + movl %esp, %ebx + cfi_def_cfa_register(%ebx) + ... + mov %ebx, %esp + cfi_def_cfa_register(%esp) + movl -28(%esp), %ebx + +to preserve EBX on stack. Fix it with: + + movl %ebx, 28(%esp) + movl %esp, %ebx + cfi_def_cfa_register(%ebx) + ... + mov %ebx, %esp + cfi_def_cfa_register(%esp) + movl 28(%esp), %ebx + +4. Update _dl_tlsdesc_dynamic to call ___tls_get_addr_internal directly. +5. Add have-test-mtls-traditional to compile tst-tls23-mod.c with +traditional TLS variant to verify the fix. +6. Define DL_RUNTIME_RESOLVE_REALIGN_STACK in sysdeps/x86/sysdep.h. + +This fixes BZ #32996. + +Co-Authored-By: Adhemerval Zanella +Signed-off-by: H.J. 
Lu +Reviewed-by: Adhemerval Zanella +(cherry picked from commit 848f0e46f03f22404ed9a8aabf3fd5ce8809a1be) + +Conflict: Adapt the context of sysdeps/loongarch/preconfigure +--- + configure | 35 +++++ + configure.ac | 25 ++++ + elf/Makefile | 9 ++ + elf/tst-tls23-mod.c | 32 +++++ + elf/tst-tls23.c | 106 +++++++++++++++ + .../dl-trampoline-save.h => elf/tst-tls23.h | 34 +++-- + sysdeps/aarch64/preconfigure | 1 + + sysdeps/i386/Makefile | 4 +- + sysdeps/i386/dl-tls-get-addr.c | 68 ++++++++++ + sysdeps/i386/dl-tls.h | 28 +--- + sysdeps/i386/dl-tlsdesc-dynamic.h | 108 +-------------- + sysdeps/i386/dl-tlsdesc.S | 17 --- + sysdeps/i386/tls-get-addr-wrapper.h | 127 ++++++++++++++++++ + sysdeps/i386/tls_get_addr.S | 57 ++++++++ + sysdeps/i386/tls_get_addr.h | 42 ++++++ + sysdeps/loongarch/preconfigure | 1 + + sysdeps/loongarch/preconfigure.ac | 1 + + sysdeps/powerpc/Makefile | 5 + + sysdeps/x86/Makefile | 16 ++- + sysdeps/x86/sysdep.h | 23 ++++ + sysdeps/x86/tst-tls23.c | 22 +++ + sysdeps/x86/tst-tls23.h | 35 +++++ + sysdeps/x86_64/Makefile | 3 - + sysdeps/x86_64/dl-tlsdesc.S | 1 - + sysdeps/x86_64/dl-trampoline.S | 1 - + 25 files changed, 632 insertions(+), 169 deletions(-) + create mode 100644 elf/tst-tls23-mod.c + create mode 100644 elf/tst-tls23.c + rename sysdeps/x86_64/dl-trampoline-save.h => elf/tst-tls23.h (52%) + create mode 100644 sysdeps/i386/dl-tls-get-addr.c + create mode 100644 sysdeps/i386/tls-get-addr-wrapper.h + create mode 100644 sysdeps/i386/tls_get_addr.S + create mode 100644 sysdeps/i386/tls_get_addr.h + create mode 100644 sysdeps/x86/tst-tls23.c + create mode 100644 sysdeps/x86/tst-tls23.h + +diff --git a/configure b/configure +index 4bfd6c5c..2fb97d31 100755 +--- a/configure ++++ b/configure +@@ -4918,6 +4918,9 @@ libc_config_ok=no + # whether to use such directories.
+ with_fp_cond=1 + ++# A preconfigure script may define another name to traditional TLS variant ++mtls_traditional=gnu ++ + if frags=`ls -d $srcdir/sysdeps/*/preconfigure 2> /dev/null` + then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sysdeps preconfigure fragments" >&5 +@@ -7198,6 +7201,38 @@ printf "%s\n" "$libc_cv_mtls_dialect_gnu2" >&6; } + config_vars="$config_vars + have-mtls-dialect-gnu2 = $libc_cv_mtls_dialect_gnu2" + ++{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for traditional tls support" >&5 ++printf %s "checking for traditional tls support... " >&6; } ++if test ${libc_cv_test_mtls_traditional+y} ++then : ++ printf %s "(cached) " >&6 ++else $as_nop ++ cat > conftest.c <&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 ++ test $ac_status = 0; }; } ++then ++ libc_cv_test_mtls_traditional=$mtls_traditional ++else ++ libc_cv_test_mtls_traditional=no ++fi ++rm -f conftest* ++fi ++{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_test_mtls_traditional" >&5 ++printf "%s\n" "$libc_cv_test_mtls_traditional" >&6; } ++config_vars="$config_vars ++have-test-mtls-traditional = $libc_cv_test_mtls_traditional" ++ + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if -Wno-ignored-attributes is required for aliases" >&5 + printf %s "checking if -Wno-ignored-attributes is required for aliases... " >&6; } + if test ${libc_cv_wno_ignored_attributes+y} +diff --git a/configure.ac b/configure.ac +index 14391840..ffb43787 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -517,6 +517,9 @@ libc_config_ok=no + # whether to use such directories. + with_fp_cond=1 + ++# A preconfigure script may define another name to traditional TLS variant ++mtls_traditional=gnu ++ + dnl Let sysdeps/*/preconfigure act here. 
+ LIBC_PRECONFIGURE([$srcdir], [for sysdeps]) + +@@ -1382,6 +1385,28 @@ rm -f conftest*]) + AC_SUBST(libc_cv_mtls_dialect_gnu2) + LIBC_CONFIG_VAR([have-mtls-dialect-gnu2], [$libc_cv_mtls_dialect_gnu2]) + ++dnl Check if CC supports traditional tls. ++AC_CACHE_CHECK([for traditional tls support], ++ libc_cv_test_mtls_traditional, ++[dnl ++cat > conftest.c <&AS_MESSAGE_LOG_FD]) ++then ++ libc_cv_test_mtls_traditional=$mtls_traditional ++else ++ libc_cv_test_mtls_traditional=no ++fi ++rm -f conftest*]) ++LIBC_CONFIG_VAR([have-test-mtls-traditional], ++ [$libc_cv_test_mtls_traditional]) ++ + dnl clang emits an warning for a double alias redirection, to warn the + dnl original symbol is sed even when weak definition overrides it. + dnl It is a usual pattern for weak_alias, where multiple alias point to +diff --git a/elf/Makefile b/elf/Makefile +index 4c0cd4fa..b5ae0519 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -466,6 +466,7 @@ tests += \ + tst-tls19 \ + tst-tls20 \ + tst-tls21 \ ++ tst-tls23 \ + tst-tlsalign \ + tst-tlsalign-extern \ + tst-tlsgap \ +@@ -922,6 +923,7 @@ modules-names += \ + tst-tls19mod3 \ + tst-tls20mod-bad \ + tst-tls21mod \ ++ tst-tls23-mod \ + tst-tlsalign-lib \ + tst-tlsgap-mod0 \ + tst-tlsgap-mod1 \ +@@ -3095,6 +3097,13 @@ CFLAGS-tst-gnu2-tls2mod1.c += -mtls-dialect=gnu2 + CFLAGS-tst-gnu2-tls2mod2.c += -mtls-dialect=gnu2 + endif + ++$(objpfx)tst-tls23: $(shared-thread-library) ++$(objpfx)tst-tls23.out: $(objpfx)tst-tls23-mod.so ++ ++ifneq (no,$(have-test-mtls-traditional)) ++CFLAGS-tst-tls23-mod.c += -mtls-dialect=$(have-test-mtls-traditional) ++endif ++ + $(objpfx)tst-recursive-tls: $(objpfx)tst-recursive-tlsmallocmod.so + # More objects than DTV_SURPLUS, to trigger DTV reallocation. + $(objpfx)tst-recursive-tls.out: \ +diff --git a/elf/tst-tls23-mod.c b/elf/tst-tls23-mod.c +new file mode 100644 +index 00000000..3ee4c70e +--- /dev/null ++++ b/elf/tst-tls23-mod.c +@@ -0,0 +1,32 @@ ++/* DSO used by tst-tls23. 
++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++__thread struct tls tls_var0 __attribute__ ((visibility ("hidden"))); ++ ++struct tls * ++apply_tls (struct tls *p) ++{ ++ INIT_TLS_CALL (); ++ BEFORE_TLS_CALL (); ++ tls_var0 = *p; ++ struct tls *ret = &tls_var0; ++ AFTER_TLS_CALL (); ++ return ret; ++} +diff --git a/elf/tst-tls23.c b/elf/tst-tls23.c +new file mode 100644 +index 00000000..afe594c0 +--- /dev/null ++++ b/elf/tst-tls23.c +@@ -0,0 +1,106 @@ ++/* Test that __tls_get_addr preserves caller-saved registers. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifndef IS_SUPPORTED ++# define IS_SUPPORTED() true ++#endif ++ ++/* An architecture can define it to clobber caller-saved registers in ++ malloc below to verify that __tls_get_addr won't change caller-saved ++ registers. */ ++#ifndef PREPARE_MALLOC ++# define PREPARE_MALLOC() ++#endif ++ ++extern void * __libc_malloc (size_t); ++ ++size_t malloc_counter = 0; ++ ++void * ++malloc (size_t n) ++{ ++ PREPARE_MALLOC (); ++ malloc_counter++; ++ return __libc_malloc (n); ++} ++ ++static void *mod; ++static const char *modname = "tst-tls23-mod.so"; ++ ++static void ++open_mod (void) ++{ ++ mod = xdlopen (modname, RTLD_LAZY); ++ printf ("open %s\n", modname); ++} ++ ++static void ++close_mod (void) ++{ ++ xdlclose (mod); ++ mod = NULL; ++ printf ("close %s\n", modname); ++} ++ ++static void ++access_mod (const char *sym) ++{ ++ struct tls var = { -4, -4, -4, -4 }; ++ struct tls *(*f) (struct tls *) = xdlsym (mod, sym); ++ /* Check that our malloc is called. 
*/ ++ malloc_counter = 0; ++ struct tls *p = f (&var); ++ TEST_VERIFY (malloc_counter != 0); ++ printf ("access %s: %s() = %p\n", modname, sym, p); ++ TEST_VERIFY_EXIT (memcmp (p, &var, sizeof (var)) == 0); ++ ++(p->a); ++} ++ ++static void * ++start (void *arg) ++{ ++ access_mod ("apply_tls"); ++ return arg; ++} ++ ++static int ++do_test (void) ++{ ++ if (!IS_SUPPORTED ()) ++ return EXIT_UNSUPPORTED; ++ ++ open_mod (); ++ pthread_t t = xpthread_create (NULL, start, NULL); ++ xpthread_join (t); ++ close_mod (); ++ ++ return 0; ++} ++ ++#include +diff --git a/sysdeps/x86_64/dl-trampoline-save.h b/elf/tst-tls23.h +similarity index 52% +rename from sysdeps/x86_64/dl-trampoline-save.h +rename to elf/tst-tls23.h +index 84eac4a8..d0e73456 100644 +--- a/sysdeps/x86_64/dl-trampoline-save.h ++++ b/elf/tst-tls23.h +@@ -1,5 +1,5 @@ +-/* x86-64 PLT trampoline register save macros. +- Copyright (C) 2024 Free Software Foundation, Inc. ++/* Test that __tls_get_addr preserves caller-saved registers. ++ Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or +@@ -16,19 +16,25 @@ + License along with the GNU C Library; if not, see + . */ + +-#ifndef DL_STACK_ALIGNMENT +-/* Due to GCC bug: ++#include + +- https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 ++struct tls ++{ ++ int64_t a, b, c, d; ++}; + +- __tls_get_addr may be called with 8-byte stack alignment. Although +- this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume +- that stack will be always aligned at 16 bytes. */ +-# define DL_STACK_ALIGNMENT 8 ++extern struct tls *apply_tls (struct tls *); ++ ++/* An architecture can define them to verify that caller-saved registers ++ aren't changed by __tls_get_addr. 
*/ ++#ifndef INIT_TLS_CALL ++# define INIT_TLS_CALL() ++#endif ++ ++#ifndef BEFORE_TLS_CALL ++# define BEFORE_TLS_CALL() + #endif + +-/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align +- stack to 16 bytes before calling _dl_fixup. */ +-#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ +- (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ +- || 16 > DL_STACK_ALIGNMENT) ++#ifndef AFTER_TLS_CALL ++# define AFTER_TLS_CALL() ++#endif +diff --git a/sysdeps/aarch64/preconfigure b/sysdeps/aarch64/preconfigure +index d9bd1f85..16f5d345 100644 +--- a/sysdeps/aarch64/preconfigure ++++ b/sysdeps/aarch64/preconfigure +@@ -2,5 +2,6 @@ case "$machine" in + aarch64*) + base_machine=aarch64 + machine=aarch64 ++ mtls_traditional=trad + ;; + esac +diff --git a/sysdeps/i386/Makefile b/sysdeps/i386/Makefile +index a2e8c0b1..ee6470d7 100644 +--- a/sysdeps/i386/Makefile ++++ b/sysdeps/i386/Makefile +@@ -30,7 +30,9 @@ stack-align-test-flags += -malign-double + endif + + ifeq ($(subdir),elf) +-sysdep-dl-routines += tlsdesc dl-tlsdesc ++sysdep-dl-routines += \ ++ dl-tls-get-addr \ ++# sysdep-dl-routines + + tests += tst-audit3 + modules-names += tst-auditmod3a tst-auditmod3b +diff --git a/sysdeps/i386/dl-tls-get-addr.c b/sysdeps/i386/dl-tls-get-addr.c +new file mode 100644 +index 00000000..c97e5c57 +--- /dev/null ++++ b/sysdeps/i386/dl-tls-get-addr.c +@@ -0,0 +1,68 @@ ++/* Ifunc selector for ___tls_get_addr. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifdef SHARED ++# define ___tls_get_addr __redirect____tls_get_addr ++# include ++# undef ___tls_get_addr ++# undef __tls_get_addr ++ ++# define SYMBOL_NAME ___tls_get_addr ++# include ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (fnsave) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (fxsave) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (xsave) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (xsavec) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++ const struct cpu_features* cpu_features = __get_cpu_features (); ++ ++ if (cpu_features->xsave_state_size != 0) ++ { ++ if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)) ++ return OPTIMIZE (xsavec); ++ else ++ return OPTIMIZE (xsave); ++ } ++ else if (CPU_FEATURE_USABLE_P (cpu_features, FXSR)) ++ return OPTIMIZE (fxsave); ++ return OPTIMIZE (fnsave); ++} ++ ++libc_ifunc_redirected (__redirect____tls_get_addr, ___tls_get_addr, ++ IFUNC_SELECTOR ()); ++ ++/* The special thing about the x86 TLS ABI is that we have two ++ variants of the __tls_get_addr function with different calling ++ conventions. The GNU version, which we are mostly concerned here, ++ takes the parameter in a register. The name is changed by adding ++ an additional underscore at the beginning. The Sun version uses ++ the normal calling convention. */ ++ ++rtld_hidden_proto (___tls_get_addr) ++rtld_hidden_def (___tls_get_addr) ++ ++void * ++__tls_get_addr (tls_index *ti) ++{ ++ return ___tls_get_addr (ti); ++} ++#endif +diff --git a/sysdeps/i386/dl-tls.h b/sysdeps/i386/dl-tls.h +index ec767809..c4b2419b 100644 +--- a/sysdeps/i386/dl-tls.h ++++ b/sysdeps/i386/dl-tls.h +@@ -29,33 +29,13 @@ typedef struct dl_tls_index + /* This is the prototype for the GNU version. 
*/ + extern void *___tls_get_addr (tls_index *ti) + __attribute__ ((__regparm__ (1))); +-extern void *___tls_get_addr_internal (tls_index *ti) +- __attribute__ ((__regparm__ (1))) attribute_hidden; +- + # if IS_IN (rtld) +-/* The special thing about the x86 TLS ABI is that we have two +- variants of the __tls_get_addr function with different calling +- conventions. The GNU version, which we are mostly concerned here, +- takes the parameter in a register. The name is changed by adding +- an additional underscore at the beginning. The Sun version uses +- the normal calling convention. */ +-void * +-__tls_get_addr (tls_index *ti) +-{ +- return ___tls_get_addr_internal (ti); +-} +- +- + /* Prepare using the definition of __tls_get_addr in the generic + version of this file. */ +-# define __tls_get_addr __attribute__ ((__regparm__ (1))) ___tls_get_addr +-strong_alias (___tls_get_addr, ___tls_get_addr_internal) +-rtld_hidden_proto (___tls_get_addr) +-rtld_hidden_def (___tls_get_addr) +-#else +- ++# define __tls_get_addr \ ++ __attribute__ ((__regparm__ (1))) ___tls_get_addr_internal ++# else + /* Users should get the better interface. */ +-# define __tls_get_addr ___tls_get_addr +- ++# define __tls_get_addr ___tls_get_addr + # endif + #endif +diff --git a/sysdeps/i386/dl-tlsdesc-dynamic.h b/sysdeps/i386/dl-tlsdesc-dynamic.h +index 36270285..8a595242 100644 +--- a/sysdeps/i386/dl-tlsdesc-dynamic.h ++++ b/sysdeps/i386/dl-tlsdesc-dynamic.h +@@ -16,34 +16,6 @@ + License along with the GNU C Library; if not, see + . */ + +-#undef REGISTER_SAVE_AREA +- +-#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0 +-# error STATE_SAVE_ALIGNMENT must be multiple of 16 +-#endif +- +-#if DL_RUNTIME_RESOLVE_REALIGN_STACK +-# ifdef USE_FNSAVE +-# error USE_FNSAVE shouldn't be defined +-# endif +-# ifdef USE_FXSAVE +-/* Use fxsave to save all registers. */ +-# define REGISTER_SAVE_AREA 512 +-# endif +-#else +-# ifdef USE_FNSAVE +-/* Use fnsave to save x87 FPU stack registers. 
*/ +-# define REGISTER_SAVE_AREA 108 +-# else +-# ifndef USE_FXSAVE +-# error USE_FXSAVE must be defined +-# endif +-/* Use fxsave to save all registers. Add 12 bytes to align the stack +- to 16 bytes. */ +-# define REGISTER_SAVE_AREA (512 + 12) +-# endif +-#endif +- + .hidden _dl_tlsdesc_dynamic + .global _dl_tlsdesc_dynamic + .type _dl_tlsdesc_dynamic,@function +@@ -104,85 +76,7 @@ _dl_tlsdesc_dynamic: + ret + .p2align 4,,7 + 2: +- cfi_adjust_cfa_offset (32) +-#if DL_RUNTIME_RESOLVE_REALIGN_STACK +- movl %ebx, -28(%esp) +- movl %esp, %ebx +- cfi_def_cfa_register(%ebx) +- and $-STATE_SAVE_ALIGNMENT, %esp +-#endif +-#ifdef REGISTER_SAVE_AREA +- subl $REGISTER_SAVE_AREA, %esp +-# if !DL_RUNTIME_RESOLVE_REALIGN_STACK +- cfi_adjust_cfa_offset(REGISTER_SAVE_AREA) +-# endif +-#else +-# if !DL_RUNTIME_RESOLVE_REALIGN_STACK +-# error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true +-# endif +- /* Allocate stack space of the required size to save the state. */ +- LOAD_PIC_REG (cx) +- subl RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp +-#endif +-#ifdef USE_FNSAVE +- fnsave (%esp) +-#elif defined USE_FXSAVE +- fxsave (%esp) +-#else +- /* Save the argument for ___tls_get_addr in EAX. */ +- movl %eax, %ecx +- movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax +- xorl %edx, %edx +- /* Clear the XSAVE Header. 
*/ +-# ifdef USE_XSAVE +- movl %edx, (512)(%esp) +- movl %edx, (512 + 4 * 1)(%esp) +- movl %edx, (512 + 4 * 2)(%esp) +- movl %edx, (512 + 4 * 3)(%esp) +-# endif +- movl %edx, (512 + 4 * 4)(%esp) +- movl %edx, (512 + 4 * 5)(%esp) +- movl %edx, (512 + 4 * 6)(%esp) +- movl %edx, (512 + 4 * 7)(%esp) +- movl %edx, (512 + 4 * 8)(%esp) +- movl %edx, (512 + 4 * 9)(%esp) +- movl %edx, (512 + 4 * 10)(%esp) +- movl %edx, (512 + 4 * 11)(%esp) +- movl %edx, (512 + 4 * 12)(%esp) +- movl %edx, (512 + 4 * 13)(%esp) +- movl %edx, (512 + 4 * 14)(%esp) +- movl %edx, (512 + 4 * 15)(%esp) +-# ifdef USE_XSAVE +- xsave (%esp) +-# else +- xsavec (%esp) +-# endif +- /* Restore the argument for ___tls_get_addr in EAX. */ +- movl %ecx, %eax +-#endif +- call HIDDEN_JUMPTARGET (___tls_get_addr) +- /* Get register content back. */ +-#ifdef USE_FNSAVE +- frstor (%esp) +-#elif defined USE_FXSAVE +- fxrstor (%esp) +-#else +- /* Save and retore ___tls_get_addr return value stored in EAX. */ +- movl %eax, %ecx +- movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax +- xorl %edx, %edx +- xrstor (%esp) +- movl %ecx, %eax +-#endif +-#if DL_RUNTIME_RESOLVE_REALIGN_STACK +- mov %ebx, %esp +- cfi_def_cfa_register(%esp) +- movl -28(%esp), %ebx +- cfi_restore(%ebx) +-#else +- addl $REGISTER_SAVE_AREA, %esp +- cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA) +-#endif ++#include "tls-get-addr-wrapper.h" + jmp 1b + cfi_endproc + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic +diff --git a/sysdeps/i386/dl-tlsdesc.S b/sysdeps/i386/dl-tlsdesc.S +index 725506f5..1609cab0 100644 +--- a/sysdeps/i386/dl-tlsdesc.S ++++ b/sysdeps/i386/dl-tlsdesc.S +@@ -22,23 +22,6 @@ + #include + #include "tlsdesc.h" + +-#ifndef DL_STACK_ALIGNMENT +-/* Due to GCC bug: +- +- https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 +- +- __tls_get_addr may be called with 4-byte stack alignment. Although +- this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume +- that stack will be always aligned at 16 bytes. 
*/ +-# define DL_STACK_ALIGNMENT 4 +-#endif +- +-/* True if _dl_tlsdesc_dynamic should align stack for STATE_SAVE or align +- stack to MINIMUM_ALIGNMENT bytes before calling ___tls_get_addr. */ +-#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ +- (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ +- || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT) +- + .text + + /* This function is used to compute the TP offset for symbols in +diff --git a/sysdeps/i386/tls-get-addr-wrapper.h b/sysdeps/i386/tls-get-addr-wrapper.h +new file mode 100644 +index 00000000..0708e5ad +--- /dev/null ++++ b/sysdeps/i386/tls-get-addr-wrapper.h +@@ -0,0 +1,127 @@ ++/* Wrapper of i386 ___tls_get_addr to save and restore vector registers. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#undef REGISTER_SAVE_AREA ++ ++#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0 ++# error STATE_SAVE_ALIGNMENT must be multiple of 16 ++#endif ++ ++#if DL_RUNTIME_RESOLVE_REALIGN_STACK ++# ifdef USE_FNSAVE ++# error USE_FNSAVE shouldn't be defined ++# endif ++# ifdef USE_FXSAVE ++/* Use fxsave to save all registers. */ ++# define REGISTER_SAVE_AREA 512 ++# endif ++#else ++# ifdef USE_FNSAVE ++/* Use fnsave to save x87 FPU stack registers. 
*/ ++# define REGISTER_SAVE_AREA 108 ++# else ++# ifndef USE_FXSAVE ++# error USE_FXSAVE must be defined ++# endif ++/* Use fxsave to save all registers. Add 12 bytes to align the stack ++ to 16 bytes. */ ++# define REGISTER_SAVE_AREA (512 + 12) ++# endif ++#endif ++ ++#if DL_RUNTIME_RESOLVE_REALIGN_STACK ++ movl %ebx, 28(%esp) ++ movl %esp, %ebx ++ cfi_def_cfa_register(%ebx) ++ and $-STATE_SAVE_ALIGNMENT, %esp ++#endif ++#ifdef REGISTER_SAVE_AREA ++ subl $REGISTER_SAVE_AREA, %esp ++# if !DL_RUNTIME_RESOLVE_REALIGN_STACK ++ cfi_adjust_cfa_offset(REGISTER_SAVE_AREA) ++# endif ++#else ++# if !DL_RUNTIME_RESOLVE_REALIGN_STACK ++# error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true ++# endif ++ /* Allocate stack space of the required size to save the state. */ ++ LOAD_PIC_REG (cx) ++ subl RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET \ ++ +XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp ++#endif ++#ifdef USE_FNSAVE ++ fnsave (%esp) ++#elif defined USE_FXSAVE ++ fxsave (%esp) ++#else ++ /* Save the argument for ___tls_get_addr in EAX. */ ++ movl %eax, %ecx ++ movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax ++ xorl %edx, %edx ++ /* Clear the XSAVE Header. */ ++# ifdef USE_XSAVE ++ movl %edx, (512)(%esp) ++ movl %edx, (512 + 4 * 1)(%esp) ++ movl %edx, (512 + 4 * 2)(%esp) ++ movl %edx, (512 + 4 * 3)(%esp) ++# endif ++ movl %edx, (512 + 4 * 4)(%esp) ++ movl %edx, (512 + 4 * 5)(%esp) ++ movl %edx, (512 + 4 * 6)(%esp) ++ movl %edx, (512 + 4 * 7)(%esp) ++ movl %edx, (512 + 4 * 8)(%esp) ++ movl %edx, (512 + 4 * 9)(%esp) ++ movl %edx, (512 + 4 * 10)(%esp) ++ movl %edx, (512 + 4 * 11)(%esp) ++ movl %edx, (512 + 4 * 12)(%esp) ++ movl %edx, (512 + 4 * 13)(%esp) ++ movl %edx, (512 + 4 * 14)(%esp) ++ movl %edx, (512 + 4 * 15)(%esp) ++# ifdef USE_XSAVE ++ xsave (%esp) ++# else ++ xsavec (%esp) ++# endif ++ /* Restore the argument for ___tls_get_addr in EAX. */ ++ movl %ecx, %eax ++#endif ++ call ___tls_get_addr_internal ++ /* Get register content back. 
*/ ++#ifdef USE_FNSAVE ++ frstor (%esp) ++#elif defined USE_FXSAVE ++ fxrstor (%esp) ++#else ++ /* Save and restore ___tls_get_addr return value stored in EAX. */ ++ movl %eax, %ecx ++ movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax ++ xorl %edx, %edx ++ xrstor (%esp) ++ movl %ecx, %eax ++#endif ++#if DL_RUNTIME_RESOLVE_REALIGN_STACK ++ mov %ebx, %esp ++ cfi_def_cfa_register(%esp) ++ movl 28(%esp), %ebx ++ cfi_restore(%ebx) ++#else ++ addl $REGISTER_SAVE_AREA, %esp ++ cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA) ++#endif ++ ++#undef STATE_SAVE_ALIGNMENT +diff --git a/sysdeps/i386/tls_get_addr.S b/sysdeps/i386/tls_get_addr.S +new file mode 100644 +index 00000000..7d143d8a +--- /dev/null ++++ b/sysdeps/i386/tls_get_addr.S +@@ -0,0 +1,57 @@ ++/* Thread-local storage handling in the ELF dynamic linker. i386 version. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++#include ++#include ++ ++ .text ++#ifdef SHARED ++# define USE_FNSAVE ++# define MINIMUM_ALIGNMENT 4 ++# define STATE_SAVE_ALIGNMENT 4 ++# define ___tls_get_addr _____tls_get_addr_fnsave ++# include "tls_get_addr.h" ++# undef ___tls_get_addr ++# undef MINIMUM_ALIGNMENT ++# undef USE_FNSAVE ++ ++# define MINIMUM_ALIGNMENT 16 ++ ++# define USE_FXSAVE ++# define STATE_SAVE_ALIGNMENT 16 ++# define ___tls_get_addr _____tls_get_addr_fxsave ++# include "tls_get_addr.h" ++# undef ___tls_get_addr ++# undef USE_FXSAVE ++ ++# define USE_XSAVE ++# define STATE_SAVE_ALIGNMENT 64 ++# define ___tls_get_addr _____tls_get_addr_xsave ++# include "tls_get_addr.h" ++# undef ___tls_get_addr ++# undef USE_XSAVE ++ ++# define USE_XSAVEC ++# define STATE_SAVE_ALIGNMENT 64 ++# define ___tls_get_addr _____tls_get_addr_xsavec ++# include "tls_get_addr.h" ++# undef ___tls_get_addr ++# undef USE_XSAVEC ++#endif /* SHARED */ +diff --git a/sysdeps/i386/tls_get_addr.h b/sysdeps/i386/tls_get_addr.h +new file mode 100644 +index 00000000..18257987 +--- /dev/null ++++ b/sysdeps/i386/tls_get_addr.h +@@ -0,0 +1,42 @@ ++/* Thread-local storage handling in the ELF dynamic linker. i386 version. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++ .hidden ___tls_get_addr ++ .global ___tls_get_addr ++ .type ___tls_get_addr,@function ++ ++ /* This function is a wrapper of ___tls_get_addr_internal to ++ preserve caller-saved vector registers. */ ++ ++ cfi_startproc ++ .align 16 ++___tls_get_addr: ++ /* Like all TLS resolvers, preserve call-clobbered registers. ++ We need two scratch regs anyway. */ ++ subl $32, %esp ++ cfi_adjust_cfa_offset (32) ++ movl %ecx, 20(%esp) ++ movl %edx, 24(%esp) ++#include "tls-get-addr-wrapper.h" ++ movl 20(%esp), %ecx ++ movl 24(%esp), %edx ++ addl $32, %esp ++ cfi_adjust_cfa_offset (-32) ++ ret ++ cfi_endproc ++ .size ___tls_get_addr, .-___tls_get_addr +diff --git a/sysdeps/loongarch/preconfigure b/sysdeps/loongarch/preconfigure +index 0d1e9ed8..6726ab83 100644 +--- a/sysdeps/loongarch/preconfigure ++++ b/sysdeps/loongarch/preconfigure +@@ -44,6 +44,7 @@ loongarch*) + + base_machine=loongarch + mtls_descriptor=desc ++ mtls_traditional=trad + ;; + esac + +diff --git a/sysdeps/loongarch/preconfigure.ac b/sysdeps/loongarch/preconfigure.ac +index 67e43570..31e9579e 100644 +--- a/sysdeps/loongarch/preconfigure.ac ++++ b/sysdeps/loongarch/preconfigure.ac +@@ -41,6 +41,7 @@ loongarch*) + AC_DEFINE_UNQUOTED([LOONGARCH_ABI_FRLEN], [$abi_flen]) + + base_machine=loongarch ++ mtls_traditional=trad + ;; + esac + +diff --git a/sysdeps/powerpc/Makefile b/sysdeps/powerpc/Makefile +index 5e6cb07c..5cdb64f2 100644 +--- a/sysdeps/powerpc/Makefile ++++ b/sysdeps/powerpc/Makefile +@@ -28,6 +28,11 @@ tst-cache-ppc-static-dlopen-ENV = LD_LIBRARY_PATH=$(objpfx):$(common-objpfx):$(c + $(objpfx)tst-cache-ppc-static-dlopen.out: $(objpfx)mod-cache-ppc.so + + $(objpfx)tst-cache-ppc: $(objpfx)mod-cache-ppc.so ++ ++# The test checks if the __tls_get_addr does not clobber caller-saved ++# register, so disable the powerpc specific optimization to force a ++# __tls_get_addr call. 
++LDFLAGS-tst-tls23-mod.so = -Wl,--no-tls-get-addr-optimize + endif + + ifneq (no,$(multi-arch)) +diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile +index b76b72ca..d39c2e89 100644 +--- a/sysdeps/x86/Makefile ++++ b/sysdeps/x86/Makefile +@@ -4,7 +4,13 @@ endif + + ifeq ($(subdir),elf) + sysdep_routines += get-cpuid-feature-leaf +-sysdep-dl-routines += dl-get-cpu-features ++sysdep-dl-routines += \ ++ dl-get-cpu-features \ ++ dl-tlsdesc \ ++ tls_get_addr \ ++ tlsdesc \ ++# sysdep-dl-routines ++ + sysdep_headers += \ + bits/platform/features.h \ + bits/platform/x86.h \ +@@ -111,6 +117,14 @@ $(objpfx)tst-gnu2-tls2-x86-noxsavexsavec.out: \ + $(objpfx)tst-gnu2-tls2mod0.so \ + $(objpfx)tst-gnu2-tls2mod1.so \ + $(objpfx)tst-gnu2-tls2mod2.so ++ ++CFLAGS-tst-tls23.c += -msse2 ++CFLAGS-tst-tls23-mod.c += -msse2 -mtune=haswell ++ ++LDFLAGS-tst-tls23 += -rdynamic ++tst-tls23-mod.so-no-z-defs = yes ++ ++$(objpfx)tst-tls23-mod.so: $(libsupport) + endif + + ifeq ($(subdir),math) +diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h +index 0f744859..13948ae2 100644 +--- a/sysdeps/x86/sysdep.h ++++ b/sysdeps/x86/sysdep.h +@@ -183,6 +183,29 @@ + + #define atom_text_section .section ".text.atom", "ax" + ++#ifndef DL_STACK_ALIGNMENT ++/* Due to GCC bug: ++ ++ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 ++ ++ __tls_get_addr may be called with 8-byte/4-byte stack alignment. ++ Although this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't ++ assume that stack will be always aligned at 16 bytes. */ ++# ifdef __x86_64__ ++# define DL_STACK_ALIGNMENT 8 ++# define MINIMUM_ALIGNMENT 16 ++# else ++# define DL_STACK_ALIGNMENT 4 ++# endif ++#endif ++ ++/* True if _dl_runtime_resolve/_dl_tlsdesc_dynamic should align stack for ++ STATE_SAVE or align stack to MINIMUM_ALIGNMENT bytes before calling ++ _dl_fixup/__tls_get_addr. 
*/ ++#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ ++ (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ ++ || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT) ++ + #endif /* __ASSEMBLER__ */ + + #endif /* _X86_SYSDEP_H */ +diff --git a/sysdeps/x86/tst-tls23.c b/sysdeps/x86/tst-tls23.c +new file mode 100644 +index 00000000..6130d91c +--- /dev/null ++++ b/sysdeps/x86/tst-tls23.c +@@ -0,0 +1,22 @@ ++#ifndef __x86_64__ ++#include ++ ++#define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2) ++#endif ++ ++/* Set XMM0...XMM7 to all 1s. */ ++#define PREPARE_MALLOC() \ ++{ \ ++ asm volatile ("pcmpeqd %%xmm0, %%xmm0" : : : "xmm0" ); \ ++ asm volatile ("pcmpeqd %%xmm1, %%xmm1" : : : "xmm1" ); \ ++ asm volatile ("pcmpeqd %%xmm2, %%xmm2" : : : "xmm2" ); \ ++ asm volatile ("pcmpeqd %%xmm3, %%xmm3" : : : "xmm3" ); \ ++ asm volatile ("pcmpeqd %%xmm4, %%xmm4" : : : "xmm4" ); \ ++ asm volatile ("pcmpeqd %%xmm5, %%xmm5" : : : "xmm5" ); \ ++ asm volatile ("pcmpeqd %%xmm6, %%xmm6" : : : "xmm6" ); \ ++ asm volatile ("pcmpeqd %%xmm7, %%xmm7" : : : "xmm7" ); \ ++} ++ ++#include ++ ++v2di v1, v2, v3; +diff --git a/sysdeps/x86/tst-tls23.h b/sysdeps/x86/tst-tls23.h +new file mode 100644 +index 00000000..21cee4ca +--- /dev/null ++++ b/sysdeps/x86/tst-tls23.h +@@ -0,0 +1,35 @@ ++/* Test that __tls_get_addr preserves XMM registers. ++ Copyright (C) 2025 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++typedef long long v2di __attribute__((vector_size(16))); ++extern v2di v1, v2, v3; ++ ++#define BEFORE_TLS_CALL() \ ++ v1 = __extension__(v2di){0, 0}; \ ++ v2 = __extension__(v2di){0, 0}; ++ ++#define AFTER_TLS_CALL() \ ++ v3 = __extension__(v2di){0, 0}; \ ++ asm volatile ("" : "+x" (v3)); \ ++ union { v2di x; long long a[2]; } u; \ ++ u.x = v3; \ ++ TEST_VERIFY_EXIT (u.a[0] == 0 && u.a[1] == 0); ++ ++#include +diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile +index 75c5029b..9c8af273 100644 +--- a/sysdeps/x86_64/Makefile ++++ b/sysdeps/x86_64/Makefile +@@ -32,9 +32,6 @@ ifeq ($(subdir),elf) + CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\ + -mno-mmx) + +-sysdep-dl-routines += tlsdesc dl-tlsdesc tls_get_addr +- +-tests += ifuncmain8 + modules-names += ifuncmod8 + + $(objpfx)ifuncmain8: $(objpfx)ifuncmod8.so +diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S +index 6f971281..1b80dd8a 100644 +--- a/sysdeps/x86_64/dl-tlsdesc.S ++++ b/sysdeps/x86_64/dl-tlsdesc.S +@@ -21,7 +21,6 @@ + #include + #include + #include "tlsdesc.h" +-#include "dl-trampoline-save.h" + + /* Area on stack to save and restore registers used for parameter + passing when calling _dl_tlsdesc_dynamic. */ +diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S +index e84d6ffc..24616bb8 100644 +--- a/sysdeps/x86_64/dl-trampoline.S ++++ b/sysdeps/x86_64/dl-trampoline.S +@@ -22,7 +22,6 @@ + #include + #include + #include +-#include "dl-trampoline-save.h" + + /* Area on stack to save and restore registers used for parameter + passing when calling _dl_fixup. 
*/ +-- +2.27.0 + diff --git a/x86-64-Add-GLIBC_ABI_GNU2_TLS-version-BZ-33129.patch b/x86-64-Add-GLIBC_ABI_GNU2_TLS-version-BZ-33129.patch new file mode 100644 index 0000000000000000000000000000000000000000..2c32c7e19c9c934d6aca6afa5463f93fde56d675 --- /dev/null +++ b/x86-64-Add-GLIBC_ABI_GNU2_TLS-version-BZ-33129.patch @@ -0,0 +1,69 @@ +From 3f0dd818e54157d6042c4a03b07d93f05d1e492a Mon Sep 17 00:00:00 2001 +From: "H.J. Lu" +Date: Mon, 28 Jul 2025 12:18:22 -0700 +Subject: [PATCH] x86-64: Add GLIBC_ABI_GNU2_TLS version [BZ #33129] + +Programs and shared libraries compiled with -mtls-dialect=gnu2 may fail +silently at run-time against glibc without the GNU2 TLS run-time fix +for: + +https://sourceware.org/bugzilla/show_bug.cgi?id=31372 + +Add GLIBC_ABI_GNU2_TLS version to indicate that glibc has the working +GNU2 TLS run-time. Linker can add the GLIBC_ABI_GNU2_TLS version to +binaries which depend on the working GNU2 TLS run-time: + +https://sourceware.org/bugzilla/show_bug.cgi?id=33130 + +so that such programs and shared libraries will fail to load and run at +run-time against libc.so without the GLIBC_ABI_GNU2_TLS version, instead +of failing silently at random. + +This fixes BZ #33129. + +Signed-off-by: H.J. 
Lu +Reviewed-by: Sam James +(cherry picked from commit 9df8fa397d515dc86ff5565f6c45625e672d539e) +--- + sysdeps/x86_64/Makefile | 8 ++++++++ + sysdeps/x86_64/Versions | 5 +++++ + 2 files changed, 13 insertions(+) + +diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile +index 9c8af27300..191cf7f5b0 100644 +--- a/sysdeps/x86_64/Makefile ++++ b/sysdeps/x86_64/Makefile +@@ -181,6 +181,14 @@ $(objpfx)check-dt-x86-64-plt.out: $(common-objpfx)libc.so + $(evaluate-test) + generated += check-dt-x86-64-plt.out + ++tests-special += $(objpfx)check-gnu2-tls.out ++ ++$(objpfx)check-gnu2-tls.out: $(common-objpfx)libc.so ++ LC_ALL=C $(READELF) -V -W $< \ ++ | sed -ne '/.gnu.version_d/, /.gnu.version_r/ p' \ ++ | grep GLIBC_ABI_GNU2_TLS > $@; \ ++ $(evaluate-test) ++generated += check-gnu2-tls.out + endif # $(subdir) == elf + + ifeq ($(subdir),csu) +diff --git a/sysdeps/x86_64/Versions b/sysdeps/x86_64/Versions +index 6a989ad3b3..0a759029e5 100644 +--- a/sysdeps/x86_64/Versions ++++ b/sysdeps/x86_64/Versions +@@ -5,6 +5,11 @@ libc { + GLIBC_2.13 { + __fentry__; + } ++ GLIBC_ABI_GNU2_TLS { ++ # This symbol is used only for empty version map and will be removed ++ # by scripts/versions.awk. ++ __placeholder_only_for_empty_version_map; ++ } + GLIBC_ABI_DT_X86_64_PLT { + # This symbol is used only for empty version map and will be removed + # by scripts/versions.awk. +-- +2.27.0 + diff --git a/x86-Link-tst-gnu2-tls2-x86-noxsave-c-xsavec-with-lib.patch b/x86-Link-tst-gnu2-tls2-x86-noxsave-c-xsavec-with-lib.patch new file mode 100644 index 0000000000000000000000000000000000000000..7ba74c2206854c3ef2ffb1f93e335f8e176b303c --- /dev/null +++ b/x86-Link-tst-gnu2-tls2-x86-noxsave-c-xsavec-with-lib.patch @@ -0,0 +1,35 @@ +From e925c13e039df061c7bf48e458e9ea26fa3ca8a3 Mon Sep 17 00:00:00 2001 +From: Florian Weimer +Date: Mon, 31 Mar 2025 21:33:18 +0200 +Subject: [PATCH] x86: Link tst-gnu2-tls2-x86-noxsave{,c,xsavec} with + libpthread + +This fixes a test build failure on Hurd. 
+ +Fixes commit 145097dff170507fe73190e8e41194f5b5f7e6bf ("x86: Use separate +variable for TLSDESC XSAVE/XSAVEC state size (bug 32810)"). + +Reviewed-by: Adhemerval Zanella +(cherry picked from commit c6e2895695118ab59c7b17feb0fcb75a53e3478c) +(cherry picked from commit 837a36c371f18a3152d032e8060f4e5120c25e2b) +--- + sysdeps/x86/Makefile | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile +index e4286a7acd..b76b72ca4d 100644 +--- a/sysdeps/x86/Makefile ++++ b/sysdeps/x86/Makefile +@@ -102,6 +102,9 @@ LDFLAGS-tst-gnu2-tls2-x86-noxsavexsavec += -Wl,-z,lazy + tst-gnu2-tls2-x86-noxsave-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVE + tst-gnu2-tls2-x86-noxsavec-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC + tst-gnu2-tls2-x86-noxsavexsavec-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVE,-XSAVEC ++$(objpfx)tst-gnu2-tls2-x86-noxsave: $(shared-thread-library) ++$(objpfx)tst-gnu2-tls2-x86-noxsavec: $(shared-thread-library) ++$(objpfx)tst-gnu2-tls2-x86-noxsavexsavec: $(shared-thread-library) + $(objpfx)tst-gnu2-tls2-x86-noxsave.out \ + $(objpfx)tst-gnu2-tls2-x86-noxsavec.out \ + $(objpfx)tst-gnu2-tls2-x86-noxsavexsavec.out: \ +-- +2.27.0 + diff --git a/x86-Optimize-xstate-size-calculation.patch b/x86-Optimize-xstate-size-calculation.patch new file mode 100644 index 0000000000000000000000000000000000000000..b273b2db1b0a6e393eec0bf5aa1080606630df61 --- /dev/null +++ b/x86-Optimize-xstate-size-calculation.patch @@ -0,0 +1,166 @@ +From 2d34e48a2858ae901b4cf532a5d7a88931f1c149 Mon Sep 17 00:00:00 2001 +From: Sunil K Pandey +Date: Thu, 3 Apr 2025 13:00:45 -0700 +Subject: [PATCH] x86: Optimize xstate size calculation + +Scan xstate IDs up to the maximum supported xstate ID. Remove the +separate AMX xstate calculation. Instead, exclude the AMX space from +the start of TILECFG to the end of TILEDATA in xsave_state_size. 
+ +Completed validation on SKL/SKX/SPR/SDE and compared xsave state size +with "ld.so --list-diagnostics" option, no regression. + +Co-Authored-By: H.J. Lu +Reviewed-by: Sunil K Pandey +(cherry picked from commit 70b648855185e967e54668b101d24704c3fb869d) +--- + sysdeps/x86/cpu-features.c | 74 ++++++++++---------------------------- + sysdeps/x86/sysdep.h | 6 ++++ + 2 files changed, 24 insertions(+), 56 deletions(-) + +diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c +index b9ae482429..975b75c68b 100644 +--- a/sysdeps/x86/cpu-features.c ++++ b/sysdeps/x86/cpu-features.c +@@ -278,13 +278,8 @@ update_active (struct cpu_features *cpu_features) + /* Check if XSAVEC is available. */ + if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC)) + { +- unsigned int xstate_comp_offsets[32]; +- unsigned int xstate_comp_sizes[32]; +-#ifdef __x86_64__ +- unsigned int xstate_amx_comp_offsets[32]; +- unsigned int xstate_amx_comp_sizes[32]; +- unsigned int amx_ecx; +-#endif ++ unsigned int xstate_comp_offsets[X86_XSTATE_MAX_ID + 1]; ++ unsigned int xstate_comp_sizes[X86_XSTATE_MAX_ID + 1]; + unsigned int i; + + xstate_comp_offsets[0] = 0; +@@ -292,39 +287,16 @@ update_active (struct cpu_features *cpu_features) + xstate_comp_offsets[2] = 576; + xstate_comp_sizes[0] = 160; + xstate_comp_sizes[1] = 256; +-#ifdef __x86_64__ +- xstate_amx_comp_offsets[0] = 0; +- xstate_amx_comp_offsets[1] = 160; +- xstate_amx_comp_offsets[2] = 576; +- xstate_amx_comp_sizes[0] = 160; +- xstate_amx_comp_sizes[1] = 256; +-#endif + +- for (i = 2; i < 32; i++) ++ for (i = 2; i <= X86_XSTATE_MAX_ID; i++) + { + if ((FULL_STATE_SAVE_MASK & (1 << i)) != 0) + { + __cpuid_count (0xd, i, eax, ebx, ecx, edx); +-#ifdef __x86_64__ +- /* Include this in xsave_state_full_size. */ +- amx_ecx = ecx; +- xstate_amx_comp_sizes[i] = eax; +- if ((AMX_STATE_SAVE_MASK & (1 << i)) != 0) +- { +- /* Exclude this from xsave_state_size. 
*/ +- ecx = 0; +- xstate_comp_sizes[i] = 0; +- } +- else +-#endif +- xstate_comp_sizes[i] = eax; ++ xstate_comp_sizes[i] = eax; + } + else + { +-#ifdef __x86_64__ +- amx_ecx = 0; +- xstate_amx_comp_sizes[i] = 0; +-#endif + ecx = 0; + xstate_comp_sizes[i] = 0; + } +@@ -333,42 +305,32 @@ update_active (struct cpu_features *cpu_features) + { + xstate_comp_offsets[i] + = (xstate_comp_offsets[i - 1] +- + xstate_comp_sizes[i -1]); ++ + xstate_comp_sizes[i - 1]); + if ((ecx & (1 << 1)) != 0) + xstate_comp_offsets[i] + = ALIGN_UP (xstate_comp_offsets[i], 64); +-#ifdef __x86_64__ +- xstate_amx_comp_offsets[i] +- = (xstate_amx_comp_offsets[i - 1] +- + xstate_amx_comp_sizes[i - 1]); +- if ((amx_ecx & (1 << 1)) != 0) +- xstate_amx_comp_offsets[i] +- = ALIGN_UP (xstate_amx_comp_offsets[i], +- 64); +-#endif + } + } + + /* Use XSAVEC. */ + unsigned int size +- = xstate_comp_offsets[31] + xstate_comp_sizes[31]; ++ = (xstate_comp_offsets[X86_XSTATE_MAX_ID] ++ + xstate_comp_sizes[X86_XSTATE_MAX_ID]); + if (size) + { ++ size = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, ++ 64); + #ifdef __x86_64__ +- unsigned int amx_size +- = (xstate_amx_comp_offsets[31] +- + xstate_amx_comp_sizes[31]); +- amx_size +- = ALIGN_UP ((amx_size +- + TLSDESC_CALL_REGISTER_SAVE_AREA), +- 64); +- /* Set TLSDESC state size to the compact AMX +- state size for XSAVEC. */ +- _dl_x86_features_tlsdesc_state_size = amx_size; ++ _dl_x86_features_tlsdesc_state_size = size; ++ /* Exclude the AMX space from the start of TILECFG ++ space to the end of TILEDATA space. If CPU ++ doesn't support AMX, TILECFG offset is the same ++ as TILEDATA + 1 offset. Otherwise, they are ++ multiples of 64. 
*/ ++ size -= (xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1] ++ - xstate_comp_offsets[X86_XSTATE_TILECFG_ID]); + #endif +- cpu_features->xsave_state_size +- = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, +- 64); ++ cpu_features->xsave_state_size = size; + CPU_FEATURE_SET (cpu_features, XSAVEC); + } + } +diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h +index 9dab03a488..0f744859a0 100644 +--- a/sysdeps/x86/sysdep.h ++++ b/sysdeps/x86/sysdep.h +@@ -102,6 +102,9 @@ + | (1 << X86_XSTATE_ZMM_ID) \ + | (1 << X86_XSTATE_APX_F_ID)) + ++/* The maximum supported xstate ID. */ ++# define X86_XSTATE_MAX_ID X86_XSTATE_APX_F_ID ++ + /* AMX state mask. */ + # define AMX_STATE_SAVE_MASK \ + ((1 << X86_XSTATE_TILECFG_ID) | (1 << X86_XSTATE_TILEDATA_ID)) +@@ -123,6 +126,9 @@ + | (1 << X86_XSTATE_K_ID) \ + | (1 << X86_XSTATE_ZMM_H_ID)) + ++/* The maximum supported xstate ID. */ ++# define X86_XSTATE_MAX_ID X86_XSTATE_ZMM_H_ID ++ + /* States to be included in xsave_state_size. */ + # define FULL_STATE_SAVE_MASK STATE_SAVE_MASK + #endif +-- +2.27.0 + diff --git a/x86-Use-separate-variable-for-TLSDESC-XSAVE-XSAVEC-s.patch b/x86-Use-separate-variable-for-TLSDESC-XSAVE-XSAVEC-s.patch new file mode 100644 index 0000000000000000000000000000000000000000..9a1ddd3365d2dc0273770395e75fc676646c4a6f --- /dev/null +++ b/x86-Use-separate-variable-for-TLSDESC-XSAVE-XSAVEC-s.patch @@ -0,0 +1,216 @@ +From 5cb05015c9b28ea521d816237805fbbee5224c86 Mon Sep 17 00:00:00 2001 +From: Florian Weimer +Date: Fri, 28 Mar 2025 09:26:59 +0100 +Subject: [PATCH] x86: Use separate variable for TLSDESC XSAVE/XSAVEC state + size (bug 32810) + +Previously, the initialization code reused the xsave_state_full_size +member of struct cpu_features for the TLSDESC state size. However, +the tunable processing code assumes that this member has the +original XSAVE (non-compact) state size, so that it can use its +value if XSAVEC is disabled via tunable. 
+ +This change uses a separate variable and not a struct member because +the value is only needed in ld.so and the static libc, but not in +libc.so. As a result, struct cpu_features layout does not change, +helping a future backport of this change. + +Fixes commit 9b7091415af47082664717210ac49d51551456ab ("x86-64: +Update _dl_tlsdesc_dynamic to preserve AMX registers"). + +Reviewed-by: H.J. Lu +(cherry picked from commit 145097dff170507fe73190e8e41194f5b5f7e6bf) +--- + NEWS | 4 ++++ + sysdeps/x86/Makefile | 19 +++++++++++++++++++ + sysdeps/x86/cpu-features.c | 11 ++++++----- + sysdeps/x86/cpu-tunables.c | 2 ++ + sysdeps/x86/dl-diagnostics-cpu.c | 2 ++ + sysdeps/x86/include/cpu-features.h | 9 +++++++-- + sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c | 1 + + sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c | 1 + + sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c | 1 + + sysdeps/x86_64/dl-tlsdesc-dynamic.h | 2 +- + 10 files changed, 44 insertions(+), 8 deletions(-) + create mode 100644 sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c + create mode 100644 sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c + create mode 100644 sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c + +diff --git a/NEWS b/NEWS +index 77e89c9619..7f3049367f 100644 +--- a/NEWS ++++ b/NEWS +@@ -10,6 +10,10 @@ Version 2.38.1 + * Sync with Linux kernel 6.6 shadow stack interface. The --enable-cet + configure option in only supported on x86-64. 
+ ++The following bugs are resolved with this release: ++ ++ [32810] Crash on x86-64 if XSAVEC is disabled via tunable ++ + Deprecated and removed features, and other changes affecting compatibility: + + * __rseq_size now denotes the size of the active rseq area (20 bytes +diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile +index 02ecfbf146..e4286a7acd 100644 +--- a/sysdeps/x86/Makefile ++++ b/sysdeps/x86/Makefile +@@ -21,6 +21,9 @@ tests += \ + tst-cpu-features-supports-static \ + tst-get-cpu-features \ + tst-get-cpu-features-static \ ++ tst-gnu2-tls2-x86-noxsave \ ++ tst-gnu2-tls2-x86-noxsavec \ ++ tst-gnu2-tls2-x86-noxsavexsavec \ + # tests + tests-static += \ + tst-cpu-features-cpuinfo-static \ +@@ -89,6 +92,22 @@ CFLAGS-tst-gnu2-tls2.c += -msse + CFLAGS-tst-gnu2-tls2mod0.c += -msse2 -mtune=haswell + CFLAGS-tst-gnu2-tls2mod1.c += -msse2 -mtune=haswell + CFLAGS-tst-gnu2-tls2mod2.c += -msse2 -mtune=haswell ++ ++LDFLAGS-tst-gnu2-tls2-x86-noxsave += -Wl,-z,lazy ++LDFLAGS-tst-gnu2-tls2-x86-noxsavec += -Wl,-z,lazy ++LDFLAGS-tst-gnu2-tls2-x86-noxsavexsavec += -Wl,-z,lazy ++ ++# Test for bug 32810: incorrect XSAVE state size if XSAVEC is disabled ++# via tunable.
++tst-gnu2-tls2-x86-noxsave-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVE ++tst-gnu2-tls2-x86-noxsavec-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC ++tst-gnu2-tls2-x86-noxsavexsavec-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVE,-XSAVEC ++$(objpfx)tst-gnu2-tls2-x86-noxsave.out \ ++$(objpfx)tst-gnu2-tls2-x86-noxsavec.out \ ++$(objpfx)tst-gnu2-tls2-x86-noxsavexsavec.out: \ ++ $(objpfx)tst-gnu2-tls2mod0.so \ ++ $(objpfx)tst-gnu2-tls2mod1.so \ ++ $(objpfx)tst-gnu2-tls2mod2.so + endif + + ifeq ($(subdir),math) +diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c +index 4b75d3bcf2..b9ae482429 100644 +--- a/sysdeps/x86/cpu-features.c ++++ b/sysdeps/x86/cpu-features.c +@@ -62,6 +62,8 @@ extern void TUNABLE_CALLBACK (set_x86_shstk) (tunable_val_t *) + # include + #endif + ++unsigned long int _dl_x86_features_tlsdesc_state_size; ++ + static void + update_active (struct cpu_features *cpu_features) + { +@@ -271,6 +273,7 @@ update_active (struct cpu_features *cpu_features) + = xsave_state_full_size; + cpu_features->xsave_state_full_size + = xsave_state_full_size; ++ _dl_x86_features_tlsdesc_state_size = xsave_state_full_size; + + /* Check if XSAVEC is available. */ + if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC)) +@@ -359,11 +362,9 @@ update_active (struct cpu_features *cpu_features) + = ALIGN_UP ((amx_size + + TLSDESC_CALL_REGISTER_SAVE_AREA), + 64); +- /* Set xsave_state_full_size to the compact AMX +- state size for XSAVEC. NB: xsave_state_full_size +- is only used in _dl_tlsdesc_dynamic_xsave and +- _dl_tlsdesc_dynamic_xsavec. */ +- cpu_features->xsave_state_full_size = amx_size; ++ /* Set TLSDESC state size to the compact AMX ++ state size for XSAVEC. 
*/ ++ _dl_x86_features_tlsdesc_state_size = amx_size; + #endif + cpu_features->xsave_state_size + = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, +diff --git a/sysdeps/x86/cpu-tunables.c b/sysdeps/x86/cpu-tunables.c +index c144124142..7cd2899b40 100644 +--- a/sysdeps/x86/cpu-tunables.c ++++ b/sysdeps/x86/cpu-tunables.c +@@ -196,6 +196,8 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp) + /* Update xsave_state_size to XSAVE state size. */ + cpu_features->xsave_state_size + = cpu_features->xsave_state_full_size; ++ _dl_x86_features_tlsdesc_state_size ++ = cpu_features->xsave_state_full_size; + CPU_FEATURE_UNSET (cpu_features, XSAVEC); + } + } +diff --git a/sysdeps/x86/dl-diagnostics-cpu.c b/sysdeps/x86/dl-diagnostics-cpu.c +index 5aab63e532..3b32056362 100644 +--- a/sysdeps/x86/dl-diagnostics-cpu.c ++++ b/sysdeps/x86/dl-diagnostics-cpu.c +@@ -78,6 +78,8 @@ _dl_diagnostics_cpu (void) + cpu_features->xsave_state_size); + print_cpu_features_value ("xsave_state_full_size", + cpu_features->xsave_state_full_size); ++ print_cpu_features_value ("tlsdesc_state_full_size", ++ _dl_x86_features_tlsdesc_state_size); + print_cpu_features_value ("data_cache_size", cpu_features->data_cache_size); + print_cpu_features_value ("shared_cache_size", + cpu_features->shared_cache_size); +diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h +index bedea77167..3a786a12d9 100644 +--- a/sysdeps/x86/include/cpu-features.h ++++ b/sysdeps/x86/include/cpu-features.h +@@ -909,8 +909,6 @@ struct cpu_features + /* The full state size for XSAVE when XSAVEC is disabled by + + GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC +- +- and the AMX state size when XSAVEC is available. 
+ */ + unsigned int xsave_state_full_size; + /* Data cache size for use in memory and string routines, typically +@@ -962,6 +960,13 @@ extern const struct cpu_features *_dl_x86_get_cpu_features (void) + + #define __get_cpu_features() _dl_x86_get_cpu_features() + ++#if IS_IN (rtld) || IS_IN (libc) ++/* XSAVE/XSAVEC state size used by TLS descriptors. Compared to ++ xsave_state_size from struct cpu_features, this includes additional ++ registers. */ ++extern unsigned long int _dl_x86_features_tlsdesc_state_size attribute_hidden; ++#endif ++ + #if defined (_LIBC) && !IS_IN (nonlib) + /* Unused for x86. */ + # define INIT_ARCH() +diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c +new file mode 100644 +index 0000000000..f0024c143d +--- /dev/null ++++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsave.c +@@ -0,0 +1 @@ ++#include +diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c +new file mode 100644 +index 0000000000..f0024c143d +--- /dev/null ++++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavec.c +@@ -0,0 +1 @@ ++#include +diff --git a/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c +new file mode 100644 +index 0000000000..f0024c143d +--- /dev/null ++++ b/sysdeps/x86/tst-gnu2-tls2-x86-noxsavexsavec.c +@@ -0,0 +1 @@ ++#include +diff --git a/sysdeps/x86_64/dl-tlsdesc-dynamic.h b/sysdeps/x86_64/dl-tlsdesc-dynamic.h +index 9f02cfc3eb..44d948696f 100644 +--- a/sysdeps/x86_64/dl-tlsdesc-dynamic.h ++++ b/sysdeps/x86_64/dl-tlsdesc-dynamic.h +@@ -99,7 +99,7 @@ _dl_tlsdesc_dynamic: + # endif + #else + /* Allocate stack space of the required size to save the state. */ +- sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_FULL_SIZE_OFFSET(%rip), %RSP_LP ++ sub _dl_x86_features_tlsdesc_state_size(%rip), %RSP_LP + #endif + /* Besides rdi and rsi, saved above, save rcx, rdx, r8, r9, + r10 and r11. */ +-- +2.27.0 +