diff --git a/0001-S390-Do-not-clobber-r7-in-clone-BZ-31402.patch b/0001-S390-Do-not-clobber-r7-in-clone-BZ-31402.patch
new file mode 100644
index 0000000000000000000000000000000000000000..02bfee3718aa91afbae6c48775e3ce36e915fd32
--- /dev/null
+++ b/0001-S390-Do-not-clobber-r7-in-clone-BZ-31402.patch
@@ -0,0 +1,157 @@
+From ee4806e978467d705b26ccb7dfddb9e0a710f8e4 Mon Sep 17 00:00:00 2001
+From: Stefan Liebler <stli@linux.ibm.com>
+Date: Thu, 22 Feb 2024 15:03:27 +0100
+Subject: [PATCH 01/26] S390: Do not clobber r7 in clone [BZ #31402]
+
+Starting with commit e57d8fc97b90127de4ed3e3a9cdf663667580935
+"S390: Always use svc 0"
+clone clobbers the call-saved register r7 in error case:
+function or stack is NULL.
+
+This patch restores the saved registers also in the error case.
+Furthermore the existing test misc/tst-clone is extended to check
+all error cases and that clone does not clobber registers in this
+error case.
+
+(cherry picked from commit 02782fd12849b6673cb5c2728cb750e8ec295aa3)
+Note: Added ia64 __clone2 call to tst-clone.c.
+---
+ sysdeps/unix/sysv/linux/s390/s390-32/clone.S |  1 +
+ sysdeps/unix/sysv/linux/s390/s390-64/clone.S |  1 +
+ sysdeps/unix/sysv/linux/tst-clone.c          | 76 ++++++++++++++++----
+ 3 files changed, 65 insertions(+), 13 deletions(-)
+
+diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/clone.S b/sysdeps/unix/sysv/linux/s390/s390-32/clone.S
+index 5d8d873383..fd1e509cf4 100644
+--- a/sysdeps/unix/sysv/linux/s390/s390-32/clone.S
++++ b/sysdeps/unix/sysv/linux/s390/s390-32/clone.S
+@@ -53,6 +53,7 @@ ENTRY(__clone)
+ 	br	%r14
+ error:
+ 	lhi	%r2,-EINVAL
++	lm	%r6,%r7,24(%r15)	/* Load registers.  */
+ 	j	SYSCALL_ERROR_LABEL
+ PSEUDO_END (__clone)
+ 
+diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/clone.S b/sysdeps/unix/sysv/linux/s390/s390-64/clone.S
+index f1c4288a3d..7b37b18010 100644
+--- a/sysdeps/unix/sysv/linux/s390/s390-64/clone.S
++++ b/sysdeps/unix/sysv/linux/s390/s390-64/clone.S
+@@ -54,6 +54,7 @@ ENTRY(__clone)
+ 	br	%r14
+ error:
+ 	lghi	%r2,-EINVAL
++	lmg	%r6,%r7,48(%r15)	/* Restore registers.  */
+ 	jg	SYSCALL_ERROR_LABEL
+ PSEUDO_END (__clone)
+ 
+diff --git a/sysdeps/unix/sysv/linux/tst-clone.c b/sysdeps/unix/sysv/linux/tst-clone.c
+index 56348707d4..95bd0f6ccb 100644
+--- a/sysdeps/unix/sysv/linux/tst-clone.c
++++ b/sysdeps/unix/sysv/linux/tst-clone.c
+@@ -16,12 +16,16 @@
+    License along with the GNU C Library; if not, see
+    <https://www.gnu.org/licenses/>.  */
+ 
+-/* BZ #2386 */
++/* BZ #2386, BZ #31402 */
+ #include <errno.h>
+ #include <sched.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <unistd.h>
++#include <stackinfo.h>  /* For _STACK_GROWS_{UP,DOWN}.  */
++#include <support/check.h>
++
++volatile unsigned v = 0xdeadbeef;
+ 
+ #ifdef __ia64__
+ extern int __clone2 (int (*__fn) (void *__arg), void *__child_stack_base,
+@@ -35,26 +39,72 @@ int child_fn(void *arg)
+ }
+ 
+ static int
+-do_test (void)
++__attribute__((noinline))
++do_clone (int (*fn)(void *), void *stack)
+ {
+   int result;
++  unsigned int a = v;
++  unsigned int b = v;
++  unsigned int c = v;
++  unsigned int d = v;
++  unsigned int e = v;
++  unsigned int f = v;
++  unsigned int g = v;
++  unsigned int h = v;
++  unsigned int i = v;
++  unsigned int j = v;
++  unsigned int k = v;
++  unsigned int l = v;
++  unsigned int m = v;
++  unsigned int n = v;
++  unsigned int o = v;
+ 
+ #ifdef __ia64__
+-  result = __clone2 (child_fn, NULL, 0, 0, NULL, NULL, NULL);
++  result = __clone2 (fn, stack, stack != NULL ? 128 * 1024 : 0, 0, NULL, NULL,
++		     NULL);
++#else
++  result = clone (fn, stack, 0, NULL);
++#endif
++
++  /* Check that clone does not clobber call-saved registers.  */
++  TEST_VERIFY (a == v && b == v && c == v && d == v && e == v && f == v
++	       && g == v && h == v && i == v && j == v && k == v && l == v
++	       && m == v && n == v && o == v);
++
++  return result;
++}
++
++static void
++__attribute__((noinline))
++do_test_single (int (*fn)(void *), void *stack)
++{
++  printf ("%s (fn=%p, stack=%p)\n", __FUNCTION__, fn, stack);
++  errno = 0;
++
++  int result = do_clone (fn, stack);
++
++  TEST_COMPARE (errno, EINVAL);
++  TEST_COMPARE (result, -1);
++}
++
++static int
++do_test (void)
++{
++  char st[128 * 1024] __attribute__ ((aligned));
++  void *stack = NULL;
++#if defined __ia64__ || _STACK_GROWS_UP
++  stack = st;
++#elif _STACK_GROWS_DOWN
++  stack = st + sizeof (st);
+ #else
+-  result = clone (child_fn, NULL, 0, NULL);
++# error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
+ #endif
+ 
+-  if (errno != EINVAL || result != -1)
+-    {
+-      printf ("FAIL: clone()=%d (wanted -1) errno=%d (wanted %d)\n",
+-	      result, errno, EINVAL);
+-      return 1;
+-    }
++  do_test_single (child_fn, NULL);
++  do_test_single (NULL, stack);
++  do_test_single (NULL, NULL);
+ 
+-  puts ("All OK");
+   return 0;
+ }
+ 
+-#define TEST_FUNCTION do_test ()
+-#include "../test-skeleton.c"
++#include <support/test-driver.c>
+-- 
+2.33.0
+
diff --git a/0002-linux-Use-rseq-area-unconditionally-in-sched_getcpu-.patch b/0002-linux-Use-rseq-area-unconditionally-in-sched_getcpu-.patch
new file mode 100644
index 0000000000000000000000000000000000000000..b3d890dd8beab5b8387b43c02bab969878b42378
--- /dev/null
+++ b/0002-linux-Use-rseq-area-unconditionally-in-sched_getcpu-.patch
@@ -0,0 +1,52 @@
+From 5753cda1ca0749002c4718122a9b6d5177087b7b Mon Sep 17 00:00:00 2001
+From: Florian Weimer <fweimer@redhat.com>
+Date: Fri, 15 Mar 2024 19:08:24 +0100
+Subject: [PATCH 02/26] linux: Use rseq area unconditionally in sched_getcpu
+ (bug 31479)
+
+Originally, nptl/descr.h included <sys/rseq.h>, but we removed that
+in commit 2c6b4b272e6b4d07303af25709051c3e96288f2d ("nptl:
+Unconditionally use a 32-byte rseq area").  After that, it was
+not ensured that the RSEQ_SIG macro was defined during sched_getcpu.c
+compilation that provided a definition.  This commit always checks
+the rseq area for CPU number information before using the other
+approaches.
+
+This adds an unnecessary (but well-predictable) branch on
+architectures which do not define RSEQ_SIG, but its cost is small
+compared to the system call.  Most architectures that have vDSO
+acceleration for getcpu also have rseq support.
+
+Fixes: 2c6b4b272e6b4d07303af25709051c3e96288f2d
+Fixes: 1d350aa06091211863e41169729cee1bca39f72f
+Reviewed-by: Arjun Shankar <arjun@redhat.com>
+(cherry picked from commit 7a76f218677d149d8b7875b336722108239f7ee9)
+---
+ sysdeps/unix/sysv/linux/sched_getcpu.c | 8 --------
+ 1 file changed, 8 deletions(-)
+
+diff --git a/sysdeps/unix/sysv/linux/sched_getcpu.c b/sysdeps/unix/sysv/linux/sched_getcpu.c
+index 4457d714bc..22700ef846 100644
+--- a/sysdeps/unix/sysv/linux/sched_getcpu.c
++++ b/sysdeps/unix/sysv/linux/sched_getcpu.c
+@@ -33,17 +33,9 @@ vsyscall_sched_getcpu (void)
+   return r == -1 ? r : cpu;
+ }
+ 
+-#ifdef RSEQ_SIG
+ int
+ sched_getcpu (void)
+ {
+   int cpu_id = THREAD_GETMEM_VOLATILE (THREAD_SELF, rseq_area.cpu_id);
+   return __glibc_likely (cpu_id >= 0) ?
cpu_id : vsyscall_sched_getcpu (); + } +-#else /* RSEQ_SIG */ +-int +-sched_getcpu (void) +-{ +- return vsyscall_sched_getcpu (); +-} +-#endif /* RSEQ_SIG */ +-- +2.33.0 + diff --git a/0003-LoongArch-Correct-__ieee754-_-_scalb-__ieee754-_-_sc.patch b/0003-LoongArch-Correct-__ieee754-_-_scalb-__ieee754-_-_sc.patch new file mode 100644 index 0000000000000000000000000000000000000000..580d498ab17e2dc52e0c33494233627991fb31a8 --- /dev/null +++ b/0003-LoongArch-Correct-__ieee754-_-_scalb-__ieee754-_-_sc.patch @@ -0,0 +1,23 @@ +From 0518bb0c16cb5986aaf35cf5e634964d7a06978b Mon Sep 17 00:00:00 2001 +From: caiyinyu +Date: Mon, 11 Mar 2024 16:07:48 +0800 +Subject: [PATCH 03/26] LoongArch: Correct {__ieee754, _}_scalb -> {__ieee754, + _}_scalbf + +--- + sysdeps/loongarch/fpu/e_scalbf.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/sysdeps/loongarch/fpu/e_scalbf.c b/sysdeps/loongarch/fpu/e_scalbf.c +index c37b0fd19d..4690224621 100644 +--- a/sysdeps/loongarch/fpu/e_scalbf.c ++++ b/sysdeps/loongarch/fpu/e_scalbf.c +@@ -57,4 +57,4 @@ __ieee754_scalbf (float x, float fn) + + return x; + } +-libm_alias_finite (__ieee754_scalb, __scalb) ++libm_alias_finite (__ieee754_scalbf, __scalbf) +-- +2.33.0 + diff --git a/0004-Add-HWCAP2_MOPS-from-Linux-6.5-to-AArch64-bits-hwcap.patch b/0004-Add-HWCAP2_MOPS-from-Linux-6.5-to-AArch64-bits-hwcap.patch new file mode 100644 index 0000000000000000000000000000000000000000..2efca3deb05ef7c444bf9839c816f70df497158b --- /dev/null +++ b/0004-Add-HWCAP2_MOPS-from-Linux-6.5-to-AArch64-bits-hwcap.patch @@ -0,0 +1,27 @@ +From 5456ff5d80e45741a73cf9fa792d789a1ed17a09 Mon Sep 17 00:00:00 2001 +From: Joseph Myers +Date: Tue, 17 Oct 2023 13:13:27 +0000 +Subject: [PATCH 04/26] Add HWCAP2_MOPS from Linux 6.5 to AArch64 bits/hwcap.h + +Linux 6.5 adds a new AArch64 HWCAP2 value, HWCAP2_MOPS. Add it to +glibc's bits/hwcap.h. + +Tested with build-many-glibcs.py for aarch64-linux-gnu. + +(cherry picked from commit ff5d2abd18629e0efac41e31699cdff3be0e08fa) +--- + sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h b/sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h +index 55c7ed39be..b251c2d417 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h ++++ b/sysdeps/unix/sysv/linux/aarch64/bits/hwcap.h +@@ -98,3 +98,4 @@ + #define HWCAP2_SME_BI32I32 (1UL << 40) + #define HWCAP2_SME_B16B16 (1UL << 41) + #define HWCAP2_SME_F16F16 (1UL << 42) ++#define HWCAP2_MOPS (1UL << 43) +-- +2.33.0 + diff --git a/0005-AArch64-Add-support-for-MOPS-memcpy-memmove-memset.patch b/0005-AArch64-Add-support-for-MOPS-memcpy-memmove-memset.patch new file mode 100644 index 0000000000000000000000000000000000000000..ef0ab27db4688f5f30d260aa9cab54e48ef52a3d --- /dev/null +++ b/0005-AArch64-Add-support-for-MOPS-memcpy-memmove-memset.patch @@ -0,0 +1,314 @@ +From d8a2b56b4fdf39488eb8a94f8b1064e262708b6f Mon Sep 17 00:00:00 2001 +From: Wilco Dijkstra +Date: Tue, 17 Oct 2023 16:54:21 +0100 +Subject: [PATCH 05/26] AArch64: Add support for MOPS memcpy/memmove/memset + +Add support for MOPS in cpu_features and INIT_ARCH. Add ifuncs using MOPS for +memcpy, memmove and memset (use .inst for now so it works with all binutils +versions without needing complex configure and conditional compilation). 
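+
+Note (illustrative, not from the upstream commit): the runtime
+precondition for these new ifunc variants is the HWCAP2_MOPS bit added
+in the previous patch.  A minimal user-space probe, as a hedged sketch
+(the 1UL << 43 fallback mirrors the bits/hwcap.h addition and is only
+needed with older headers):
+
+  #include <stdio.h>
+  #include <sys/auxv.h>
+
+  #ifndef HWCAP2_MOPS
+  # define HWCAP2_MOPS (1UL << 43)	/* Matches bits/hwcap.h.  */
+  #endif
+
+  int
+  main (void)
+  {
+    /* The mops memcpy/memmove/memset ifuncs are only selected when the
+       kernel reports FEAT_MOPS via AT_HWCAP2.  */
+    unsigned long hwcap2 = getauxval (AT_HWCAP2);
+    printf ("FEAT_MOPS %s\n", (hwcap2 & HWCAP2_MOPS) ? "present" : "absent");
+    return 0;
+  }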
+ +Reviewed-by: Szabolcs Nagy +(cherry picked from commit 2bd00179885928fd95fcabfafc50e7b5c6e660d2) +--- + sysdeps/aarch64/multiarch/Makefile | 3 ++ + sysdeps/aarch64/multiarch/ifunc-impl-list.c | 3 ++ + sysdeps/aarch64/multiarch/init-arch.h | 4 +- + sysdeps/aarch64/multiarch/memcpy.c | 4 ++ + sysdeps/aarch64/multiarch/memcpy_mops.S | 39 +++++++++++++++++++ + sysdeps/aarch64/multiarch/memmove.c | 4 ++ + sysdeps/aarch64/multiarch/memmove_mops.S | 39 +++++++++++++++++++ + sysdeps/aarch64/multiarch/memset.c | 4 ++ + sysdeps/aarch64/multiarch/memset_mops.S | 38 ++++++++++++++++++ + .../unix/sysv/linux/aarch64/cpu-features.c | 3 ++ + .../unix/sysv/linux/aarch64/cpu-features.h | 1 + + 11 files changed, 141 insertions(+), 1 deletion(-) + create mode 100644 sysdeps/aarch64/multiarch/memcpy_mops.S + create mode 100644 sysdeps/aarch64/multiarch/memmove_mops.S + create mode 100644 sysdeps/aarch64/multiarch/memset_mops.S + +diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile +index 223777d94e..e6099548b9 100644 +--- a/sysdeps/aarch64/multiarch/Makefile ++++ b/sysdeps/aarch64/multiarch/Makefile +@@ -5,14 +5,17 @@ sysdep_routines += \ + memcpy_a64fx \ + memcpy_falkor \ + memcpy_generic \ ++ memcpy_mops \ + memcpy_sve \ + memcpy_thunderx \ + memcpy_thunderx2 \ ++ memmove_mops \ + memset_a64fx \ + memset_emag \ + memset_falkor \ + memset_generic \ + memset_kunpeng \ ++ memset_mops \ + strlen_asimd \ + strlen_mte \ + # sysdep_routines +diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c +index d274f01fdb..da7f115377 100644 +--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c +@@ -41,6 +41,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, memcpy, sve, __memcpy_a64fx) + IFUNC_IMPL_ADD (array, i, memcpy, sve, __memcpy_sve) + #endif ++ IFUNC_IMPL_ADD (array, i, memcpy, mops, __memcpy_mops) + IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic)) + IFUNC_IMPL (i, name, memmove, + IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_thunderx) +@@ -50,6 +51,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, memmove, sve, __memmove_a64fx) + IFUNC_IMPL_ADD (array, i, memmove, sve, __memmove_sve) + #endif ++ IFUNC_IMPL_ADD (array, i, memmove, mops, __memmove_mops) + IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic)) + IFUNC_IMPL (i, name, memset, + /* Enable this on non-falkor processors too so that other cores +@@ -60,6 +62,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + #if HAVE_AARCH64_SVE_ASM + IFUNC_IMPL_ADD (array, i, memset, sve && zva_size == 256, __memset_a64fx) + #endif ++ IFUNC_IMPL_ADD (array, i, memset, mops, __memset_mops) + IFUNC_IMPL_ADD (array, i, memset, 1, __memset_generic)) + IFUNC_IMPL (i, name, memchr, + IFUNC_IMPL_ADD (array, i, memchr, !mte, __memchr_nosimd) +diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h +index 6de081e381..e23e6ff290 100644 +--- a/sysdeps/aarch64/multiarch/init-arch.h ++++ b/sysdeps/aarch64/multiarch/init-arch.h +@@ -35,4 +35,6 @@ + bool __attribute__((unused)) mte = \ + MTE_ENABLED (); \ + bool __attribute__((unused)) sve = \ +- GLRO(dl_aarch64_cpu_features).sve; ++ GLRO(dl_aarch64_cpu_features).sve; \ ++ bool __attribute__((unused)) mops = \ ++ GLRO(dl_aarch64_cpu_features).mops; +diff --git a/sysdeps/aarch64/multiarch/memcpy.c 
b/sysdeps/aarch64/multiarch/memcpy.c +index 3aae915c5f..9aace954cb 100644 +--- a/sysdeps/aarch64/multiarch/memcpy.c ++++ b/sysdeps/aarch64/multiarch/memcpy.c +@@ -34,12 +34,16 @@ extern __typeof (__redirect_memcpy) __memcpy_thunderx2 attribute_hidden; + extern __typeof (__redirect_memcpy) __memcpy_falkor attribute_hidden; + extern __typeof (__redirect_memcpy) __memcpy_a64fx attribute_hidden; + extern __typeof (__redirect_memcpy) __memcpy_sve attribute_hidden; ++extern __typeof (__redirect_memcpy) __memcpy_mops attribute_hidden; + + static inline __typeof (__redirect_memcpy) * + select_memcpy_ifunc (void) + { + INIT_ARCH (); + ++ if (mops) ++ return __memcpy_mops; ++ + if (sve && HAVE_AARCH64_SVE_ASM) + { + if (IS_A64FX (midr)) +diff --git a/sysdeps/aarch64/multiarch/memcpy_mops.S b/sysdeps/aarch64/multiarch/memcpy_mops.S +new file mode 100644 +index 0000000000..4685629664 +--- /dev/null ++++ b/sysdeps/aarch64/multiarch/memcpy_mops.S +@@ -0,0 +1,39 @@ ++/* Optimized memcpy for MOPS. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++ ++/* Assumptions: ++ * ++ * AArch64, MOPS. ++ * ++ */ ++ ++ENTRY (__memcpy_mops) ++ PTR_ARG (0) ++ PTR_ARG (1) ++ SIZE_ARG (2) ++ ++ mov x3, x0 ++ .inst 0x19010443 /* cpyfp [x3]!, [x1]!, x2! */ ++ .inst 0x19410443 /* cpyfm [x3]!, [x1]!, x2! */ ++ .inst 0x19810443 /* cpyfe [x3]!, [x1]!, x2! */ ++ ret ++ ++END (__memcpy_mops) +diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c +index 312f90f111..fd346e7b73 100644 +--- a/sysdeps/aarch64/multiarch/memmove.c ++++ b/sysdeps/aarch64/multiarch/memmove.c +@@ -34,12 +34,16 @@ extern __typeof (__redirect_memmove) __memmove_thunderx2 attribute_hidden; + extern __typeof (__redirect_memmove) __memmove_falkor attribute_hidden; + extern __typeof (__redirect_memmove) __memmove_a64fx attribute_hidden; + extern __typeof (__redirect_memmove) __memmove_sve attribute_hidden; ++extern __typeof (__redirect_memmove) __memmove_mops attribute_hidden; + + static inline __typeof (__redirect_memmove) * + select_memmove_ifunc (void) + { + INIT_ARCH (); + ++ if (mops) ++ return __memmove_mops; ++ + if (sve && HAVE_AARCH64_SVE_ASM) + { + if (IS_A64FX (midr)) +diff --git a/sysdeps/aarch64/multiarch/memmove_mops.S b/sysdeps/aarch64/multiarch/memmove_mops.S +new file mode 100644 +index 0000000000..c5ea66be3a +--- /dev/null ++++ b/sysdeps/aarch64/multiarch/memmove_mops.S +@@ -0,0 +1,39 @@ ++/* Optimized memmove for MOPS. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++ ++/* Assumptions: ++ * ++ * AArch64, MOPS. ++ * ++ */ ++ ++ENTRY (__memmove_mops) ++ PTR_ARG (0) ++ PTR_ARG (1) ++ SIZE_ARG (2) ++ ++ mov x3, x0 ++ .inst 0x1d010443 /* cpyp [x3]!, [x1]!, x2! */ ++ .inst 0x1d410443 /* cpym [x3]!, [x1]!, x2! */ ++ .inst 0x1d810443 /* cpye [x3]!, [x1]!, x2! */ ++ ret ++ ++END (__memmove_mops) +diff --git a/sysdeps/aarch64/multiarch/memset.c b/sysdeps/aarch64/multiarch/memset.c +index f9c81d3d8e..23fc66e158 100644 +--- a/sysdeps/aarch64/multiarch/memset.c ++++ b/sysdeps/aarch64/multiarch/memset.c +@@ -33,12 +33,16 @@ extern __typeof (__redirect_memset) __memset_emag attribute_hidden; + extern __typeof (__redirect_memset) __memset_kunpeng attribute_hidden; + extern __typeof (__redirect_memset) __memset_a64fx attribute_hidden; + extern __typeof (__redirect_memset) __memset_generic attribute_hidden; ++extern __typeof (__redirect_memset) __memset_mops attribute_hidden; + + static inline __typeof (__redirect_memset) * + select_memset_ifunc (void) + { + INIT_ARCH (); + ++ if (mops) ++ return __memset_mops; ++ + if (sve && HAVE_AARCH64_SVE_ASM) + { + if (IS_A64FX (midr) && zva_size == 256) +diff --git a/sysdeps/aarch64/multiarch/memset_mops.S b/sysdeps/aarch64/multiarch/memset_mops.S +new file mode 100644 +index 0000000000..ca820b8636 +--- /dev/null ++++ b/sysdeps/aarch64/multiarch/memset_mops.S +@@ -0,0 +1,38 @@ ++/* Optimized memset for MOPS. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++ ++/* Assumptions: ++ * ++ * AArch64, MOPS. ++ * ++ */ ++ ++ENTRY (__memset_mops) ++ PTR_ARG (0) ++ SIZE_ARG (2) ++ ++ mov x3, x0 ++ .inst 0x19c10443 /* setp [x3]!, x2!, x1 */ ++ .inst 0x19c14443 /* setm [x3]!, x2!, x1 */ ++ .inst 0x19c18443 /* sete [x3]!, x2!, x1 */ ++ ret ++ ++END (__memset_mops) +diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c +index dc09c1c827..233d5b2407 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c ++++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c +@@ -120,4 +120,7 @@ init_cpu_features (struct cpu_features *cpu_features) + + /* Check if SVE is supported. */ + cpu_features->sve = GLRO (dl_hwcap) & HWCAP_SVE; ++ ++ /* Check if MOPS is supported. 
*/ ++ cpu_features->mops = GLRO (dl_hwcap2) & HWCAP2_MOPS; + } +diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h +index d67d286b53..40b709677d 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h ++++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h +@@ -76,6 +76,7 @@ struct cpu_features + /* Currently, the GLIBC memory tagging tunable only defines 8 bits. */ + uint8_t mte_state; + bool sve; ++ bool mops; + }; + + #endif /* _CPU_FEATURES_AARCH64_H */ +-- +2.33.0 + diff --git a/0006-AArch64-Cleanup-ifuncs.patch b/0006-AArch64-Cleanup-ifuncs.patch new file mode 100644 index 0000000000000000000000000000000000000000..c1ce54b58a58f3f96deb198ba2348adfe88f668c --- /dev/null +++ b/0006-AArch64-Cleanup-ifuncs.patch @@ -0,0 +1,540 @@ +From 25b66e8c4a75b51b0122089cf6b99860fb05470d Mon Sep 17 00:00:00 2001 +From: Wilco Dijkstra +Date: Tue, 24 Oct 2023 13:51:07 +0100 +Subject: [PATCH 06/26] AArch64: Cleanup ifuncs + +Cleanup ifuncs. Remove uses of libc_hidden_builtin_def, use ENTRY rather than +ENTRY_ALIGN, remove unnecessary defines and conditional compilation. Rename +strlen_mte to strlen_generic. Remove rtld-memset. + +Reviewed-by: Szabolcs Nagy +(cherry picked from commit 9fd3409842b3e2d31cff5dbd6f96066c430f0aa2) +--- + sysdeps/aarch64/memset.S | 2 +- + sysdeps/aarch64/multiarch/Makefile | 2 +- + sysdeps/aarch64/multiarch/ifunc-impl-list.c | 2 +- + sysdeps/aarch64/multiarch/memchr_nosimd.S | 9 ++---- + sysdeps/aarch64/multiarch/memcpy_a64fx.S | 14 +++------- + sysdeps/aarch64/multiarch/memcpy_falkor.S | 6 ++-- + sysdeps/aarch64/multiarch/memcpy_sve.S | 2 -- + sysdeps/aarch64/multiarch/memcpy_thunderx.S | 27 ++++-------------- + sysdeps/aarch64/multiarch/memcpy_thunderx2.S | 28 +++---------------- + sysdeps/aarch64/multiarch/memset_a64fx.S | 8 ++---- + sysdeps/aarch64/multiarch/memset_base64.S | 3 +- + sysdeps/aarch64/multiarch/memset_emag.S | 8 ++---- + sysdeps/aarch64/multiarch/memset_generic.S | 8 +++++- + sysdeps/aarch64/multiarch/memset_kunpeng.S | 9 ++---- + sysdeps/aarch64/multiarch/rtld-memset.S | 25 ----------------- + sysdeps/aarch64/multiarch/strlen.c | 4 +-- + sysdeps/aarch64/multiarch/strlen_asimd.S | 1 - + .../{strlen_mte.S => strlen_generic.S} | 8 +++--- + 18 files changed, 41 insertions(+), 125 deletions(-) + delete mode 100644 sysdeps/aarch64/multiarch/rtld-memset.S + rename sysdeps/aarch64/multiarch/{strlen_mte.S => strlen_generic.S} (85%) + +diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S +index 50e5da3e7a..bf3cf85c8a 100644 +--- a/sysdeps/aarch64/memset.S ++++ b/sysdeps/aarch64/memset.S +@@ -29,7 +29,7 @@ + * + */ + +-ENTRY_ALIGN (MEMSET, 6) ++ENTRY (MEMSET) + + PTR_ARG (0) + SIZE_ARG (2) +diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile +index e6099548b9..a1a4de3cd9 100644 +--- a/sysdeps/aarch64/multiarch/Makefile ++++ b/sysdeps/aarch64/multiarch/Makefile +@@ -17,6 +17,6 @@ sysdep_routines += \ + memset_kunpeng \ + memset_mops \ + strlen_asimd \ +- strlen_mte \ ++ strlen_generic \ + # sysdep_routines + endif +diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c +index da7f115377..836e8317a5 100644 +--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c +@@ -70,7 +70,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + + IFUNC_IMPL (i, name, strlen, + IFUNC_IMPL_ADD (array, i, strlen, !mte, __strlen_asimd) +- IFUNC_IMPL_ADD (array, i, 
strlen, 1, __strlen_mte)) ++ IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_generic)) + + return 0; + } +diff --git a/sysdeps/aarch64/multiarch/memchr_nosimd.S b/sysdeps/aarch64/multiarch/memchr_nosimd.S +index 57e48375e9..7800751899 100644 +--- a/sysdeps/aarch64/multiarch/memchr_nosimd.S ++++ b/sysdeps/aarch64/multiarch/memchr_nosimd.S +@@ -26,10 +26,6 @@ + * Use base integer registers. + */ + +-#ifndef MEMCHR +-# define MEMCHR __memchr_nosimd +-#endif +- + /* Arguments and results. */ + #define srcin x0 + #define chrin x1 +@@ -62,7 +58,7 @@ + #define REP8_7f 0x7f7f7f7f7f7f7f7f + + +-ENTRY_ALIGN (MEMCHR, 6) ++ENTRY (__memchr_nosimd) + + PTR_ARG (0) + SIZE_ARG (2) +@@ -219,5 +215,4 @@ L(none_chr): + mov result, 0 + ret + +-END (MEMCHR) +-libc_hidden_builtin_def (MEMCHR) ++END (__memchr_nosimd) +diff --git a/sysdeps/aarch64/multiarch/memcpy_a64fx.S b/sysdeps/aarch64/multiarch/memcpy_a64fx.S +index f89b5b670a..baff7e96d0 100644 +--- a/sysdeps/aarch64/multiarch/memcpy_a64fx.S ++++ b/sysdeps/aarch64/multiarch/memcpy_a64fx.S +@@ -39,9 +39,6 @@ + #define vlen8 x8 + + #if HAVE_AARCH64_SVE_ASM +-# if IS_IN (libc) +-# define MEMCPY __memcpy_a64fx +-# define MEMMOVE __memmove_a64fx + + .arch armv8.2-a+sve + +@@ -97,7 +94,7 @@ + #undef BTI_C + #define BTI_C + +-ENTRY (MEMCPY) ++ENTRY (__memcpy_a64fx) + + PTR_ARG (0) + PTR_ARG (1) +@@ -234,11 +231,10 @@ L(last_bytes): + st1b z3.b, p0, [dstend, -1, mul vl] + ret + +-END (MEMCPY) +-libc_hidden_builtin_def (MEMCPY) ++END (__memcpy_a64fx) + + +-ENTRY_ALIGN (MEMMOVE, 4) ++ENTRY_ALIGN (__memmove_a64fx, 4) + + PTR_ARG (0) + PTR_ARG (1) +@@ -307,7 +303,5 @@ L(full_overlap): + mov dst, dstin + b L(last_bytes) + +-END (MEMMOVE) +-libc_hidden_builtin_def (MEMMOVE) +-# endif /* IS_IN (libc) */ ++END (__memmove_a64fx) + #endif /* HAVE_AARCH64_SVE_ASM */ +diff --git a/sysdeps/aarch64/multiarch/memcpy_falkor.S b/sysdeps/aarch64/multiarch/memcpy_falkor.S +index ec0e4ade24..67c4ab34eb 100644 +--- a/sysdeps/aarch64/multiarch/memcpy_falkor.S ++++ b/sysdeps/aarch64/multiarch/memcpy_falkor.S +@@ -71,7 +71,7 @@ + The non-temporal stores help optimize cache utilization. */ + + #if IS_IN (libc) +-ENTRY_ALIGN (__memcpy_falkor, 6) ++ENTRY (__memcpy_falkor) + + PTR_ARG (0) + PTR_ARG (1) +@@ -198,7 +198,6 @@ L(loop64): + ret + + END (__memcpy_falkor) +-libc_hidden_builtin_def (__memcpy_falkor) + + + /* RATIONALE: +@@ -216,7 +215,7 @@ libc_hidden_builtin_def (__memcpy_falkor) + + For small and medium cases memcpy is used. */ + +-ENTRY_ALIGN (__memmove_falkor, 6) ++ENTRY (__memmove_falkor) + + PTR_ARG (0) + PTR_ARG (1) +@@ -311,5 +310,4 @@ L(move_long): + 3: ret + + END (__memmove_falkor) +-libc_hidden_builtin_def (__memmove_falkor) + #endif +diff --git a/sysdeps/aarch64/multiarch/memcpy_sve.S b/sysdeps/aarch64/multiarch/memcpy_sve.S +index d11be6a443..2f14f91366 100644 +--- a/sysdeps/aarch64/multiarch/memcpy_sve.S ++++ b/sysdeps/aarch64/multiarch/memcpy_sve.S +@@ -141,7 +141,6 @@ L(copy64_from_end): + ret + + END (__memcpy_sve) +-libc_hidden_builtin_def (__memcpy_sve) + + + ENTRY (__memmove_sve) +@@ -208,5 +207,4 @@ L(return): + ret + + END (__memmove_sve) +-libc_hidden_builtin_def (__memmove_sve) + #endif +diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx.S b/sysdeps/aarch64/multiarch/memcpy_thunderx.S +index 366287587f..14269b1a47 100644 +--- a/sysdeps/aarch64/multiarch/memcpy_thunderx.S ++++ b/sysdeps/aarch64/multiarch/memcpy_thunderx.S +@@ -65,21 +65,7 @@ + Overlapping large forward memmoves use a loop that copies backwards. 
+ */ + +-#ifndef MEMMOVE +-# define MEMMOVE memmove +-#endif +-#ifndef MEMCPY +-# define MEMCPY memcpy +-#endif +- +-#if IS_IN (libc) +- +-# undef MEMCPY +-# define MEMCPY __memcpy_thunderx +-# undef MEMMOVE +-# define MEMMOVE __memmove_thunderx +- +-ENTRY_ALIGN (MEMMOVE, 6) ++ENTRY (__memmove_thunderx) + + PTR_ARG (0) + PTR_ARG (1) +@@ -91,9 +77,9 @@ ENTRY_ALIGN (MEMMOVE, 6) + b.lo L(move_long) + + /* Common case falls through into memcpy. */ +-END (MEMMOVE) +-libc_hidden_builtin_def (MEMMOVE) +-ENTRY (MEMCPY) ++END (__memmove_thunderx) ++ ++ENTRY (__memcpy_thunderx) + + PTR_ARG (0) + PTR_ARG (1) +@@ -316,7 +302,4 @@ L(move_long): + stp C_l, C_h, [dstin] + 3: ret + +-END (MEMCPY) +-libc_hidden_builtin_def (MEMCPY) +- +-#endif ++END (__memcpy_thunderx) +diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx2.S b/sysdeps/aarch64/multiarch/memcpy_thunderx2.S +index d3d6f1debc..93993b9e03 100644 +--- a/sysdeps/aarch64/multiarch/memcpy_thunderx2.S ++++ b/sysdeps/aarch64/multiarch/memcpy_thunderx2.S +@@ -75,27 +75,12 @@ + #define I_v v16 + #define J_v v17 + +-#ifndef MEMMOVE +-# define MEMMOVE memmove +-#endif +-#ifndef MEMCPY +-# define MEMCPY memcpy +-#endif +- +-#if IS_IN (libc) +- +-#undef MEMCPY +-#define MEMCPY __memcpy_thunderx2 +-#undef MEMMOVE +-#define MEMMOVE __memmove_thunderx2 +- +- + /* Overlapping large forward memmoves use a loop that copies backwards. + Otherwise memcpy is used. Small moves branch to memcopy16 directly. + The longer memcpy cases fall through to the memcpy head. + */ + +-ENTRY_ALIGN (MEMMOVE, 6) ++ENTRY (__memmove_thunderx2) + + PTR_ARG (0) + PTR_ARG (1) +@@ -109,8 +94,7 @@ ENTRY_ALIGN (MEMMOVE, 6) + ccmp tmp1, count, 2, hi + b.lo L(move_long) + +-END (MEMMOVE) +-libc_hidden_builtin_def (MEMMOVE) ++END (__memmove_thunderx2) + + + /* Copies are split into 3 main cases: small copies of up to 16 bytes, +@@ -124,8 +108,7 @@ libc_hidden_builtin_def (MEMMOVE) + + #define MEMCPY_PREFETCH_LDR 640 + +- .p2align 4 +-ENTRY (MEMCPY) ++ENTRY (__memcpy_thunderx2) + + PTR_ARG (0) + PTR_ARG (1) +@@ -449,7 +432,7 @@ L(move_long): + 3: ret + + +-END (MEMCPY) ++END (__memcpy_thunderx2) + .section .rodata + .p2align 4 + +@@ -472,6 +455,3 @@ L(ext_table): + .word L(ext_size_13) -. + .word L(ext_size_14) -. + .word L(ext_size_15) -. 
+- +-libc_hidden_builtin_def (MEMCPY) +-#endif +diff --git a/sysdeps/aarch64/multiarch/memset_a64fx.S b/sysdeps/aarch64/multiarch/memset_a64fx.S +index d520355143..7176f3d284 100644 +--- a/sysdeps/aarch64/multiarch/memset_a64fx.S ++++ b/sysdeps/aarch64/multiarch/memset_a64fx.S +@@ -33,8 +33,6 @@ + #define vector_length x9 + + #if HAVE_AARCH64_SVE_ASM +-# if IS_IN (libc) +-# define MEMSET __memset_a64fx + + .arch armv8.2-a+sve + +@@ -49,7 +47,7 @@ + #undef BTI_C + #define BTI_C + +-ENTRY (MEMSET) ++ENTRY (__memset_a64fx) + PTR_ARG (0) + SIZE_ARG (2) + +@@ -166,8 +164,6 @@ L(L2): + add count, count, CACHE_LINE_SIZE + b L(last) + +-END (MEMSET) +-libc_hidden_builtin_def (MEMSET) ++END (__memset_a64fx) + +-#endif /* IS_IN (libc) */ + #endif /* HAVE_AARCH64_SVE_ASM */ +diff --git a/sysdeps/aarch64/multiarch/memset_base64.S b/sysdeps/aarch64/multiarch/memset_base64.S +index 35296a6dec..0e8f709fa5 100644 +--- a/sysdeps/aarch64/multiarch/memset_base64.S ++++ b/sysdeps/aarch64/multiarch/memset_base64.S +@@ -34,7 +34,7 @@ + * + */ + +-ENTRY_ALIGN (MEMSET, 6) ++ENTRY (MEMSET) + + PTR_ARG (0) + SIZE_ARG (2) +@@ -183,4 +183,3 @@ L(zva_64): + #endif + + END (MEMSET) +-libc_hidden_builtin_def (MEMSET) +diff --git a/sysdeps/aarch64/multiarch/memset_emag.S b/sysdeps/aarch64/multiarch/memset_emag.S +index 17d609cead..6fecad4fae 100644 +--- a/sysdeps/aarch64/multiarch/memset_emag.S ++++ b/sysdeps/aarch64/multiarch/memset_emag.S +@@ -19,8 +19,7 @@ + + #include + +-#if IS_IN (libc) +-# define MEMSET __memset_emag ++#define MEMSET __memset_emag + + /* + * Using DC ZVA to zero memory does not produce better performance if +@@ -30,7 +29,6 @@ + * workloads. + */ + +-# define DC_ZVA_THRESHOLD 0 ++#define DC_ZVA_THRESHOLD 0 + +-# include "./memset_base64.S" +-#endif ++#include "./memset_base64.S" +diff --git a/sysdeps/aarch64/multiarch/memset_generic.S b/sysdeps/aarch64/multiarch/memset_generic.S +index 9c23e482bf..6c1f0daac8 100644 +--- a/sysdeps/aarch64/multiarch/memset_generic.S ++++ b/sysdeps/aarch64/multiarch/memset_generic.S +@@ -21,9 +21,15 @@ + + #if IS_IN (libc) + # define MEMSET __memset_generic ++ ++/* Do not hide the generic version of memset, we use it internally. */ ++# undef libc_hidden_builtin_def ++# define libc_hidden_builtin_def(name) ++ + /* Add a hidden definition for use within libc.so. */ + # ifdef SHARED + .globl __GI_memset; __GI_memset = __memset_generic + # endif +-# include + #endif ++ ++#include <../memset.S> +diff --git a/sysdeps/aarch64/multiarch/memset_kunpeng.S b/sysdeps/aarch64/multiarch/memset_kunpeng.S +index 86c46434fd..4a54373398 100644 +--- a/sysdeps/aarch64/multiarch/memset_kunpeng.S ++++ b/sysdeps/aarch64/multiarch/memset_kunpeng.S +@@ -20,16 +20,13 @@ + #include + #include + +-#if IS_IN (libc) +-# define MEMSET __memset_kunpeng +- + /* Assumptions: + * + * ARMv8-a, AArch64, unaligned accesses + * + */ + +-ENTRY_ALIGN (MEMSET, 6) ++ENTRY (__memset_kunpeng) + + PTR_ARG (0) + SIZE_ARG (2) +@@ -108,6 +105,4 @@ L(set_long): + stp q0, q0, [dstend, -32] + ret + +-END (MEMSET) +-libc_hidden_builtin_def (MEMSET) +-#endif ++END (__memset_kunpeng) +diff --git a/sysdeps/aarch64/multiarch/rtld-memset.S b/sysdeps/aarch64/multiarch/rtld-memset.S +deleted file mode 100644 +index 4b035ed8b2..0000000000 +--- a/sysdeps/aarch64/multiarch/rtld-memset.S ++++ /dev/null +@@ -1,25 +0,0 @@ +-/* Memset for aarch64, for the dynamic linker. +- Copyright (C) 2017-2023 Free Software Foundation, Inc. +- +- This file is part of the GNU C Library. 
+- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library. If not, see +- . */ +- +-#include +- +-#if IS_IN (rtld) +-# define MEMSET memset +-# include +-#endif +diff --git a/sysdeps/aarch64/multiarch/strlen.c b/sysdeps/aarch64/multiarch/strlen.c +index bbdd3de8c4..728bd1936a 100644 +--- a/sysdeps/aarch64/multiarch/strlen.c ++++ b/sysdeps/aarch64/multiarch/strlen.c +@@ -28,10 +28,10 @@ + + extern __typeof (__redirect_strlen) __strlen; + +-extern __typeof (__redirect_strlen) __strlen_mte attribute_hidden; ++extern __typeof (__redirect_strlen) __strlen_generic attribute_hidden; + extern __typeof (__redirect_strlen) __strlen_asimd attribute_hidden; + +-libc_ifunc (__strlen, (mte ? __strlen_mte : __strlen_asimd)); ++libc_ifunc (__strlen, (mte ? __strlen_generic : __strlen_asimd)); + + # undef strlen + strong_alias (__strlen, strlen); +diff --git a/sysdeps/aarch64/multiarch/strlen_asimd.S b/sysdeps/aarch64/multiarch/strlen_asimd.S +index 490439491d..aee5ef9f78 100644 +--- a/sysdeps/aarch64/multiarch/strlen_asimd.S ++++ b/sysdeps/aarch64/multiarch/strlen_asimd.S +@@ -203,4 +203,3 @@ L(page_cross): + ret + + END (__strlen_asimd) +-libc_hidden_builtin_def (__strlen_asimd) +diff --git a/sysdeps/aarch64/multiarch/strlen_mte.S b/sysdeps/aarch64/multiarch/strlen_generic.S +similarity index 85% +rename from sysdeps/aarch64/multiarch/strlen_mte.S +rename to sysdeps/aarch64/multiarch/strlen_generic.S +index 1c1220b767..2346296a18 100644 +--- a/sysdeps/aarch64/multiarch/strlen_mte.S ++++ b/sysdeps/aarch64/multiarch/strlen_generic.S +@@ -17,14 +17,14 @@ + . */ + + /* The actual strlen code is in ../strlen.S. If we are building libc this file +- defines __strlen_mte. Otherwise the include of ../strlen.S will define +- the normal __strlen entry points. */ ++ defines __strlen_generic. Otherwise the include of ../strlen.S will define ++ the normal __strlen entry points. */ + + #include + + #if IS_IN (libc) + +-# define STRLEN __strlen_mte ++# define STRLEN __strlen_generic + + /* Do not hide the generic version of strlen, we use it internally. */ + # undef libc_hidden_builtin_def +@@ -32,7 +32,7 @@ + + # ifdef SHARED + /* It doesn't make sense to send libc-internal strlen calls through a PLT. */ +- .globl __GI_strlen; __GI_strlen = __strlen_mte ++ .globl __GI_strlen; __GI_strlen = __strlen_generic + # endif + #endif + +-- +2.33.0 + diff --git a/0007-AArch64-Cleanup-emag-memset.patch b/0007-AArch64-Cleanup-emag-memset.patch new file mode 100644 index 0000000000000000000000000000000000000000..c579cfa45fbf66c391c3efcb9458b20a878a1ffa --- /dev/null +++ b/0007-AArch64-Cleanup-emag-memset.patch @@ -0,0 +1,348 @@ +From 1521237c3211bb0b1a8f7a9c5793d382789b2b68 Mon Sep 17 00:00:00 2001 +From: Wilco Dijkstra +Date: Thu, 26 Oct 2023 16:34:47 +0100 +Subject: [PATCH 07/26] AArch64: Cleanup emag memset + +Cleanup emag memset - merge the memset_base64.S file, remove +the unused ZVA code (since it is disabled on emag). 
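+
+Note (illustrative, not from the upstream commit): for context, the DC
+ZVA code removed below zeroed the 64-byte-aligned middle of the buffer
+with "dc zva" and covered the head and tail with ordinary stores.
+Roughly, in C with inline assembly (a sketch assuming AArch64, a
+64-byte ZVA block and DC ZVA not prohibited; not the deleted assembly
+itself):
+
+  #include <stdint.h>
+  #include <string.h>
+
+  static void
+  zero_with_dc_zva (void *dst, size_t len)
+  {
+    char *p = dst, *end = p + len;
+    char *mid = (char *) (((uintptr_t) p + 63) & ~(uintptr_t) 63);
+    char *mid_end = (char *) ((uintptr_t) end & ~(uintptr_t) 63);
+    if (mid >= mid_end)
+      {
+        memset (p, 0, len);	/* Too small for the ZVA path.  */
+        return;
+      }
+    memset (p, 0, mid - p);	/* Unaligned head.  */
+    for (char *q = mid; q < mid_end; q += 64)
+      asm volatile ("dc zva, %0" : : "r" (q) : "memory");
+    memset (mid_end, 0, end - mid_end);	/* Unaligned tail.  */
+  }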
+ +Reviewed-by: Adhemerval Zanella +(cherry picked from commit 9627ab99b50d250c6dd3001a3355aa03692f7fe5) +--- + sysdeps/aarch64/multiarch/ifunc-impl-list.c | 2 +- + sysdeps/aarch64/multiarch/memset.c | 2 +- + sysdeps/aarch64/multiarch/memset_base64.S | 185 -------------------- + sysdeps/aarch64/multiarch/memset_emag.S | 98 +++++++++-- + 4 files changed, 90 insertions(+), 197 deletions(-) + delete mode 100644 sysdeps/aarch64/multiarch/memset_base64.S + +diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c +index 836e8317a5..3596d3c8d3 100644 +--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c +@@ -57,7 +57,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + /* Enable this on non-falkor processors too so that other cores + can do a comparative analysis with __memset_generic. */ + IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_falkor) +- IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_emag) ++ IFUNC_IMPL_ADD (array, i, memset, 1, __memset_emag) + IFUNC_IMPL_ADD (array, i, memset, 1, __memset_kunpeng) + #if HAVE_AARCH64_SVE_ASM + IFUNC_IMPL_ADD (array, i, memset, sve && zva_size == 256, __memset_a64fx) +diff --git a/sysdeps/aarch64/multiarch/memset.c b/sysdeps/aarch64/multiarch/memset.c +index 23fc66e158..9193b197dd 100644 +--- a/sysdeps/aarch64/multiarch/memset.c ++++ b/sysdeps/aarch64/multiarch/memset.c +@@ -56,7 +56,7 @@ select_memset_ifunc (void) + if ((IS_FALKOR (midr) || IS_PHECDA (midr)) && zva_size == 64) + return __memset_falkor; + +- if (IS_EMAG (midr) && zva_size == 64) ++ if (IS_EMAG (midr)) + return __memset_emag; + + return __memset_generic; +diff --git a/sysdeps/aarch64/multiarch/memset_base64.S b/sysdeps/aarch64/multiarch/memset_base64.S +deleted file mode 100644 +index 0e8f709fa5..0000000000 +--- a/sysdeps/aarch64/multiarch/memset_base64.S ++++ /dev/null +@@ -1,185 +0,0 @@ +-/* Copyright (C) 2018-2023 Free Software Foundation, Inc. +- +- This file is part of the GNU C Library. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library. If not, see +- . */ +- +-#include +-#include "memset-reg.h" +- +-#ifndef MEMSET +-# define MEMSET __memset_base64 +-#endif +- +-/* To disable DC ZVA, set this threshold to 0. */ +-#ifndef DC_ZVA_THRESHOLD +-# define DC_ZVA_THRESHOLD 512 +-#endif +- +-/* Assumptions: +- * +- * ARMv8-a, AArch64, unaligned accesses +- * +- */ +- +-ENTRY (MEMSET) +- +- PTR_ARG (0) +- SIZE_ARG (2) +- +- bfi valw, valw, 8, 8 +- bfi valw, valw, 16, 16 +- bfi val, val, 32, 32 +- +- add dstend, dstin, count +- +- cmp count, 96 +- b.hi L(set_long) +- cmp count, 16 +- b.hs L(set_medium) +- +- /* Set 0..15 bytes. 
*/ +- tbz count, 3, 1f +- str val, [dstin] +- str val, [dstend, -8] +- ret +- +- .p2align 3 +-1: tbz count, 2, 2f +- str valw, [dstin] +- str valw, [dstend, -4] +- ret +-2: cbz count, 3f +- strb valw, [dstin] +- tbz count, 1, 3f +- strh valw, [dstend, -2] +-3: ret +- +- .p2align 3 +- /* Set 16..96 bytes. */ +-L(set_medium): +- stp val, val, [dstin] +- tbnz count, 6, L(set96) +- stp val, val, [dstend, -16] +- tbz count, 5, 1f +- stp val, val, [dstin, 16] +- stp val, val, [dstend, -32] +-1: ret +- +- .p2align 4 +- /* Set 64..96 bytes. Write 64 bytes from the start and +- 32 bytes from the end. */ +-L(set96): +- stp val, val, [dstin, 16] +- stp val, val, [dstin, 32] +- stp val, val, [dstin, 48] +- stp val, val, [dstend, -32] +- stp val, val, [dstend, -16] +- ret +- +- .p2align 4 +-L(set_long): +- stp val, val, [dstin] +- bic dst, dstin, 15 +-#if DC_ZVA_THRESHOLD +- cmp count, DC_ZVA_THRESHOLD +- ccmp val, 0, 0, cs +- b.eq L(zva_64) +-#endif +- /* Small-size or non-zero memset does not use DC ZVA. */ +- sub count, dstend, dst +- +- /* +- * Adjust count and bias for loop. By subtracting extra 1 from count, +- * it is easy to use tbz instruction to check whether loop tailing +- * count is less than 33 bytes, so as to bypass 2 unnecessary stps. +- */ +- sub count, count, 64+16+1 +- +-#if DC_ZVA_THRESHOLD +- /* Align loop on 16-byte boundary, this might be friendly to i-cache. */ +- nop +-#endif +- +-1: stp val, val, [dst, 16] +- stp val, val, [dst, 32] +- stp val, val, [dst, 48] +- stp val, val, [dst, 64]! +- subs count, count, 64 +- b.hs 1b +- +- tbz count, 5, 1f /* Remaining count is less than 33 bytes? */ +- stp val, val, [dst, 16] +- stp val, val, [dst, 32] +-1: stp val, val, [dstend, -32] +- stp val, val, [dstend, -16] +- ret +- +-#if DC_ZVA_THRESHOLD +- .p2align 3 +-L(zva_64): +- stp val, val, [dst, 16] +- stp val, val, [dst, 32] +- stp val, val, [dst, 48] +- bic dst, dst, 63 +- +- /* +- * Previous memory writes might cross cache line boundary, and cause +- * cache line partially dirty. Zeroing this kind of cache line using +- * DC ZVA will incur extra cost, for it requires loading untouched +- * part of the line from memory before zeoring. +- * +- * So, write the first 64 byte aligned block using stp to force +- * fully dirty cache line. +- */ +- stp val, val, [dst, 64] +- stp val, val, [dst, 80] +- stp val, val, [dst, 96] +- stp val, val, [dst, 112] +- +- sub count, dstend, dst +- /* +- * Adjust count and bias for loop. By subtracting extra 1 from count, +- * it is easy to use tbz instruction to check whether loop tailing +- * count is less than 33 bytes, so as to bypass 2 unnecessary stps. +- */ +- sub count, count, 128+64+64+1 +- add dst, dst, 128 +- nop +- +- /* DC ZVA sets 64 bytes each time. */ +-1: dc zva, dst +- add dst, dst, 64 +- subs count, count, 64 +- b.hs 1b +- +- /* +- * Write the last 64 byte aligned block using stp to force fully +- * dirty cache line. +- */ +- stp val, val, [dst, 0] +- stp val, val, [dst, 16] +- stp val, val, [dst, 32] +- stp val, val, [dst, 48] +- +- tbz count, 5, 1f /* Remaining count is less than 33 bytes? */ +- stp val, val, [dst, 64] +- stp val, val, [dst, 80] +-1: stp val, val, [dstend, -32] +- stp val, val, [dstend, -16] +- ret +-#endif +- +-END (MEMSET) +diff --git a/sysdeps/aarch64/multiarch/memset_emag.S b/sysdeps/aarch64/multiarch/memset_emag.S +index 6fecad4fae..bbfa815925 100644 +--- a/sysdeps/aarch64/multiarch/memset_emag.S ++++ b/sysdeps/aarch64/multiarch/memset_emag.S +@@ -18,17 +18,95 @@ + . 
*/ + + #include ++#include "memset-reg.h" + +-#define MEMSET __memset_emag +- +-/* +- * Using DC ZVA to zero memory does not produce better performance if +- * memory size is not very large, especially when there are multiple +- * processes/threads contending memory/cache. Here we set threshold to +- * zero to disable using DC ZVA, which is good for multi-process/thread +- * workloads. ++/* Assumptions: ++ * ++ * ARMv8-a, AArch64, unaligned accesses ++ * + */ + +-#define DC_ZVA_THRESHOLD 0 ++ENTRY (__memset_emag) ++ ++ PTR_ARG (0) ++ SIZE_ARG (2) ++ ++ bfi valw, valw, 8, 8 ++ bfi valw, valw, 16, 16 ++ bfi val, val, 32, 32 ++ ++ add dstend, dstin, count ++ ++ cmp count, 96 ++ b.hi L(set_long) ++ cmp count, 16 ++ b.hs L(set_medium) ++ ++ /* Set 0..15 bytes. */ ++ tbz count, 3, 1f ++ str val, [dstin] ++ str val, [dstend, -8] ++ ret ++ ++ .p2align 3 ++1: tbz count, 2, 2f ++ str valw, [dstin] ++ str valw, [dstend, -4] ++ ret ++2: cbz count, 3f ++ strb valw, [dstin] ++ tbz count, 1, 3f ++ strh valw, [dstend, -2] ++3: ret ++ ++ .p2align 3 ++ /* Set 16..96 bytes. */ ++L(set_medium): ++ stp val, val, [dstin] ++ tbnz count, 6, L(set96) ++ stp val, val, [dstend, -16] ++ tbz count, 5, 1f ++ stp val, val, [dstin, 16] ++ stp val, val, [dstend, -32] ++1: ret ++ ++ .p2align 4 ++ /* Set 64..96 bytes. Write 64 bytes from the start and ++ 32 bytes from the end. */ ++L(set96): ++ stp val, val, [dstin, 16] ++ stp val, val, [dstin, 32] ++ stp val, val, [dstin, 48] ++ stp val, val, [dstend, -32] ++ stp val, val, [dstend, -16] ++ ret ++ ++ .p2align 4 ++L(set_long): ++ stp val, val, [dstin] ++ bic dst, dstin, 15 ++ /* Small-size or non-zero memset does not use DC ZVA. */ ++ sub count, dstend, dst ++ ++ /* ++ * Adjust count and bias for loop. By subtracting extra 1 from count, ++ * it is easy to use tbz instruction to check whether loop tailing ++ * count is less than 33 bytes, so as to bypass 2 unnecessary stps. ++ */ ++ sub count, count, 64+16+1 ++ ++1: stp val, val, [dst, 16] ++ stp val, val, [dst, 32] ++ stp val, val, [dst, 48] ++ stp val, val, [dst, 64]! ++ subs count, count, 64 ++ b.hs 1b ++ ++ tbz count, 5, 1f /* Remaining count is less than 33 bytes? */ ++ stp val, val, [dst, 16] ++ stp val, val, [dst, 32] ++1: stp val, val, [dstend, -32] ++ stp val, val, [dstend, -16] ++ ret + +-#include "./memset_base64.S" ++END (__memset_emag) +-- +2.33.0 + diff --git a/0008-AArch64-Add-memset_zva64.patch b/0008-AArch64-Add-memset_zva64.patch new file mode 100644 index 0000000000000000000000000000000000000000..52258161d73e27d402c0d079a114e4540333c3d1 --- /dev/null +++ b/0008-AArch64-Add-memset_zva64.patch @@ -0,0 +1,228 @@ +From 156e44845f4137d6d3ea6c2824dd459652a7efda Mon Sep 17 00:00:00 2001 +From: Wilco Dijkstra +Date: Thu, 26 Oct 2023 17:07:21 +0100 +Subject: [PATCH 08/26] AArch64: Add memset_zva64 + +Add a specialized memset for the common ZVA size of 64 to avoid the +overhead of reading the ZVA size. Since the code is identical to +__memset_falkor, remove the latter. 
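+
+Note (illustrative, not from the upstream commit): the overhead avoided
+here is the per-call read of DCZID_EL0 that the generic memset performs
+("mrs tmp1, dczid_el0" in the hunk below) before entering its DC ZVA
+loop.  A sketch of that query in C (assuming AArch64; BS in bits [3:0]
+is log2 of the block size in 4-byte words, DZP in bit 4 prohibits
+DC ZVA):
+
+  static unsigned long
+  dc_zva_block_size (void)
+  {
+    unsigned long dczid;
+    asm ("mrs %0, dczid_el0" : "=r" (dczid));
+    if (dczid & 16)		/* DZP set: DC ZVA is prohibited.  */
+      return 0;
+    return 4UL << (dczid & 15);	/* 64 bytes on most current cores.  */
+  }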
+ +Reviewed-by: Adhemerval Zanella +(cherry picked from commit 3d7090f14b13312320e425b27dcf0fe72de026fd) +--- + sysdeps/aarch64/memset.S | 10 ++-- + sysdeps/aarch64/multiarch/Makefile | 2 +- + sysdeps/aarch64/multiarch/ifunc-impl-list.c | 4 +- + sysdeps/aarch64/multiarch/memset.c | 9 ++-- + sysdeps/aarch64/multiarch/memset_falkor.S | 54 --------------------- + sysdeps/aarch64/multiarch/memset_zva64.S | 27 +++++++++++ + 6 files changed, 38 insertions(+), 68 deletions(-) + delete mode 100644 sysdeps/aarch64/multiarch/memset_falkor.S + create mode 100644 sysdeps/aarch64/multiarch/memset_zva64.S + +diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S +index bf3cf85c8a..bbfb7184c3 100644 +--- a/sysdeps/aarch64/memset.S ++++ b/sysdeps/aarch64/memset.S +@@ -101,19 +101,19 @@ L(tail64): + ret + + L(try_zva): +-#ifdef ZVA_MACRO +- zva_macro +-#else ++#ifndef ZVA64_ONLY + .p2align 3 + mrs tmp1, dczid_el0 + tbnz tmp1w, 4, L(no_zva) + and tmp1w, tmp1w, 15 + cmp tmp1w, 4 /* ZVA size is 64 bytes. */ + b.ne L(zva_128) +- ++ nop ++#endif + /* Write the first and last 64 byte aligned block using stp rather + than using DC ZVA. This is faster on some cores. + */ ++ .p2align 4 + L(zva_64): + str q0, [dst, 16] + stp q0, q0, [dst, 32] +@@ -123,7 +123,6 @@ L(zva_64): + sub count, dstend, dst /* Count is now 128 too large. */ + sub count, count, 128+64+64 /* Adjust count and bias for loop. */ + add dst, dst, 128 +- nop + 1: dc zva, dst + add dst, dst, 64 + subs count, count, 64 +@@ -134,6 +133,7 @@ L(zva_64): + stp q0, q0, [dstend, -32] + ret + ++#ifndef ZVA64_ONLY + .p2align 3 + L(zva_128): + cmp tmp1w, 5 /* ZVA size is 128 bytes. */ +diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile +index a1a4de3cd9..171ca5e4cf 100644 +--- a/sysdeps/aarch64/multiarch/Makefile ++++ b/sysdeps/aarch64/multiarch/Makefile +@@ -12,10 +12,10 @@ sysdep_routines += \ + memmove_mops \ + memset_a64fx \ + memset_emag \ +- memset_falkor \ + memset_generic \ + memset_kunpeng \ + memset_mops \ ++ memset_zva64 \ + strlen_asimd \ + strlen_generic \ + # sysdep_routines +diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c +index 3596d3c8d3..fdd9ea9246 100644 +--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c +@@ -54,9 +54,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL_ADD (array, i, memmove, mops, __memmove_mops) + IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic)) + IFUNC_IMPL (i, name, memset, +- /* Enable this on non-falkor processors too so that other cores +- can do a comparative analysis with __memset_generic. 
*/ +- IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_falkor) ++ IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_zva64) + IFUNC_IMPL_ADD (array, i, memset, 1, __memset_emag) + IFUNC_IMPL_ADD (array, i, memset, 1, __memset_kunpeng) + #if HAVE_AARCH64_SVE_ASM +diff --git a/sysdeps/aarch64/multiarch/memset.c b/sysdeps/aarch64/multiarch/memset.c +index 9193b197dd..6deb6865e5 100644 +--- a/sysdeps/aarch64/multiarch/memset.c ++++ b/sysdeps/aarch64/multiarch/memset.c +@@ -28,7 +28,7 @@ + + extern __typeof (__redirect_memset) __libc_memset; + +-extern __typeof (__redirect_memset) __memset_falkor attribute_hidden; ++extern __typeof (__redirect_memset) __memset_zva64 attribute_hidden; + extern __typeof (__redirect_memset) __memset_emag attribute_hidden; + extern __typeof (__redirect_memset) __memset_kunpeng attribute_hidden; + extern __typeof (__redirect_memset) __memset_a64fx attribute_hidden; +@@ -47,18 +47,17 @@ select_memset_ifunc (void) + { + if (IS_A64FX (midr) && zva_size == 256) + return __memset_a64fx; +- return __memset_generic; + } + + if (IS_KUNPENG920 (midr)) + return __memset_kunpeng; + +- if ((IS_FALKOR (midr) || IS_PHECDA (midr)) && zva_size == 64) +- return __memset_falkor; +- + if (IS_EMAG (midr)) + return __memset_emag; + ++ if (zva_size == 64) ++ return __memset_zva64; ++ + return __memset_generic; + } + +diff --git a/sysdeps/aarch64/multiarch/memset_falkor.S b/sysdeps/aarch64/multiarch/memset_falkor.S +deleted file mode 100644 +index c6946a8072..0000000000 +--- a/sysdeps/aarch64/multiarch/memset_falkor.S ++++ /dev/null +@@ -1,54 +0,0 @@ +-/* Memset for falkor. +- Copyright (C) 2017-2023 Free Software Foundation, Inc. +- +- This file is part of the GNU C Library. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library. If not, see +- . */ +- +-#include +-#include +- +-/* Reading dczid_el0 is expensive on falkor so move it into the ifunc +- resolver and assume ZVA size of 64 bytes. The IFUNC resolver takes care to +- use this function only when ZVA is enabled. */ +- +-#if IS_IN (libc) +-.macro zva_macro +- .p2align 4 +- /* Write the first and last 64 byte aligned block using stp rather +- than using DC ZVA. This is faster on some cores. */ +- str q0, [dst, 16] +- stp q0, q0, [dst, 32] +- bic dst, dst, 63 +- stp q0, q0, [dst, 64] +- stp q0, q0, [dst, 96] +- sub count, dstend, dst /* Count is now 128 too large. */ +- sub count, count, 128+64+64 /* Adjust count and bias for loop. 
*/ +- add dst, dst, 128 +-1: dc zva, dst +- add dst, dst, 64 +- subs count, count, 64 +- b.hi 1b +- stp q0, q0, [dst, 0] +- stp q0, q0, [dst, 32] +- stp q0, q0, [dstend, -64] +- stp q0, q0, [dstend, -32] +- ret +-.endm +- +-# define ZVA_MACRO zva_macro +-# define MEMSET __memset_falkor +-# include +-#endif +diff --git a/sysdeps/aarch64/multiarch/memset_zva64.S b/sysdeps/aarch64/multiarch/memset_zva64.S +new file mode 100644 +index 0000000000..13f45fd3d8 +--- /dev/null ++++ b/sysdeps/aarch64/multiarch/memset_zva64.S +@@ -0,0 +1,27 @@ ++/* Optimized memset for zva size = 64. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++ ++#define ZVA64_ONLY 1 ++#define MEMSET __memset_zva64 ++#undef libc_hidden_builtin_def ++#define libc_hidden_builtin_def(X) ++ ++#include "../memset.S" +-- +2.33.0 + diff --git a/0009-AArch64-Remove-Falkor-memcpy.patch b/0009-AArch64-Remove-Falkor-memcpy.patch new file mode 100644 index 0000000000000000000000000000000000000000..4efda02f8260212c4a4583a054a09186c8f15371 --- /dev/null +++ b/0009-AArch64-Remove-Falkor-memcpy.patch @@ -0,0 +1,468 @@ +From a08ff922946dca0303a270bbfa2557f74caa47aa Mon Sep 17 00:00:00 2001 +From: Wilco Dijkstra +Date: Thu, 26 Oct 2023 17:30:36 +0100 +Subject: [PATCH 09/26] AArch64: Remove Falkor memcpy + +The latest implementations of memcpy are actually faster than the Falkor +implementations [1], so remove the falkor/phecda ifuncs for memcpy and +the now unused IS_FALKOR/IS_PHECDA defines. + +[1] https://sourceware.org/pipermail/libc-alpha/2022-December/144227.html + +Reviewed-by: Adhemerval Zanella +(cherry picked from commit 2f5524cc5381eb75fef55f7901bb907bd5628333) +--- + manual/tunables.texi | 2 +- + sysdeps/aarch64/multiarch/Makefile | 1 - + sysdeps/aarch64/multiarch/ifunc-impl-list.c | 2 - + sysdeps/aarch64/multiarch/memcpy.c | 4 - + sysdeps/aarch64/multiarch/memcpy_falkor.S | 313 ------------------ + sysdeps/aarch64/multiarch/memmove.c | 4 - + .../unix/sysv/linux/aarch64/cpu-features.c | 2 - + .../unix/sysv/linux/aarch64/cpu-features.h | 5 - + 8 files changed, 1 insertion(+), 332 deletions(-) + delete mode 100644 sysdeps/aarch64/multiarch/memcpy_falkor.S + +diff --git a/manual/tunables.texi b/manual/tunables.texi +index 4ca0e42a11..bb17fef5bd 100644 +--- a/manual/tunables.texi ++++ b/manual/tunables.texi +@@ -529,7 +529,7 @@ This tunable is specific to powerpc, powerpc64 and powerpc64le. + @deftp Tunable glibc.cpu.name + The @code{glibc.cpu.name=xxx} tunable allows the user to tell @theglibc{} to + assume that the CPU is @code{xxx} where xxx may have one of these values: +-@code{generic}, @code{falkor}, @code{thunderxt88}, @code{thunderx2t99}, ++@code{generic}, @code{thunderxt88}, @code{thunderx2t99}, + @code{thunderx2t99p1}, @code{ares}, @code{emag}, @code{kunpeng}, + @code{a64fx}. 
+ +diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile +index 171ca5e4cf..e4720b7468 100644 +--- a/sysdeps/aarch64/multiarch/Makefile ++++ b/sysdeps/aarch64/multiarch/Makefile +@@ -3,7 +3,6 @@ sysdep_routines += \ + memchr_generic \ + memchr_nosimd \ + memcpy_a64fx \ +- memcpy_falkor \ + memcpy_generic \ + memcpy_mops \ + memcpy_sve \ +diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c +index fdd9ea9246..73038ac810 100644 +--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c +@@ -36,7 +36,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL (i, name, memcpy, + IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_thunderx) + IFUNC_IMPL_ADD (array, i, memcpy, !bti, __memcpy_thunderx2) +- IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_falkor) + #if HAVE_AARCH64_SVE_ASM + IFUNC_IMPL_ADD (array, i, memcpy, sve, __memcpy_a64fx) + IFUNC_IMPL_ADD (array, i, memcpy, sve, __memcpy_sve) +@@ -46,7 +45,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + IFUNC_IMPL (i, name, memmove, + IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_thunderx) + IFUNC_IMPL_ADD (array, i, memmove, !bti, __memmove_thunderx2) +- IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_falkor) + #if HAVE_AARCH64_SVE_ASM + IFUNC_IMPL_ADD (array, i, memmove, sve, __memmove_a64fx) + IFUNC_IMPL_ADD (array, i, memmove, sve, __memmove_sve) +diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c +index 9aace954cb..6471fe82e3 100644 +--- a/sysdeps/aarch64/multiarch/memcpy.c ++++ b/sysdeps/aarch64/multiarch/memcpy.c +@@ -31,7 +31,6 @@ extern __typeof (__redirect_memcpy) __libc_memcpy; + extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden; + extern __typeof (__redirect_memcpy) __memcpy_thunderx attribute_hidden; + extern __typeof (__redirect_memcpy) __memcpy_thunderx2 attribute_hidden; +-extern __typeof (__redirect_memcpy) __memcpy_falkor attribute_hidden; + extern __typeof (__redirect_memcpy) __memcpy_a64fx attribute_hidden; + extern __typeof (__redirect_memcpy) __memcpy_sve attribute_hidden; + extern __typeof (__redirect_memcpy) __memcpy_mops attribute_hidden; +@@ -57,9 +56,6 @@ select_memcpy_ifunc (void) + if (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr)) + return __memcpy_thunderx2; + +- if (IS_FALKOR (midr) || IS_PHECDA (midr)) +- return __memcpy_falkor; +- + return __memcpy_generic; + } + +diff --git a/sysdeps/aarch64/multiarch/memcpy_falkor.S b/sysdeps/aarch64/multiarch/memcpy_falkor.S +deleted file mode 100644 +index 67c4ab34eb..0000000000 +--- a/sysdeps/aarch64/multiarch/memcpy_falkor.S ++++ /dev/null +@@ -1,313 +0,0 @@ +-/* Optimized memcpy for Qualcomm Falkor processor. +- Copyright (C) 2017-2023 Free Software Foundation, Inc. +- +- This file is part of the GNU C Library. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. 
+- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library. If not, see +- . */ +- +-#include +- +-/* Assumptions: +- +- ARMv8-a, AArch64, falkor, unaligned accesses. */ +- +-#define dstin x0 +-#define src x1 +-#define count x2 +-#define dst x3 +-#define srcend x4 +-#define dstend x5 +-#define tmp1 x14 +-#define A_x x6 +-#define B_x x7 +-#define A_w w6 +-#define B_w w7 +- +-#define A_q q0 +-#define B_q q1 +-#define C_q q2 +-#define D_q q3 +-#define E_q q4 +-#define F_q q5 +-#define G_q q6 +-#define H_q q7 +-#define Q_q q6 +-#define S_q q22 +- +-/* Copies are split into 3 main cases: +- +- 1. Small copies of up to 32 bytes +- 2. Medium copies of 33..128 bytes which are fully unrolled +- 3. Large copies of more than 128 bytes. +- +- Large copies align the source to a quad word and use an unrolled loop +- processing 64 bytes per iteration. +- +- FALKOR-SPECIFIC DESIGN: +- +- The smallest copies (32 bytes or less) focus on optimal pipeline usage, +- which is why the redundant copies of 0-3 bytes have been replaced with +- conditionals, since the former would unnecessarily break across multiple +- issue groups. The medium copy group has been enlarged to 128 bytes since +- bumping up the small copies up to 32 bytes allows us to do that without +- cost and also allows us to reduce the size of the prep code before loop64. +- +- The copy loop uses only one register q0. This is to ensure that all loads +- hit a single hardware prefetcher which can get correctly trained to prefetch +- a single stream. +- +- The non-temporal stores help optimize cache utilization. */ +- +-#if IS_IN (libc) +-ENTRY (__memcpy_falkor) +- +- PTR_ARG (0) +- PTR_ARG (1) +- SIZE_ARG (2) +- +- cmp count, 32 +- add srcend, src, count +- add dstend, dstin, count +- b.ls L(copy32) +- cmp count, 128 +- b.hi L(copy_long) +- +- /* Medium copies: 33..128 bytes. */ +-L(copy128): +- sub tmp1, count, 1 +- ldr A_q, [src] +- ldr B_q, [src, 16] +- ldr C_q, [srcend, -32] +- ldr D_q, [srcend, -16] +- tbz tmp1, 6, 1f +- ldr E_q, [src, 32] +- ldr F_q, [src, 48] +- ldr G_q, [srcend, -64] +- ldr H_q, [srcend, -48] +- str G_q, [dstend, -64] +- str H_q, [dstend, -48] +- str E_q, [dstin, 32] +- str F_q, [dstin, 48] +-1: +- str A_q, [dstin] +- str B_q, [dstin, 16] +- str C_q, [dstend, -32] +- str D_q, [dstend, -16] +- ret +- +- .p2align 4 +- /* Small copies: 0..32 bytes. */ +-L(copy32): +- /* 16-32 */ +- cmp count, 16 +- b.lo 1f +- ldr A_q, [src] +- ldr B_q, [srcend, -16] +- str A_q, [dstin] +- str B_q, [dstend, -16] +- ret +- .p2align 4 +-1: +- /* 8-15 */ +- tbz count, 3, 1f +- ldr A_x, [src] +- ldr B_x, [srcend, -8] +- str A_x, [dstin] +- str B_x, [dstend, -8] +- ret +- .p2align 4 +-1: +- /* 4-7 */ +- tbz count, 2, 1f +- ldr A_w, [src] +- ldr B_w, [srcend, -4] +- str A_w, [dstin] +- str B_w, [dstend, -4] +- ret +- .p2align 4 +-1: +- /* 2-3 */ +- tbz count, 1, 1f +- ldrh A_w, [src] +- ldrh B_w, [srcend, -2] +- strh A_w, [dstin] +- strh B_w, [dstend, -2] +- ret +- .p2align 4 +-1: +- /* 0-1 */ +- tbz count, 0, 1f +- ldrb A_w, [src] +- strb A_w, [dstin] +-1: +- ret +- +- /* Align SRC to 16 bytes and copy; that way at least one of the +- accesses is aligned throughout the copy sequence. +- +- The count is off by 0 to 15 bytes, but this is OK because we trim +- off the last 64 bytes to copy off from the end. Due to this the +- loop never runs out of bounds. */ +- +- .p2align 4 +- nop /* Align loop64 below. 
*/ +-L(copy_long): +- ldr A_q, [src] +- sub count, count, 64 + 16 +- and tmp1, src, 15 +- str A_q, [dstin] +- bic src, src, 15 +- sub dst, dstin, tmp1 +- add count, count, tmp1 +- +-L(loop64): +- ldr A_q, [src, 16]! +- str A_q, [dst, 16] +- ldr A_q, [src, 16]! +- subs count, count, 64 +- str A_q, [dst, 32] +- ldr A_q, [src, 16]! +- str A_q, [dst, 48] +- ldr A_q, [src, 16]! +- str A_q, [dst, 64]! +- b.hi L(loop64) +- +- /* Write the last full set of 64 bytes. The remainder is at most 64 +- bytes, so it is safe to always copy 64 bytes from the end even if +- there is just 1 byte left. */ +- ldr E_q, [srcend, -64] +- str E_q, [dstend, -64] +- ldr D_q, [srcend, -48] +- str D_q, [dstend, -48] +- ldr C_q, [srcend, -32] +- str C_q, [dstend, -32] +- ldr B_q, [srcend, -16] +- str B_q, [dstend, -16] +- ret +- +-END (__memcpy_falkor) +- +- +-/* RATIONALE: +- +- The move has 4 distinct parts: +- * Small moves of 32 bytes and under. +- * Medium sized moves of 33-128 bytes (fully unrolled). +- * Large moves where the source address is higher than the destination +- (forward copies) +- * Large moves where the destination address is higher than the source +- (copy backward, or move). +- +- We use only two registers q6 and q22 for the moves and move 32 bytes at a +- time to correctly train the hardware prefetcher for better throughput. +- +- For small and medium cases memcpy is used. */ +- +-ENTRY (__memmove_falkor) +- +- PTR_ARG (0) +- PTR_ARG (1) +- SIZE_ARG (2) +- +- cmp count, 32 +- add srcend, src, count +- add dstend, dstin, count +- b.ls L(copy32) +- cmp count, 128 +- b.ls L(copy128) +- sub tmp1, dstin, src +- ccmp tmp1, count, 2, hi +- b.lo L(move_long) +- +- /* CASE: Copy Forwards +- +- Align src to 16 byte alignment so that we don't cross cache line +- boundaries on both loads and stores. There are at least 128 bytes +- to copy, so copy 16 bytes unaligned and then align. The loop +- copies 32 bytes per iteration and prefetches one iteration ahead. */ +- +- ldr S_q, [src] +- and tmp1, src, 15 +- bic src, src, 15 +- sub dst, dstin, tmp1 +- add count, count, tmp1 /* Count is now 16 too large. */ +- ldr Q_q, [src, 16]! +- str S_q, [dstin] +- ldr S_q, [src, 16]! +- sub count, count, 32 + 32 + 16 /* Test and readjust count. */ +- +- .p2align 4 +-1: +- subs count, count, 32 +- str Q_q, [dst, 16] +- ldr Q_q, [src, 16]! +- str S_q, [dst, 32]! +- ldr S_q, [src, 16]! +- b.hi 1b +- +- /* Copy 32 bytes from the end before writing the data prefetched in the +- last loop iteration. */ +-2: +- ldr B_q, [srcend, -32] +- ldr C_q, [srcend, -16] +- str Q_q, [dst, 16] +- str S_q, [dst, 32] +- str B_q, [dstend, -32] +- str C_q, [dstend, -16] +- ret +- +- /* CASE: Copy Backwards +- +- Align srcend to 16 byte alignment so that we don't cross cache line +- boundaries on both loads and stores. There are at least 128 bytes +- to copy, so copy 16 bytes unaligned and then align. The loop +- copies 32 bytes per iteration and prefetches one iteration ahead. */ +- +- .p2align 4 +- nop +- nop +-L(move_long): +- cbz tmp1, 3f /* Return early if src == dstin */ +- ldr S_q, [srcend, -16] +- and tmp1, srcend, 15 +- sub srcend, srcend, tmp1 +- ldr Q_q, [srcend, -16]! +- str S_q, [dstend, -16] +- sub count, count, tmp1 +- ldr S_q, [srcend, -16]! +- sub dstend, dstend, tmp1 +- sub count, count, 32 + 32 +- +-1: +- subs count, count, 32 +- str Q_q, [dstend, -16] +- ldr Q_q, [srcend, -16]! +- str S_q, [dstend, -32]! +- ldr S_q, [srcend, -16]! 
+- b.hi 1b +- +- /* Copy 32 bytes from the start before writing the data prefetched in the +- last loop iteration. */ +- +- ldr B_q, [src, 16] +- ldr C_q, [src] +- str Q_q, [dstend, -16] +- str S_q, [dstend, -32] +- str B_q, [dstin, 16] +- str C_q, [dstin] +-3: ret +- +-END (__memmove_falkor) +-#endif +diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c +index fd346e7b73..7602a5d57d 100644 +--- a/sysdeps/aarch64/multiarch/memmove.c ++++ b/sysdeps/aarch64/multiarch/memmove.c +@@ -31,7 +31,6 @@ extern __typeof (__redirect_memmove) __libc_memmove; + extern __typeof (__redirect_memmove) __memmove_generic attribute_hidden; + extern __typeof (__redirect_memmove) __memmove_thunderx attribute_hidden; + extern __typeof (__redirect_memmove) __memmove_thunderx2 attribute_hidden; +-extern __typeof (__redirect_memmove) __memmove_falkor attribute_hidden; + extern __typeof (__redirect_memmove) __memmove_a64fx attribute_hidden; + extern __typeof (__redirect_memmove) __memmove_sve attribute_hidden; + extern __typeof (__redirect_memmove) __memmove_mops attribute_hidden; +@@ -57,9 +56,6 @@ select_memmove_ifunc (void) + if (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr)) + return __memmove_thunderx2; + +- if (IS_FALKOR (midr) || IS_PHECDA (midr)) +- return __memmove_falkor; +- + return __memmove_generic; + } + +diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c +index 233d5b2407..a11a86efab 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c ++++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c +@@ -37,11 +37,9 @@ struct cpu_list + }; + + static struct cpu_list cpu_list[] = { +- {"falkor", 0x510FC000}, + {"thunderxt88", 0x430F0A10}, + {"thunderx2t99", 0x431F0AF0}, + {"thunderx2t99p1", 0x420F5160}, +- {"phecda", 0x680F0000}, + {"ares", 0x411FD0C0}, + {"emag", 0x503F0001}, + {"kunpeng920", 0x481FD010}, +diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h +index 40b709677d..2cf745cd19 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h ++++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h +@@ -47,11 +47,6 @@ + #define IS_THUNDERX2(midr) (MIDR_IMPLEMENTOR(midr) == 'C' \ + && MIDR_PARTNUM(midr) == 0xaf) + +-#define IS_FALKOR(midr) (MIDR_IMPLEMENTOR(midr) == 'Q' \ +- && MIDR_PARTNUM(midr) == 0xc00) +- +-#define IS_PHECDA(midr) (MIDR_IMPLEMENTOR(midr) == 'h' \ +- && MIDR_PARTNUM(midr) == 0x000) + #define IS_NEOVERSE_N1(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ + && MIDR_PARTNUM(midr) == 0xd0c) + #define IS_NEOVERSE_N2(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \ +-- +2.33.0 + diff --git a/0010-aarch64-correct-CFI-in-rawmemchr-bug-31113.patch b/0010-aarch64-correct-CFI-in-rawmemchr-bug-31113.patch new file mode 100644 index 0000000000000000000000000000000000000000..b26031183e7b85b29e4c9f35ea777507377cbaee --- /dev/null +++ b/0010-aarch64-correct-CFI-in-rawmemchr-bug-31113.patch @@ -0,0 +1,31 @@ +From 168ae58e6e705a53a71850ee63ba5514fd5d7b70 Mon Sep 17 00:00:00 2001 +From: Andreas Schwab +Date: Thu, 23 Nov 2023 18:23:46 +0100 +Subject: [PATCH 10/26] aarch64: correct CFI in rawmemchr (bug 31113) + +The .cfi_return_column directive changes the return column for the whole +FDE range. But the actual intent is to tell the unwinder that the value +in x30 (lr) now resides in x15 after the move, and that is expressed by +the .cfi_register directive. 
+ +(cherry picked from commit 3f798427884fa57770e8e2291cf58d5918254bb5) +--- + sysdeps/aarch64/rawmemchr.S | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/sysdeps/aarch64/rawmemchr.S b/sysdeps/aarch64/rawmemchr.S +index efc4b7007b..1fff094215 100644 +--- a/sysdeps/aarch64/rawmemchr.S ++++ b/sysdeps/aarch64/rawmemchr.S +@@ -31,7 +31,7 @@ ENTRY (__rawmemchr) + + L(do_strlen): + mov x15, x30 +- cfi_return_column (x15) ++ cfi_register (x30, x15) + mov x14, x0 + bl __strlen + add x0, x14, x0 +-- +2.33.0 + diff --git a/0011-aarch64-fix-check-for-SVE-support-in-assembler.patch b/0011-aarch64-fix-check-for-SVE-support-in-assembler.patch new file mode 100644 index 0000000000000000000000000000000000000000..ecc3b2722bccf297beb1e30e307f59bb112db248 --- /dev/null +++ b/0011-aarch64-fix-check-for-SVE-support-in-assembler.patch @@ -0,0 +1,61 @@ +From 1bf17ce978da71431dbd1fc3660cfae3dff0672f Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy +Date: Wed, 13 Mar 2024 14:34:14 +0000 +Subject: [PATCH 11/26] aarch64: fix check for SVE support in assembler + +Due to GCC bug 110901 -mcpu can override -march setting when compiling +asm code and thus a compiler targetting a specific cpu can fail the +configure check even when binutils gas supports SVE. + +The workaround is that explicit .arch directive overrides both -mcpu +and -march, and since that's what the actual SVE memcpy uses the +configure check should use that too even if the GCC issue is fixed +independently. + +Reviewed-by: Florian Weimer +(cherry picked from commit 73c26018ed0ecd9c807bb363cc2c2ab4aca66a82) +--- + sysdeps/aarch64/configure | 5 +++-- + sysdeps/aarch64/configure.ac | 5 +++-- + 2 files changed, 6 insertions(+), 4 deletions(-) + mode change 100644 => 100755 sysdeps/aarch64/configure + +diff --git a/sysdeps/aarch64/configure b/sysdeps/aarch64/configure +old mode 100644 +new mode 100755 +index ca57edce47..9606137e8d +--- a/sysdeps/aarch64/configure ++++ b/sysdeps/aarch64/configure +@@ -325,9 +325,10 @@ then : + printf %s "(cached) " >&6 + else $as_nop + cat > conftest.s <<\EOF +- ptrue p0.b ++ .arch armv8.2-a+sve ++ ptrue p0.b + EOF +-if { ac_try='${CC-cc} -c -march=armv8.2-a+sve conftest.s 1>&5' ++if { ac_try='${CC-cc} -c conftest.s 1>&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? 
+diff --git a/sysdeps/aarch64/configure.ac b/sysdeps/aarch64/configure.ac +index 27874eceb4..56d12d661d 100644 +--- a/sysdeps/aarch64/configure.ac ++++ b/sysdeps/aarch64/configure.ac +@@ -90,9 +90,10 @@ LIBC_CONFIG_VAR([aarch64-variant-pcs], [$libc_cv_aarch64_variant_pcs]) + # Check if asm support armv8.2-a+sve + AC_CACHE_CHECK([for SVE support in assembler], [libc_cv_aarch64_sve_asm], [dnl + cat > conftest.s <<\EOF +- ptrue p0.b ++ .arch armv8.2-a+sve ++ ptrue p0.b + EOF +-if AC_TRY_COMMAND(${CC-cc} -c -march=armv8.2-a+sve conftest.s 1>&AS_MESSAGE_LOG_FD); then ++if AC_TRY_COMMAND(${CC-cc} -c conftest.s 1>&AS_MESSAGE_LOG_FD); then + libc_cv_aarch64_sve_asm=yes + else + libc_cv_aarch64_sve_asm=no +-- +2.33.0 + diff --git a/0012-AArch64-Check-kernel-version-for-SVE-ifuncs.patch b/0012-AArch64-Check-kernel-version-for-SVE-ifuncs.patch new file mode 100644 index 0000000000000000000000000000000000000000..687c821882289c264334a066e45fa85cd06eec52 --- /dev/null +++ b/0012-AArch64-Check-kernel-version-for-SVE-ifuncs.patch @@ -0,0 +1,153 @@ +From 92da7c2cfeeea36d651142f47e570dd5076bc166 Mon Sep 17 00:00:00 2001 +From: Wilco Dijkstra +Date: Thu, 21 Mar 2024 16:48:33 +0000 +Subject: [PATCH 12/26] AArch64: Check kernel version for SVE ifuncs + +Old Linux kernels disable SVE after every system call. Calling the +SVE-optimized memcpy afterwards will then cause a trap to reenable SVE. +As a result, applications with a high use of syscalls may run slower with +the SVE memcpy. This is true for kernels between 4.15.0 and before 6.2.0, +except for 5.14.0 which was patched. Avoid this by checking the kernel +version and selecting the SVE ifunc on modern kernels. + +Parse the kernel version reported by uname() into a 24-bit kernel.major.minor +value without calling any library functions. If uname() is not supported or +if the version format is not recognized, assume the kernel is modern. + +Tested-by: Florian Weimer +Reviewed-by: Szabolcs Nagy +(cherry picked from commit 2e94e2f5d2bf2de124c8ad7da85463355e54ccb2) +--- + sysdeps/aarch64/multiarch/init-arch.h | 2 + + sysdeps/aarch64/multiarch/memcpy.c | 2 +- + sysdeps/aarch64/multiarch/memmove.c | 2 +- + .../unix/sysv/linux/aarch64/cpu-features.c | 48 +++++++++++++++++++ + .../unix/sysv/linux/aarch64/cpu-features.h | 1 + + 5 files changed, 53 insertions(+), 2 deletions(-) + +diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h +index e23e6ff290..daef631e04 100644 +--- a/sysdeps/aarch64/multiarch/init-arch.h ++++ b/sysdeps/aarch64/multiarch/init-arch.h +@@ -36,5 +36,7 @@ + MTE_ENABLED (); \ + bool __attribute__((unused)) sve = \ + GLRO(dl_aarch64_cpu_features).sve; \ ++ bool __attribute__((unused)) prefer_sve_ifuncs = \ ++ GLRO(dl_aarch64_cpu_features).prefer_sve_ifuncs; \ + bool __attribute__((unused)) mops = \ + GLRO(dl_aarch64_cpu_features).mops; +diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c +index 6471fe82e3..e7c7795db6 100644 +--- a/sysdeps/aarch64/multiarch/memcpy.c ++++ b/sysdeps/aarch64/multiarch/memcpy.c +@@ -47,7 +47,7 @@ select_memcpy_ifunc (void) + { + if (IS_A64FX (midr)) + return __memcpy_a64fx; +- return __memcpy_sve; ++ return prefer_sve_ifuncs ? 
__memcpy_sve : __memcpy_generic; + } + + if (IS_THUNDERX (midr)) +diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c +index 7602a5d57d..6b77166851 100644 +--- a/sysdeps/aarch64/multiarch/memmove.c ++++ b/sysdeps/aarch64/multiarch/memmove.c +@@ -47,7 +47,7 @@ select_memmove_ifunc (void) + { + if (IS_A64FX (midr)) + return __memmove_a64fx; +- return __memmove_sve; ++ return prefer_sve_ifuncs ? __memmove_sve : __memmove_generic; + } + + if (IS_THUNDERX (midr)) +diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c +index a11a86efab..4a205a6b35 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c ++++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + + #define DCZID_DZP_MASK (1 << 4) + #define DCZID_BS_MASK (0xf) +@@ -57,6 +58,46 @@ get_midr_from_mcpu (const char *mcpu) + return UINT64_MAX; + } + ++#if __LINUX_KERNEL_VERSION < 0x060200 ++ ++/* Return true if we prefer using SVE in string ifuncs. Old kernels disable ++ SVE after every system call which results in unnecessary traps if memcpy ++ uses SVE. This is true for kernels between 4.15.0 and before 6.2.0, except ++ for 5.14.0 which was patched. For these versions return false to avoid using ++ SVE ifuncs. ++ Parse the kernel version into a 24-bit kernel.major.minor value without ++ calling any library functions. If uname() is not supported or if the version ++ format is not recognized, assume the kernel is modern and return true. */ ++ ++static inline bool ++prefer_sve_ifuncs (void) ++{ ++ struct utsname buf; ++ const char *p = &buf.release[0]; ++ int kernel = 0; ++ int val; ++ ++ if (__uname (&buf) < 0) ++ return true; ++ ++ for (int shift = 16; shift >= 0; shift -= 8) ++ { ++ for (val = 0; *p >= '0' && *p <= '9'; p++) ++ val = val * 10 + *p - '0'; ++ kernel |= (val & 255) << shift; ++ if (*p++ != '.') ++ break; ++ } ++ ++ if (kernel >= 0x060200 || kernel == 0x050e00) ++ return true; ++ if (kernel >= 0x040f00) ++ return false; ++ return true; ++} ++ ++#endif ++ + static inline void + init_cpu_features (struct cpu_features *cpu_features) + { +@@ -119,6 +160,13 @@ init_cpu_features (struct cpu_features *cpu_features) + /* Check if SVE is supported. */ + cpu_features->sve = GLRO (dl_hwcap) & HWCAP_SVE; + ++ cpu_features->prefer_sve_ifuncs = cpu_features->sve; ++ ++#if __LINUX_KERNEL_VERSION < 0x060200 ++ if (cpu_features->sve) ++ cpu_features->prefer_sve_ifuncs = prefer_sve_ifuncs (); ++#endif ++ + /* Check if MOPS is supported. */ + cpu_features->mops = GLRO (dl_hwcap2) & HWCAP2_MOPS; + } +diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h +index 2cf745cd19..351a619dcb 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h ++++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h +@@ -71,6 +71,7 @@ struct cpu_features + /* Currently, the GLIBC memory tagging tunable only defines 8 bits. 
*/ + uint8_t mte_state; + bool sve; ++ bool prefer_sve_ifuncs; + bool mops; + }; + +-- +2.33.0 + diff --git a/0013-powerpc-Fix-ld.so-address-determination-for-PCREL-mo.patch b/0013-powerpc-Fix-ld.so-address-determination-for-PCREL-mo.patch new file mode 100644 index 0000000000000000000000000000000000000000..526dc2c6ccff755cf2f7e511edb4a93e7ed0cb77 --- /dev/null +++ b/0013-powerpc-Fix-ld.so-address-determination-for-PCREL-mo.patch @@ -0,0 +1,56 @@ +From 20534f81760635f3a71fb11ba251568cdc11c6a0 Mon Sep 17 00:00:00 2001 +From: Florian Weimer +Date: Sun, 14 Apr 2024 08:24:51 +0200 +Subject: [PATCH 13/26] powerpc: Fix ld.so address determination for PCREL mode + (bug 31640) + +This seems to have stopped working with some GCC 14 versions, +which clobber r2. With other compilers, the kernel-provided +r2 value is still available at this point. + +Reviewed-by: Peter Bergner +(cherry picked from commit 14e56bd4ce15ac2d1cc43f762eb2e6b83fec1afe) +--- + sysdeps/powerpc/powerpc64/dl-machine.h | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h +index 9b8943bc91..7fa8a355b5 100644 +--- a/sysdeps/powerpc/powerpc64/dl-machine.h ++++ b/sysdeps/powerpc/powerpc64/dl-machine.h +@@ -79,6 +79,7 @@ elf_host_tolerates_class (const Elf64_Ehdr *ehdr) + static inline Elf64_Addr + elf_machine_load_address (void) __attribute__ ((const)); + ++#ifndef __PCREL__ + static inline Elf64_Addr + elf_machine_load_address (void) + { +@@ -106,6 +107,24 @@ elf_machine_dynamic (void) + /* Then subtract off the load address offset. */ + return runtime_dynamic - elf_machine_load_address() ; + } ++#else /* __PCREL__ */ ++/* In PCREL mode, r2 may have been clobbered. Rely on relative ++ relocations instead. */ ++ ++static inline ElfW(Addr) ++elf_machine_load_address (void) ++{ ++ extern const ElfW(Ehdr) __ehdr_start attribute_hidden; ++ return (ElfW(Addr)) &__ehdr_start; ++} ++ ++static inline ElfW(Addr) ++elf_machine_dynamic (void) ++{ ++ extern ElfW(Dyn) _DYNAMIC[] attribute_hidden; ++ return (ElfW(Addr)) _DYNAMIC - elf_machine_load_address (); ++} ++#endif /* __PCREL__ */ + + /* The PLT uses Elf64_Rela relocs. */ + #define elf_machine_relplt elf_machine_rela +-- +2.33.0 + diff --git a/iconv-ISO-2022-CN-EXT-fix-out-of-bound-writes-when-w.patch b/0014-iconv-ISO-2022-CN-EXT-fix-out-of-bound-writes-when-w.patch similarity index 95% rename from iconv-ISO-2022-CN-EXT-fix-out-of-bound-writes-when-w.patch rename to 0014-iconv-ISO-2022-CN-EXT-fix-out-of-bound-writes-when-w.patch index 4e39ccab381a51c9b1041dda344ea3a6e2adf0c7..1cd02a8dafff33d93ec03a238cfd4ec2e06f3b32 100644 --- a/iconv-ISO-2022-CN-EXT-fix-out-of-bound-writes-when-w.patch +++ b/0014-iconv-ISO-2022-CN-EXT-fix-out-of-bound-writes-when-w.patch @@ -1,8 +1,8 @@ -From f9dc609e06b1136bb0408be9605ce7973a767ada Mon Sep 17 00:00:00 2001 +From e1135387deded5d73924f6ca20c72a35dc8e1bda Mon Sep 17 00:00:00 2001 From: Charles Fol Date: Thu, 28 Mar 2024 12:25:38 -0300 -Subject: [PATCH] iconv: ISO-2022-CN-EXT: fix out-of-bound writes when writing - escape sequence (CVE-2024-2961) +Subject: [PATCH 14/26] iconv: ISO-2022-CN-EXT: fix out-of-bound writes when + writing escape sequence (CVE-2024-2961) ISO-2022-CN-EXT uses escape sequences to indicate character set changes (as specified by RFC 1922). While the SOdesignation has the expected @@ -15,6 +15,8 @@ Checked on aarch64-linux-gnu. 
Co-authored-by: Adhemerval Zanella Reviewed-by: Carlos O'Donell Tested-by: Carlos O'Donell + +(cherry picked from commit f9dc609e06b1136bb0408be9605ce7973a767ada) --- iconvdata/Makefile | 5 +- iconvdata/iso-2022-cn-ext.c | 12 +++ @@ -23,7 +25,7 @@ Tested-by: Carlos O'Donell create mode 100644 iconvdata/tst-iconv-iso-2022-cn-ext.c diff --git a/iconvdata/Makefile b/iconvdata/Makefile -index ea019ce5c0..7196a8744b 100644 +index dd5cafab21..075098dce8 100644 --- a/iconvdata/Makefile +++ b/iconvdata/Makefile @@ -75,7 +75,8 @@ ifeq (yes,$(build-shared)) @@ -46,7 +48,7 @@ index ea019ce5c0..7196a8744b 100644 $(objpfx)iconv-test.out: run-iconv-test.sh \ $(addprefix $(objpfx), $(gconv-modules)) \ diff --git a/iconvdata/iso-2022-cn-ext.c b/iconvdata/iso-2022-cn-ext.c -index b34c8a36f4..cce29b1969 100644 +index 36727f0865..9bb02238a3 100644 --- a/iconvdata/iso-2022-cn-ext.c +++ b/iconvdata/iso-2022-cn-ext.c @@ -574,6 +574,12 @@ DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized"); diff --git a/0015-sparc-Remove-64-bit-check-on-sparc32-wordsize-BZ-275.patch b/0015-sparc-Remove-64-bit-check-on-sparc32-wordsize-BZ-275.patch new file mode 100644 index 0000000000000000000000000000000000000000..b32b5b4fb8d3154f5ce110d95a7e394ebb242177 --- /dev/null +++ b/0015-sparc-Remove-64-bit-check-on-sparc32-wordsize-BZ-275.patch @@ -0,0 +1,38 @@ +From 61484011e76d2bfafbe401f7058717c2029dd155 Mon Sep 17 00:00:00 2001 +From: Adhemerval Zanella +Date: Wed, 17 Jan 2024 10:13:06 -0300 +Subject: [PATCH 15/26] sparc: Remove 64 bit check on sparc32 wordsize (BZ + 27574) + +The sparc32 is always 32 bits. + +Checked on sparcv9-linux-gnu. + +(cherry picked from commit dd57f5e7b652772499cb220d78157c1038d24f06) +--- + sysdeps/sparc/sparc32/bits/wordsize.h | 13 ++++--------- + 1 file changed, 4 insertions(+), 9 deletions(-) + +diff --git a/sysdeps/sparc/sparc32/bits/wordsize.h b/sysdeps/sparc/sparc32/bits/wordsize.h +index 2f66f10d72..4bbd2e63b4 100644 +--- a/sysdeps/sparc/sparc32/bits/wordsize.h ++++ b/sysdeps/sparc/sparc32/bits/wordsize.h +@@ -1,11 +1,6 @@ + /* Determine the wordsize from the preprocessor defines. */ + +-#if defined __arch64__ || defined __sparcv9 +-# define __WORDSIZE 64 +-# define __WORDSIZE_TIME64_COMPAT32 1 +-#else +-# define __WORDSIZE 32 +-# define __WORDSIZE_TIME64_COMPAT32 0 +-# define __WORDSIZE32_SIZE_ULONG 0 +-# define __WORDSIZE32_PTRDIFF_LONG 0 +-#endif ++#define __WORDSIZE 32 ++#define __WORDSIZE_TIME64_COMPAT32 0 ++#define __WORDSIZE32_SIZE_ULONG 0 ++#define __WORDSIZE32_PTRDIFF_LONG 0 +-- +2.33.0 + diff --git a/0016-login-Check-default-sizes-of-structs-utmp-utmpx-last.patch b/0016-login-Check-default-sizes-of-structs-utmp-utmpx-last.patch new file mode 100644 index 0000000000000000000000000000000000000000..c1595fa6154afe8e1909bec986a5d4d71f6696e4 --- /dev/null +++ b/0016-login-Check-default-sizes-of-structs-utmp-utmpx-last.patch @@ -0,0 +1,247 @@ +From 78d9f91da6682f4073f05abaf309e4ca2b746003 Mon Sep 17 00:00:00 2001 +From: Florian Weimer +Date: Fri, 19 Apr 2024 14:38:17 +0200 +Subject: [PATCH 16/26] login: Check default sizes of structs utmp, utmpx, + lastlog + +The default is for ports with a 64-bit time_t. +Ports with a 32-bit time_t or with __WORDSIZE_TIME64_COMPAT32=1 +need to override it. 
+ +Reviewed-by: Adhemerval Zanella +(cherry picked from commit 4d4da5aab936504b2d3eca3146e109630d9093c4) +--- + login/Makefile | 2 +- + login/tst-utmp-size.c | 33 +++++++++++++++++++++++++++++++++ + sysdeps/arc/utmp-size.h | 3 +++ + sysdeps/arm/utmp-size.h | 2 ++ + sysdeps/csky/utmp-size.h | 2 ++ + sysdeps/generic/utmp-size.h | 23 +++++++++++++++++++++++ + sysdeps/hppa/utmp-size.h | 2 ++ + sysdeps/m68k/utmp-size.h | 3 +++ + sysdeps/microblaze/utmp-size.h | 2 ++ + sysdeps/mips/utmp-size.h | 2 ++ + sysdeps/nios2/utmp-size.h | 2 ++ + sysdeps/or1k/utmp-size.h | 3 +++ + sysdeps/powerpc/utmp-size.h | 2 ++ + sysdeps/riscv/utmp-size.h | 2 ++ + sysdeps/sh/utmp-size.h | 2 ++ + sysdeps/sparc/utmp-size.h | 2 ++ + sysdeps/x86/utmp-size.h | 2 ++ + 17 files changed, 88 insertions(+), 1 deletion(-) + create mode 100644 login/tst-utmp-size.c + create mode 100644 sysdeps/arc/utmp-size.h + create mode 100644 sysdeps/arm/utmp-size.h + create mode 100644 sysdeps/csky/utmp-size.h + create mode 100644 sysdeps/generic/utmp-size.h + create mode 100644 sysdeps/hppa/utmp-size.h + create mode 100644 sysdeps/m68k/utmp-size.h + create mode 100644 sysdeps/microblaze/utmp-size.h + create mode 100644 sysdeps/mips/utmp-size.h + create mode 100644 sysdeps/nios2/utmp-size.h + create mode 100644 sysdeps/or1k/utmp-size.h + create mode 100644 sysdeps/powerpc/utmp-size.h + create mode 100644 sysdeps/riscv/utmp-size.h + create mode 100644 sysdeps/sh/utmp-size.h + create mode 100644 sysdeps/sparc/utmp-size.h + create mode 100644 sysdeps/x86/utmp-size.h + +diff --git a/login/Makefile b/login/Makefile +index 74216cbcb2..1cca663769 100644 +--- a/login/Makefile ++++ b/login/Makefile +@@ -44,7 +44,7 @@ subdir-dirs = programs + vpath %.c programs + + tests := tst-utmp tst-utmpx tst-grantpt tst-ptsname tst-getlogin tst-updwtmpx \ +- tst-pututxline-lockfail tst-pututxline-cache ++ tst-pututxline-lockfail tst-pututxline-cache tst-utmp-size + + # Empty compatibility library for old binaries. + extra-libs := libutil +diff --git a/login/tst-utmp-size.c b/login/tst-utmp-size.c +new file mode 100644 +index 0000000000..1b7f7ff042 +--- /dev/null ++++ b/login/tst-utmp-size.c +@@ -0,0 +1,33 @@ ++/* Check expected sizes of struct utmp, struct utmpx, struct lastlog. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++static int ++do_test (void) ++{ ++ _Static_assert (sizeof (struct utmp) == UTMP_SIZE, "struct utmp size"); ++ _Static_assert (sizeof (struct utmpx) == UTMP_SIZE, "struct utmpx size"); ++ _Static_assert (sizeof (struct lastlog) == LASTLOG_SIZE, ++ "struct lastlog size"); ++ return 0; ++} ++ ++#include +diff --git a/sysdeps/arc/utmp-size.h b/sysdeps/arc/utmp-size.h +new file mode 100644 +index 0000000000..a247fcd3da +--- /dev/null ++++ b/sysdeps/arc/utmp-size.h +@@ -0,0 +1,3 @@ ++/* arc has less padding than other architectures with 64-bit time_t. */ ++#define UTMP_SIZE 392 ++#define LASTLOG_SIZE 296 +diff --git a/sysdeps/arm/utmp-size.h b/sysdeps/arm/utmp-size.h +new file mode 100644 +index 0000000000..8f21ebe1b6 +--- /dev/null ++++ b/sysdeps/arm/utmp-size.h +@@ -0,0 +1,2 @@ ++#define UTMP_SIZE 384 ++#define LASTLOG_SIZE 292 +diff --git a/sysdeps/csky/utmp-size.h b/sysdeps/csky/utmp-size.h +new file mode 100644 +index 0000000000..8f21ebe1b6 +--- /dev/null ++++ b/sysdeps/csky/utmp-size.h +@@ -0,0 +1,2 @@ ++#define UTMP_SIZE 384 ++#define LASTLOG_SIZE 292 +diff --git a/sysdeps/generic/utmp-size.h b/sysdeps/generic/utmp-size.h +new file mode 100644 +index 0000000000..89dbe878b0 +--- /dev/null ++++ b/sysdeps/generic/utmp-size.h +@@ -0,0 +1,23 @@ ++/* Expected sizes of utmp-related structures stored in files. 64-bit version. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Expected size, in bytes, of struct utmp and struct utmpx. */ ++#define UTMP_SIZE 400 ++ ++/* Expected size, in bytes, of struct lastlog. */ ++#define LASTLOG_SIZE 296 +diff --git a/sysdeps/hppa/utmp-size.h b/sysdeps/hppa/utmp-size.h +new file mode 100644 +index 0000000000..8f21ebe1b6 +--- /dev/null ++++ b/sysdeps/hppa/utmp-size.h +@@ -0,0 +1,2 @@ ++#define UTMP_SIZE 384 ++#define LASTLOG_SIZE 292 +diff --git a/sysdeps/m68k/utmp-size.h b/sysdeps/m68k/utmp-size.h +new file mode 100644 +index 0000000000..5946685819 +--- /dev/null ++++ b/sysdeps/m68k/utmp-size.h +@@ -0,0 +1,3 @@ ++/* m68k has 2-byte alignment. 
*/ ++#define UTMP_SIZE 382 ++#define LASTLOG_SIZE 292 +diff --git a/sysdeps/microblaze/utmp-size.h b/sysdeps/microblaze/utmp-size.h +new file mode 100644 +index 0000000000..8f21ebe1b6 +--- /dev/null ++++ b/sysdeps/microblaze/utmp-size.h +@@ -0,0 +1,2 @@ ++#define UTMP_SIZE 384 ++#define LASTLOG_SIZE 292 +diff --git a/sysdeps/mips/utmp-size.h b/sysdeps/mips/utmp-size.h +new file mode 100644 +index 0000000000..8f21ebe1b6 +--- /dev/null ++++ b/sysdeps/mips/utmp-size.h +@@ -0,0 +1,2 @@ ++#define UTMP_SIZE 384 ++#define LASTLOG_SIZE 292 +diff --git a/sysdeps/nios2/utmp-size.h b/sysdeps/nios2/utmp-size.h +new file mode 100644 +index 0000000000..8f21ebe1b6 +--- /dev/null ++++ b/sysdeps/nios2/utmp-size.h +@@ -0,0 +1,2 @@ ++#define UTMP_SIZE 384 ++#define LASTLOG_SIZE 292 +diff --git a/sysdeps/or1k/utmp-size.h b/sysdeps/or1k/utmp-size.h +new file mode 100644 +index 0000000000..6b3653aa4d +--- /dev/null ++++ b/sysdeps/or1k/utmp-size.h +@@ -0,0 +1,3 @@ ++/* or1k has less padding than other architectures with 64-bit time_t. */ ++#define UTMP_SIZE 392 ++#define LASTLOG_SIZE 296 +diff --git a/sysdeps/powerpc/utmp-size.h b/sysdeps/powerpc/utmp-size.h +new file mode 100644 +index 0000000000..8f21ebe1b6 +--- /dev/null ++++ b/sysdeps/powerpc/utmp-size.h +@@ -0,0 +1,2 @@ ++#define UTMP_SIZE 384 ++#define LASTLOG_SIZE 292 +diff --git a/sysdeps/riscv/utmp-size.h b/sysdeps/riscv/utmp-size.h +new file mode 100644 +index 0000000000..8f21ebe1b6 +--- /dev/null ++++ b/sysdeps/riscv/utmp-size.h +@@ -0,0 +1,2 @@ ++#define UTMP_SIZE 384 ++#define LASTLOG_SIZE 292 +diff --git a/sysdeps/sh/utmp-size.h b/sysdeps/sh/utmp-size.h +new file mode 100644 +index 0000000000..8f21ebe1b6 +--- /dev/null ++++ b/sysdeps/sh/utmp-size.h +@@ -0,0 +1,2 @@ ++#define UTMP_SIZE 384 ++#define LASTLOG_SIZE 292 +diff --git a/sysdeps/sparc/utmp-size.h b/sysdeps/sparc/utmp-size.h +new file mode 100644 +index 0000000000..8f21ebe1b6 +--- /dev/null ++++ b/sysdeps/sparc/utmp-size.h +@@ -0,0 +1,2 @@ ++#define UTMP_SIZE 384 ++#define LASTLOG_SIZE 292 +diff --git a/sysdeps/x86/utmp-size.h b/sysdeps/x86/utmp-size.h +new file mode 100644 +index 0000000000..8f21ebe1b6 +--- /dev/null ++++ b/sysdeps/x86/utmp-size.h +@@ -0,0 +1,2 @@ ++#define UTMP_SIZE 384 ++#define LASTLOG_SIZE 292 +-- +2.33.0 + diff --git a/0017-login-structs-utmp-utmpx-lastlog-_TIME_BITS-independ.patch b/0017-login-structs-utmp-utmpx-lastlog-_TIME_BITS-independ.patch new file mode 100644 index 0000000000000000000000000000000000000000..9de4ff78f3792a455f4350fc89bd6b249b8f8f59 --- /dev/null +++ b/0017-login-structs-utmp-utmpx-lastlog-_TIME_BITS-independ.patch @@ -0,0 +1,399 @@ +From 68bff8859231787f7e19b01788cc59b673c14046 Mon Sep 17 00:00:00 2001 +From: Florian Weimer +Date: Fri, 19 Apr 2024 14:38:17 +0200 +Subject: [PATCH 17/26] login: structs utmp, utmpx, lastlog _TIME_BITS + independence (bug 30701) + +These structs describe file formats under /var/log, and should not +depend on the definition of _TIME_BITS. This is achieved by +defining __WORDSIZE_TIME64_COMPAT32 to 1 on 32-bit ports that +support 32-bit time_t values (where __time_t is 32 bits). 
+ +Reviewed-by: Adhemerval Zanella +(cherry picked from commit 9abdae94c7454c45e02e97e4ed1eb1b1915d13d8) +--- + bits/wordsize.h | 6 ++++-- + login/Makefile | 4 +++- + login/tst-utmp-size-64.c | 2 ++ + sysdeps/arm/bits/wordsize.h | 21 +++++++++++++++++++ + sysdeps/csky/bits/wordsize.h | 21 +++++++++++++++++++ + sysdeps/m68k/bits/wordsize.h | 21 +++++++++++++++++++ + sysdeps/microblaze/bits/wordsize.h | 21 +++++++++++++++++++ + sysdeps/mips/bits/wordsize.h | 6 +----- + sysdeps/nios2/bits/wordsize.h | 21 +++++++++++++++++++ + sysdeps/powerpc/powerpc32/bits/wordsize.h | 3 +-- + sysdeps/powerpc/powerpc64/bits/wordsize.h | 3 +-- + sysdeps/sh/bits/wordsize.h | 21 +++++++++++++++++++ + sysdeps/sparc/sparc32/bits/wordsize.h | 2 +- + sysdeps/sparc/sparc64/bits/wordsize.h | 3 +-- + sysdeps/unix/sysv/linux/hppa/bits/wordsize.h | 21 +++++++++++++++++++ + .../unix/sysv/linux/powerpc/bits/wordsize.h | 3 +-- + sysdeps/unix/sysv/linux/sparc/bits/wordsize.h | 3 +-- + sysdeps/x86/bits/wordsize.h | 5 ++--- + 18 files changed, 165 insertions(+), 22 deletions(-) + create mode 100644 login/tst-utmp-size-64.c + create mode 100644 sysdeps/arm/bits/wordsize.h + create mode 100644 sysdeps/csky/bits/wordsize.h + create mode 100644 sysdeps/m68k/bits/wordsize.h + create mode 100644 sysdeps/microblaze/bits/wordsize.h + create mode 100644 sysdeps/nios2/bits/wordsize.h + create mode 100644 sysdeps/sh/bits/wordsize.h + create mode 100644 sysdeps/unix/sysv/linux/hppa/bits/wordsize.h + +diff --git a/bits/wordsize.h b/bits/wordsize.h +index 14edae3a11..53013a9275 100644 +--- a/bits/wordsize.h ++++ b/bits/wordsize.h +@@ -21,7 +21,9 @@ + #define __WORDSIZE32_PTRDIFF_LONG + + /* Set to 1 in order to force time types to be 32 bits instead of 64 bits in +- struct lastlog and struct utmp{,x} on 64-bit ports. This may be done in ++ struct lastlog and struct utmp{,x}. This may be done in + order to make 64-bit ports compatible with 32-bit ports. Set to 0 for +- 64-bit ports where the time types are 64-bits or for any 32-bit ports. */ ++ 64-bit ports where the time types are 64-bits and new 32-bit ports ++ where time_t is 64 bits, and there is no companion architecture with ++ 32-bit time_t. */ + #define __WORDSIZE_TIME64_COMPAT32 +diff --git a/login/Makefile b/login/Makefile +index 1cca663769..7dd6cab9c9 100644 +--- a/login/Makefile ++++ b/login/Makefile +@@ -44,7 +44,9 @@ subdir-dirs = programs + vpath %.c programs + + tests := tst-utmp tst-utmpx tst-grantpt tst-ptsname tst-getlogin tst-updwtmpx \ +- tst-pututxline-lockfail tst-pututxline-cache tst-utmp-size ++ tst-pututxline-lockfail tst-pututxline-cache tst-utmp-size tst-utmp-size-64 ++ ++CFLAGS-tst-utmp-size-64.c += -D_FILE_OFFSET_BITS=64 -D_TIME_BITS=64 + + # Empty compatibility library for old binaries. + extra-libs := libutil +diff --git a/login/tst-utmp-size-64.c b/login/tst-utmp-size-64.c +new file mode 100644 +index 0000000000..7a581a4c12 +--- /dev/null ++++ b/login/tst-utmp-size-64.c +@@ -0,0 +1,2 @@ ++/* The on-disk layout must not change in time64 mode. */ ++#include "tst-utmp-size.c" +diff --git a/sysdeps/arm/bits/wordsize.h b/sysdeps/arm/bits/wordsize.h +new file mode 100644 +index 0000000000..6ecbfe7c86 +--- /dev/null ++++ b/sysdeps/arm/bits/wordsize.h +@@ -0,0 +1,21 @@ ++/* Copyright (C) 1999-2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define __WORDSIZE 32 ++#define __WORDSIZE_TIME64_COMPAT32 1 ++#define __WORDSIZE32_SIZE_ULONG 0 ++#define __WORDSIZE32_PTRDIFF_LONG 0 +diff --git a/sysdeps/csky/bits/wordsize.h b/sysdeps/csky/bits/wordsize.h +new file mode 100644 +index 0000000000..6ecbfe7c86 +--- /dev/null ++++ b/sysdeps/csky/bits/wordsize.h +@@ -0,0 +1,21 @@ ++/* Copyright (C) 1999-2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define __WORDSIZE 32 ++#define __WORDSIZE_TIME64_COMPAT32 1 ++#define __WORDSIZE32_SIZE_ULONG 0 ++#define __WORDSIZE32_PTRDIFF_LONG 0 +diff --git a/sysdeps/m68k/bits/wordsize.h b/sysdeps/m68k/bits/wordsize.h +new file mode 100644 +index 0000000000..6ecbfe7c86 +--- /dev/null ++++ b/sysdeps/m68k/bits/wordsize.h +@@ -0,0 +1,21 @@ ++/* Copyright (C) 1999-2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define __WORDSIZE 32 ++#define __WORDSIZE_TIME64_COMPAT32 1 ++#define __WORDSIZE32_SIZE_ULONG 0 ++#define __WORDSIZE32_PTRDIFF_LONG 0 +diff --git a/sysdeps/microblaze/bits/wordsize.h b/sysdeps/microblaze/bits/wordsize.h +new file mode 100644 +index 0000000000..6ecbfe7c86 +--- /dev/null ++++ b/sysdeps/microblaze/bits/wordsize.h +@@ -0,0 +1,21 @@ ++/* Copyright (C) 1999-2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define __WORDSIZE 32 ++#define __WORDSIZE_TIME64_COMPAT32 1 ++#define __WORDSIZE32_SIZE_ULONG 0 ++#define __WORDSIZE32_PTRDIFF_LONG 0 +diff --git a/sysdeps/mips/bits/wordsize.h b/sysdeps/mips/bits/wordsize.h +index 9d7d961f3e..cb72a0869a 100644 +--- a/sysdeps/mips/bits/wordsize.h ++++ b/sysdeps/mips/bits/wordsize.h +@@ -19,11 +19,7 @@ + + #define __WORDSIZE _MIPS_SZPTR + +-#if _MIPS_SIM == _ABI64 +-# define __WORDSIZE_TIME64_COMPAT32 1 +-#else +-# define __WORDSIZE_TIME64_COMPAT32 0 +-#endif ++#define __WORDSIZE_TIME64_COMPAT32 1 + + #if __WORDSIZE == 32 + #define __WORDSIZE32_SIZE_ULONG 0 +diff --git a/sysdeps/nios2/bits/wordsize.h b/sysdeps/nios2/bits/wordsize.h +new file mode 100644 +index 0000000000..6ecbfe7c86 +--- /dev/null ++++ b/sysdeps/nios2/bits/wordsize.h +@@ -0,0 +1,21 @@ ++/* Copyright (C) 1999-2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#define __WORDSIZE 32 ++#define __WORDSIZE_TIME64_COMPAT32 1 ++#define __WORDSIZE32_SIZE_ULONG 0 ++#define __WORDSIZE32_PTRDIFF_LONG 0 +diff --git a/sysdeps/powerpc/powerpc32/bits/wordsize.h b/sysdeps/powerpc/powerpc32/bits/wordsize.h +index 04ca9debf0..6993fb6b29 100644 +--- a/sysdeps/powerpc/powerpc32/bits/wordsize.h ++++ b/sysdeps/powerpc/powerpc32/bits/wordsize.h +@@ -2,10 +2,9 @@ + + #if defined __powerpc64__ + # define __WORDSIZE 64 +-# define __WORDSIZE_TIME64_COMPAT32 1 + #else + # define __WORDSIZE 32 +-# define __WORDSIZE_TIME64_COMPAT32 0 + # define __WORDSIZE32_SIZE_ULONG 0 + # define __WORDSIZE32_PTRDIFF_LONG 0 + #endif ++#define __WORDSIZE_TIME64_COMPAT32 1 +diff --git a/sysdeps/powerpc/powerpc64/bits/wordsize.h b/sysdeps/powerpc/powerpc64/bits/wordsize.h +index 04ca9debf0..6993fb6b29 100644 +--- a/sysdeps/powerpc/powerpc64/bits/wordsize.h ++++ b/sysdeps/powerpc/powerpc64/bits/wordsize.h +@@ -2,10 +2,9 @@ + + #if defined __powerpc64__ + # define __WORDSIZE 64 +-# define __WORDSIZE_TIME64_COMPAT32 1 + #else + # define __WORDSIZE 32 +-# define __WORDSIZE_TIME64_COMPAT32 0 + # define __WORDSIZE32_SIZE_ULONG 0 + # define __WORDSIZE32_PTRDIFF_LONG 0 + #endif ++#define __WORDSIZE_TIME64_COMPAT32 1 +diff --git a/sysdeps/sh/bits/wordsize.h b/sysdeps/sh/bits/wordsize.h +new file mode 100644 +index 0000000000..6ecbfe7c86 +--- /dev/null ++++ b/sysdeps/sh/bits/wordsize.h +@@ -0,0 +1,21 @@ ++/* Copyright (C) 1999-2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define __WORDSIZE 32 ++#define __WORDSIZE_TIME64_COMPAT32 1 ++#define __WORDSIZE32_SIZE_ULONG 0 ++#define __WORDSIZE32_PTRDIFF_LONG 0 +diff --git a/sysdeps/sparc/sparc32/bits/wordsize.h b/sysdeps/sparc/sparc32/bits/wordsize.h +index 4bbd2e63b4..a2e79e0fa9 100644 +--- a/sysdeps/sparc/sparc32/bits/wordsize.h ++++ b/sysdeps/sparc/sparc32/bits/wordsize.h +@@ -1,6 +1,6 @@ + /* Determine the wordsize from the preprocessor defines. 
*/ + + #define __WORDSIZE 32 +-#define __WORDSIZE_TIME64_COMPAT32 0 ++#define __WORDSIZE_TIME64_COMPAT32 1 + #define __WORDSIZE32_SIZE_ULONG 0 + #define __WORDSIZE32_PTRDIFF_LONG 0 +diff --git a/sysdeps/sparc/sparc64/bits/wordsize.h b/sysdeps/sparc/sparc64/bits/wordsize.h +index 2f66f10d72..ea103e5970 100644 +--- a/sysdeps/sparc/sparc64/bits/wordsize.h ++++ b/sysdeps/sparc/sparc64/bits/wordsize.h +@@ -2,10 +2,9 @@ + + #if defined __arch64__ || defined __sparcv9 + # define __WORDSIZE 64 +-# define __WORDSIZE_TIME64_COMPAT32 1 + #else + # define __WORDSIZE 32 +-# define __WORDSIZE_TIME64_COMPAT32 0 + # define __WORDSIZE32_SIZE_ULONG 0 + # define __WORDSIZE32_PTRDIFF_LONG 0 + #endif ++#define __WORDSIZE_TIME64_COMPAT32 1 +diff --git a/sysdeps/unix/sysv/linux/hppa/bits/wordsize.h b/sysdeps/unix/sysv/linux/hppa/bits/wordsize.h +new file mode 100644 +index 0000000000..6ecbfe7c86 +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/hppa/bits/wordsize.h +@@ -0,0 +1,21 @@ ++/* Copyright (C) 1999-2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define __WORDSIZE 32 ++#define __WORDSIZE_TIME64_COMPAT32 1 ++#define __WORDSIZE32_SIZE_ULONG 0 ++#define __WORDSIZE32_PTRDIFF_LONG 0 +diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/wordsize.h b/sysdeps/unix/sysv/linux/powerpc/bits/wordsize.h +index 04ca9debf0..6993fb6b29 100644 +--- a/sysdeps/unix/sysv/linux/powerpc/bits/wordsize.h ++++ b/sysdeps/unix/sysv/linux/powerpc/bits/wordsize.h +@@ -2,10 +2,9 @@ + + #if defined __powerpc64__ + # define __WORDSIZE 64 +-# define __WORDSIZE_TIME64_COMPAT32 1 + #else + # define __WORDSIZE 32 +-# define __WORDSIZE_TIME64_COMPAT32 0 + # define __WORDSIZE32_SIZE_ULONG 0 + # define __WORDSIZE32_PTRDIFF_LONG 0 + #endif ++#define __WORDSIZE_TIME64_COMPAT32 1 +diff --git a/sysdeps/unix/sysv/linux/sparc/bits/wordsize.h b/sysdeps/unix/sysv/linux/sparc/bits/wordsize.h +index 7562875ee2..ea103e5970 100644 +--- a/sysdeps/unix/sysv/linux/sparc/bits/wordsize.h ++++ b/sysdeps/unix/sysv/linux/sparc/bits/wordsize.h +@@ -2,10 +2,9 @@ + + #if defined __arch64__ || defined __sparcv9 + # define __WORDSIZE 64 +-# define __WORDSIZE_TIME64_COMPAT32 1 + #else + # define __WORDSIZE 32 + # define __WORDSIZE32_SIZE_ULONG 0 + # define __WORDSIZE32_PTRDIFF_LONG 0 +-# define __WORDSIZE_TIME64_COMPAT32 0 + #endif ++#define __WORDSIZE_TIME64_COMPAT32 1 +diff --git a/sysdeps/x86/bits/wordsize.h b/sysdeps/x86/bits/wordsize.h +index 70f652bca1..3f40aa76f9 100644 +--- a/sysdeps/x86/bits/wordsize.h ++++ b/sysdeps/x86/bits/wordsize.h +@@ -8,10 +8,9 @@ + #define __WORDSIZE32_PTRDIFF_LONG 0 + #endif + ++#define __WORDSIZE_TIME64_COMPAT32 1 ++ + #ifdef __x86_64__ +-# define __WORDSIZE_TIME64_COMPAT32 1 + /* Both x86-64 and x32 use the 64-bit system call interface. 
*/ + # define __SYSCALL_WORDSIZE 64 +-#else +-# define __WORDSIZE_TIME64_COMPAT32 0 + #endif +-- +2.33.0 + diff --git a/0018-nptl-Fix-tst-cancel30-on-kernels-without-ppoll_time6.patch b/0018-nptl-Fix-tst-cancel30-on-kernels-without-ppoll_time6.patch new file mode 100644 index 0000000000000000000000000000000000000000..0aeaa18015163b7e34bd52c8ccab04f17b4eda5b --- /dev/null +++ b/0018-nptl-Fix-tst-cancel30-on-kernels-without-ppoll_time6.patch @@ -0,0 +1,55 @@ +From decc9f504ae78bbee6faa49b9bca71c7eae62ea9 Mon Sep 17 00:00:00 2001 +From: Florian Weimer +Date: Tue, 23 Apr 2024 21:16:32 +0200 +Subject: [PATCH 18/26] nptl: Fix tst-cancel30 on kernels without ppoll_time64 + support + +Fall back to ppoll if ppoll_time64 fails with ENOSYS. +Fixes commit 370da8a121c3ba9eeb2f13da15fc0f21f4136b25 ("nptl: Fix +tst-cancel30 on sparc64"). + +Reviewed-by: Adhemerval Zanella +(cherry picked from commit f4724843ada64a51d66f65d3199fe431f9d4c254) +--- + sysdeps/pthread/tst-cancel30.c | 15 +++++++++++---- + 1 file changed, 11 insertions(+), 4 deletions(-) + +diff --git a/sysdeps/pthread/tst-cancel30.c b/sysdeps/pthread/tst-cancel30.c +index ff803386be..ace925ca67 100644 +--- a/sysdeps/pthread/tst-cancel30.c ++++ b/sysdeps/pthread/tst-cancel30.c +@@ -18,6 +18,7 @@ + License along with the GNU C Library; if not, see + . */ + ++#include + #include + #include + #include +@@ -46,13 +47,19 @@ tf (void *arg) + + /* Wait indefinitely for cancellation, which only works if asynchronous + cancellation is enabled. */ +-#if defined SYS_ppoll || defined SYS_ppoll_time64 +-# ifndef SYS_ppoll_time64 +-# define SYS_ppoll_time64 SYS_ppoll ++#ifdef SYS_ppoll_time64 ++ long int ret = syscall (SYS_ppoll_time64, NULL, 0, NULL, NULL); ++ (void) ret; ++# ifdef SYS_ppoll ++ if (ret == -1 && errno == ENOSYS) ++ syscall (SYS_ppoll, NULL, 0, NULL, NULL); + # endif +- syscall (SYS_ppoll_time64, NULL, 0, NULL, NULL); + #else ++# ifdef SYS_ppoll ++ syscall (SYS_ppoll, NULL, 0, NULL, NULL); ++# else + for (;;); ++# endif + #endif + + return 0; +-- +2.33.0 + diff --git a/0019-i386-ulp-update-for-SSE2-disable-multi-arch-configur.patch b/0019-i386-ulp-update-for-SSE2-disable-multi-arch-configur.patch new file mode 100644 index 0000000000000000000000000000000000000000..8acf75801924ccf47d298b5f5958e0d48d09a7ba --- /dev/null +++ b/0019-i386-ulp-update-for-SSE2-disable-multi-arch-configur.patch @@ -0,0 +1,26 @@ +From 29e20bd1222cb69dcc6827e899ce7181090052dc Mon Sep 17 00:00:00 2001 +From: Florian Weimer +Date: Thu, 25 Apr 2024 12:56:48 +0200 +Subject: [PATCH 19/26] i386: ulp update for SSE2 --disable-multi-arch + configurations + +(cherry picked from commit 3a3a4497421422aa854c855cbe5110ca7d598ffc) +--- + sysdeps/i386/fpu/libm-test-ulps | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps +index 84e6686eba..f2139fc172 100644 +--- a/sysdeps/i386/fpu/libm-test-ulps ++++ b/sysdeps/i386/fpu/libm-test-ulps +@@ -1232,6 +1232,7 @@ ldouble: 6 + + Function: "hypot": + double: 1 ++float: 1 + float128: 1 + ldouble: 1 + +-- +2.33.0 + diff --git a/backport-CVE-2024-33599-nscd-Stack-based-buffer-overflow-in-netgroup-cache.patch b/0020-CVE-2024-33599-nscd-Stack-based-buffer-overflow-in-n.patch similarity index 76% rename from backport-CVE-2024-33599-nscd-Stack-based-buffer-overflow-in-netgroup-cache.patch rename to 0020-CVE-2024-33599-nscd-Stack-based-buffer-overflow-in-n.patch index 4c98db7c2b5a272bce42b24e513ae609cc5184d9..2188c60a7dd3d22ffb929fe5388dec4b503bf6b9 100644 --- 
a/backport-CVE-2024-33599-nscd-Stack-based-buffer-overflow-in-netgroup-cache.patch +++ b/0020-CVE-2024-33599-nscd-Stack-based-buffer-overflow-in-n.patch @@ -1,23 +1,20 @@ -From 87801a8fd06db1d654eea3e4f7626ff476a9bdaa Mon Sep 17 00:00:00 2001 +From 5968aebb86164034b8f8421b4abab2f837a5bdaf Mon Sep 17 00:00:00 2001 From: Florian Weimer Date: Thu, 25 Apr 2024 15:00:45 +0200 -Subject: [PATCH] CVE-2024-33599: nscd: Stack-based buffer overflow in netgroup - cache (bug 31677) +Subject: [PATCH 20/26] CVE-2024-33599: nscd: Stack-based buffer overflow in + netgroup cache (bug 31677) Using alloca matches what other caches do. The request length is bounded by MAXKEYLEN. Reviewed-by: Carlos O'Donell - -Conflict:NA -Reference:https://sourceware.org/git/?p=glibc.git;a=patch;h=87801a8fd06db1d654eea3e4f7626ff476a9bdaa - +(cherry picked from commit 87801a8fd06db1d654eea3e4f7626ff476a9bdaa) --- nscd/netgroupcache.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c -index 06b7d7b6..31b721bb 100644 +index 06b7d7b6ca..31b721bbee 100644 --- a/nscd/netgroupcache.c +++ b/nscd/netgroupcache.c @@ -502,12 +502,13 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req, @@ -37,5 +34,5 @@ index 06b7d7b6..31b721bb 100644 datahead_init_pos (&dataset->head, sizeof (*dataset) + req->key_len, -- -2.43.0 +2.33.0 diff --git a/backport-CVE-2024-33600-nscd-Do-not-send-missing-not-found-response.patch b/0021-CVE-2024-33600-nscd-Do-not-send-missing-not-found-re.patch similarity index 84% rename from backport-CVE-2024-33600-nscd-Do-not-send-missing-not-found-response.patch rename to 0021-CVE-2024-33600-nscd-Do-not-send-missing-not-found-re.patch index 4e0beda6aca8d4b202df6a942f3d57b34d0e57d1..29b9214f48ea7985809cf208c3e7d3a9b072f16f 100644 --- a/backport-CVE-2024-33600-nscd-Do-not-send-missing-not-found-response.patch +++ b/0021-CVE-2024-33600-nscd-Do-not-send-missing-not-found-re.patch @@ -1,23 +1,20 @@ -From 7835b00dbce53c3c87bbbb1754a95fb5e58187aa Mon Sep 17 00:00:00 2001 +From 541ea5172aa658c4bd5c6c6d6fd13903c3d5bb0a Mon Sep 17 00:00:00 2001 From: Florian Weimer Date: Thu, 25 Apr 2024 15:01:07 +0200 -Subject: [PATCH] CVE-2024-33600: nscd: Do not send missing not-found response - in addgetnetgrentX (bug 31678) +Subject: [PATCH 21/26] CVE-2024-33600: nscd: Do not send missing not-found + response in addgetnetgrentX (bug 31678) If we failed to add a not-found response to the cache, the dataset point can be null, resulting in a null pointer dereference. Reviewed-by: Siddhesh Poyarekar - -Conflict:NA -Reference:https://sourceware.org/git/?p=glibc.git;a=patch;h=7835b00dbce53c3c87bbbb1754a95fb5e58187aa - +(cherry picked from commit 7835b00dbce53c3c87bbbb1754a95fb5e58187aa) --- nscd/netgroupcache.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c -index 31b721bb..32c6aef3 100644 +index 31b721bbee..32c6aef370 100644 --- a/nscd/netgroupcache.c +++ b/nscd/netgroupcache.c @@ -147,7 +147,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, @@ -58,5 +55,5 @@ index 31b721bb..32c6aef3 100644 { /* If necessary, we also propagate the data to disk. 
*/ -- -2.43.0 +2.33.0 diff --git a/backport-CVE-2024-33600-nscd-Avoid-null-pointer-crash-after-not-found-response.patch b/0022-CVE-2024-33600-nscd-Avoid-null-pointer-crashes-after.patch similarity index 88% rename from backport-CVE-2024-33600-nscd-Avoid-null-pointer-crash-after-not-found-response.patch rename to 0022-CVE-2024-33600-nscd-Avoid-null-pointer-crashes-after.patch index 1642cbffbe661433dc62e7212357c927cdeaee2b..b861cda772b16aeaff0c12c3c278aed57322386d 100644 --- a/backport-CVE-2024-33600-nscd-Avoid-null-pointer-crash-after-not-found-response.patch +++ b/0022-CVE-2024-33600-nscd-Avoid-null-pointer-crashes-after.patch @@ -1,7 +1,7 @@ -From b048a482f088e53144d26a61c390bed0210f49f2 Mon Sep 17 00:00:00 2001 +From 2ae9446c1b7a3064743b4a51c0bbae668ee43e4c Mon Sep 17 00:00:00 2001 From: Florian Weimer Date: Thu, 25 Apr 2024 15:01:07 +0200 -Subject: [PATCH] CVE-2024-33600: nscd: Avoid null pointer crashes after +Subject: [PATCH 22/26] CVE-2024-33600: nscd: Avoid null pointer crashes after notfound response (bug 31678) The addgetnetgrentX call in addinnetgrX may have failed to produce @@ -16,16 +16,13 @@ add the negative response to the mapping, so that the client can get it from there in the future, instead of going through the socket. Reviewed-by: Siddhesh Poyarekar - -Conflict:NA -Reference:https://sourceware.org/git/?p=glibc.git;a=patch;h=b048a482f088e53144d26a61c390bed0210f49f2 - +(cherry picked from commit b048a482f088e53144d26a61c390bed0210f49f2) --- nscd/netgroupcache.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c -index 32c6aef3..c3cd79de 100644 +index 32c6aef370..c3cd79dec5 100644 --- a/nscd/netgroupcache.c +++ b/nscd/netgroupcache.c @@ -511,14 +511,15 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req, @@ -59,5 +56,5 @@ index 32c6aef3..c3cd79de 100644 /* We write the dataset before inserting it to the database since while inserting this thread might block and so would -- -2.43.0 +2.33.0 diff --git a/backport-CVE-2024-33601-CVE-2024-33602-nscd-Use-two-buffer-in-addgetnetgrentX.patch b/0023-CVE-2024-33601-CVE-2024-33602-nscd-netgroup-Use-two-.patch similarity index 97% rename from backport-CVE-2024-33601-CVE-2024-33602-nscd-Use-two-buffer-in-addgetnetgrentX.patch rename to 0023-CVE-2024-33601-CVE-2024-33602-nscd-netgroup-Use-two-.patch index 44c190e28c1d68ed43dc830578e3b8c56e13ec5c..8f027b386b79e265ad015241f58cf3411f0b535c 100644 --- a/backport-CVE-2024-33601-CVE-2024-33602-nscd-Use-two-buffer-in-addgetnetgrentX.patch +++ b/0023-CVE-2024-33601-CVE-2024-33602-nscd-netgroup-Use-two-.patch @@ -1,7 +1,7 @@ -From c04a21e050d64a1193a6daab872bca2528bda44b Mon Sep 17 00:00:00 2001 +From 71af8ca864345d39b746d5cee84b94b430fad5db Mon Sep 17 00:00:00 2001 From: Florian Weimer Date: Thu, 25 Apr 2024 15:01:07 +0200 -Subject: [PATCH] CVE-2024-33601, CVE-2024-33602: nscd: netgroup: Use two +Subject: [PATCH 23/26] CVE-2024-33601, CVE-2024-33602: nscd: netgroup: Use two buffers in addgetnetgrentX (bug 31680) This avoids potential memory corruption when the underlying NSS @@ -16,16 +16,13 @@ Scratch buffer allocation failure is handled by return -1 This fixes bug 31679. 
Reviewed-by: Siddhesh Poyarekar - -Conflict:NA -Reference:https://sourceware.org/git/?p=glibc.git;a=patch;h=c04a21e050d64a1193a6daab872bca2528bda44b - +(cherry picked from commit c04a21e050d64a1193a6daab872bca2528bda44b) --- nscd/netgroupcache.c | 219 ++++++++++++++++++++++++------------------- 1 file changed, 121 insertions(+), 98 deletions(-) diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c -index c3cd79de..cc4e270c 100644 +index c3cd79dec5..cc4e270c1f 100644 --- a/nscd/netgroupcache.c +++ b/nscd/netgroupcache.c @@ -23,6 +23,7 @@ @@ -389,5 +386,5 @@ index c3cd79de..cc4e270c 100644 + return timeout; } -- -2.43.0 +2.33.0 diff --git a/0024-elf-Also-compile-dl-misc.os-with-rtld-early-cflags.patch b/0024-elf-Also-compile-dl-misc.os-with-rtld-early-cflags.patch new file mode 100644 index 0000000000000000000000000000000000000000..8233425151fd990e80dc32278761f3a046bd5bb0 --- /dev/null +++ b/0024-elf-Also-compile-dl-misc.os-with-rtld-early-cflags.patch @@ -0,0 +1,54 @@ +From e9f05fa1c62c8044ff025963498063f73eb51c5f Mon Sep 17 00:00:00 2001 +From: "H.J. Lu" +Date: Thu, 25 Apr 2024 08:06:52 -0700 +Subject: [PATCH 24/26] elf: Also compile dl-misc.os with $(rtld-early-cflags) + +Also compile dl-misc.os with $(rtld-early-cflags) to avoid + +Program received signal SIGILL, Illegal instruction. +0x00007ffff7fd36ea in _dl_strtoul (nptr=nptr@entry=0x7fffffffe2c9 "2", + endptr=endptr@entry=0x7fffffffd728) at dl-misc.c:156 +156 bool positive = true; +(gdb) bt + #0 0x00007ffff7fd36ea in _dl_strtoul (nptr=nptr@entry=0x7fffffffe2c9 "2", + endptr=endptr@entry=0x7fffffffd728) at dl-misc.c:156 + #1 0x00007ffff7fdb1a9 in tunable_initialize ( + cur=cur@entry=0x7ffff7ffbc00 , + strval=strval@entry=0x7fffffffe2c9 "2", len=len@entry=1) + at dl-tunables.c:131 + #2 0x00007ffff7fdb3a2 in parse_tunables (valstring=) + at dl-tunables.c:258 + #3 0x00007ffff7fdb5d9 in __GI___tunables_init (envp=0x7fffffffdd58) + at dl-tunables.c:288 + #4 0x00007ffff7fe44c3 in _dl_sysdep_start ( + start_argptr=start_argptr@entry=0x7fffffffdcb0, + dl_main=dl_main@entry=0x7ffff7fe5f80 ) + at ../sysdeps/unix/sysv/linux/dl-sysdep.c:110 + #5 0x00007ffff7fe5cae in _dl_start_final (arg=0x7fffffffdcb0) at rtld.c:494 + #6 _dl_start (arg=0x7fffffffdcb0) at rtld.c:581 + #7 0x00007ffff7fe4b38 in _start () +(gdb) + +when setting GLIBC_TUNABLES in glibc compiled with APX. +Reviewed-by: Florian Weimer + +(cherry picked from commit 049b7684c912dd32b67b1b15b0f43bf07d5f512e) +--- + elf/Makefile | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/elf/Makefile b/elf/Makefile +index 1a05a6aaca..c2af11b92c 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -166,6 +166,7 @@ CFLAGS-.op += $(call elide-stack-protector,.op,$(elide-routines.os)) + CFLAGS-.os += $(call elide-stack-protector,.os,$(all-rtld-routines)) + + # Add the requested compiler flags to the early startup code. 
++CFLAGS-dl-misc.os += $(rtld-early-cflags) + CFLAGS-dl-printf.os += $(rtld-early-cflags) + CFLAGS-dl-setup_hash.os += $(rtld-early-cflags) + CFLAGS-dl-sysdep.os += $(rtld-early-cflags) +-- +2.33.0 + diff --git a/0025-nscd-Use-time_t-for-return-type-of-addgetnetgrentX.patch b/0025-nscd-Use-time_t-for-return-type-of-addgetnetgrentX.patch new file mode 100644 index 0000000000000000000000000000000000000000..dd97d37081273dc7f1b18646337ed55b6abe52bf --- /dev/null +++ b/0025-nscd-Use-time_t-for-return-type-of-addgetnetgrentX.patch @@ -0,0 +1,36 @@ +From f510d75ff7f7405328853bd67b75f6847dfe9d31 Mon Sep 17 00:00:00 2001 +From: Florian Weimer +Date: Thu, 2 May 2024 17:06:19 +0200 +Subject: [PATCH 25/26] nscd: Use time_t for return type of addgetnetgrentX + +Using int may give false results for future dates (timeouts after the +year 2028). + +Fixes commit 04a21e050d64a1193a6daab872bca2528bda44b ("CVE-2024-33601, +CVE-2024-33602: nscd: netgroup: Use two buffers in addgetnetgrentX +(bug 31680)"). + +Reviewed-by: Carlos O'Donell +(cherry picked from commit 4bbca1a44691a6e9adcee5c6798a707b626bc331) +--- + nscd/netgroupcache.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c +index cc4e270c1f..a63b260fdb 100644 +--- a/nscd/netgroupcache.c ++++ b/nscd/netgroupcache.c +@@ -680,8 +680,8 @@ readdinnetgr (struct database_dyn *db, struct hashentry *he, + .key_len = he->len + }; + +- int timeout = addinnetgrX (db, -1, &req, db->data + he->key, he->owner, +- he, dh); ++ time_t timeout = addinnetgrX (db, -1, &req, db->data + he->key, he->owner, ++ he, dh); + if (timeout < 0) + timeout = 0; + return timeout; +-- +2.33.0 + diff --git a/0026-resolv-Fix-some-unaligned-accesses-in-resolver-BZ-30.patch b/0026-resolv-Fix-some-unaligned-accesses-in-resolver-BZ-30.patch new file mode 100644 index 0000000000000000000000000000000000000000..38d2a395fa88ac16e10cf1d7637fca4f22533377 --- /dev/null +++ b/0026-resolv-Fix-some-unaligned-accesses-in-resolver-BZ-30.patch @@ -0,0 +1,57 @@ +From 5aa4bb67b9cbd334789199c03c9d30b90662a313 Mon Sep 17 00:00:00 2001 +From: John David Anglin +Date: Wed, 13 Sep 2023 11:04:41 +0000 +Subject: [PATCH 26/26] resolv: Fix some unaligned accesses in resolver [BZ + #30750] + +Signed-off-by: John David Anglin +--- + resolv/res_nameinquery.c | 3 ++- + resolv/res_queriesmatch.c | 3 ++- + 2 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/resolv/res_nameinquery.c b/resolv/res_nameinquery.c +index 24172700e1..ca56bc9283 100644 +--- a/resolv/res_nameinquery.c ++++ b/resolv/res_nameinquery.c +@@ -84,6 +84,7 @@ + + #include + #include ++#include + + /* Author: paul vixie, 29may94. */ + int +@@ -91,7 +92,7 @@ __libc_res_nameinquery (const char *name, int type, int class, + const unsigned char *buf, const unsigned char *eom) + { + const unsigned char *cp = buf + HFIXEDSZ; +- int qdcount = ntohs (((HEADER *) buf)->qdcount); ++ int qdcount = ntohs (((UHEADER *) buf)->qdcount); + + while (qdcount-- > 0) + { +diff --git a/resolv/res_queriesmatch.c b/resolv/res_queriesmatch.c +index 13a6936c47..ba1c1d0c0c 100644 +--- a/resolv/res_queriesmatch.c ++++ b/resolv/res_queriesmatch.c +@@ -83,6 +83,7 @@ + */ + + #include ++#include + + /* Author: paul vixie, 29may94. */ + int +@@ -102,7 +103,7 @@ __libc_res_queriesmatch (const unsigned char *buf1, const unsigned char *eom1, + order. We can compare it with the second buffer's QDCOUNT + value without doing this. 
*/ + int qdcount = ((HEADER *) buf1)->qdcount; +- if (qdcount != ((HEADER *) buf2)->qdcount) ++ if (qdcount != ((UHEADER *) buf2)->qdcount) + return 0; + + qdcount = htons (qdcount); +-- +2.33.0 + diff --git a/glibc.spec b/glibc.spec index 5ab59eb945b376489ebb6ba5e54d7f611abf2ca8..b470d979f84f76c6111ea0e5f0efa5b1ab926019 100644 --- a/glibc.spec +++ b/glibc.spec @@ -67,7 +67,7 @@ ############################################################################## Name: glibc Version: 2.38 -Release: 27 +Release: 28 Summary: The GNU libc libraries License: %{all_license} URL: http://www.gnu.org/software/glibc/ @@ -85,6 +85,7 @@ Source7: replace_same_file_to_hard_link.py Source8: testsuite_whitelist %endif +#upstream glibc patch Patch0: glibc-1070416.patch Patch1: stdlib-Improve-tst-realpath-compatibility-with-sourc.patch Patch2: 0001-x86-Fix-for-cache-computation-on-AMD-legacy-cpus.patch @@ -170,12 +171,34 @@ Patch81: LoongArch-Unify-Register-Names.patch Patch82: LoongArch-Update-hwcap.h-to-sync-with-LoongArch-kern.patch Patch83: linux-Sync-Linux-6.6-elf.h.patch Patch84: Decrease-value-of-arch_minimum_kernel-with-LoongArch.patch -Patch85: iconv-ISO-2022-CN-EXT-fix-out-of-bound-writes-when-w.patch -Patch86: backport-CVE-2024-33599-nscd-Stack-based-buffer-overflow-in-netgroup-cache.patch -Patch87: backport-CVE-2024-33600-nscd-Do-not-send-missing-not-found-response.patch -Patch88: backport-CVE-2024-33600-nscd-Avoid-null-pointer-crash-after-not-found-response.patch -Patch89: backport-CVE-2024-33601-CVE-2024-33602-nscd-Use-two-buffer-in-addgetnetgrentX.patch - +Patch85: 0001-S390-Do-not-clobber-r7-in-clone-BZ-31402.patch +Patch86: 0002-linux-Use-rseq-area-unconditionally-in-sched_getcpu-.patch +Patch87: 0003-LoongArch-Correct-__ieee754-_-_scalb-__ieee754-_-_sc.patch +Patch88: 0004-Add-HWCAP2_MOPS-from-Linux-6.5-to-AArch64-bits-hwcap.patch +Patch89: 0005-AArch64-Add-support-for-MOPS-memcpy-memmove-memset.patch +Patch90: 0006-AArch64-Cleanup-ifuncs.patch +Patch91: 0007-AArch64-Cleanup-emag-memset.patch +Patch92: 0008-AArch64-Add-memset_zva64.patch +Patch93: 0009-AArch64-Remove-Falkor-memcpy.patch +Patch94: 0010-aarch64-correct-CFI-in-rawmemchr-bug-31113.patch +Patch95: 0011-aarch64-fix-check-for-SVE-support-in-assembler.patch +Patch96: 0012-AArch64-Check-kernel-version-for-SVE-ifuncs.patch +Patch97: 0013-powerpc-Fix-ld.so-address-determination-for-PCREL-mo.patch +Patch98: 0014-iconv-ISO-2022-CN-EXT-fix-out-of-bound-writes-when-w.patch +Patch99: 0015-sparc-Remove-64-bit-check-on-sparc32-wordsize-BZ-275.patch +Patch100: 0016-login-Check-default-sizes-of-structs-utmp-utmpx-last.patch +Patch101: 0017-login-structs-utmp-utmpx-lastlog-_TIME_BITS-independ.patch +Patch102: 0018-nptl-Fix-tst-cancel30-on-kernels-without-ppoll_time6.patch +Patch103: 0019-i386-ulp-update-for-SSE2-disable-multi-arch-configur.patch +Patch104: 0020-CVE-2024-33599-nscd-Stack-based-buffer-overflow-in-n.patch +Patch105: 0021-CVE-2024-33600-nscd-Do-not-send-missing-not-found-re.patch +Patch106: 0022-CVE-2024-33600-nscd-Avoid-null-pointer-crashes-after.patch +Patch107: 0023-CVE-2024-33601-CVE-2024-33602-nscd-netgroup-Use-two-.patch +Patch108: 0024-elf-Also-compile-dl-misc.os-with-rtld-early-cflags.patch +Patch109: 0025-nscd-Use-time_t-for-return-type-of-addgetnetgrentX.patch +Patch110: 0026-resolv-Fix-some-unaligned-accesses-in-resolver-BZ-30.patch + +#openEuler patch list Patch9000: turn-default-value-of-x86_rep_stosb_threshold_form_2K_to_1M.patch Patch9001: locale-delete-no-hard-link-to-avoid-all_language-pac.patch #Patch9002: 
0001-add-base-files-for-libphtread-condition-family.patch
@@ -1397,6 +1420,35 @@
%endif

%changelog
+* Fri May 10 2024 Qingqing Li - 2.38-28
+- backport glibc upstream 2.38 branch, here are the 26 patches:
+- resolv: Fix some unaligned accesses in resolver [BZ #30750]
+- nscd: Use time_t for return type of addgetnetgrentX
+- elf: Also compile dl-misc.os with $(rtld-early-cflags)
+- CVE-2024-33601, CVE-2024-33602: nscd: netgroup: Use two buffers in addgetnetgrentX (bug 31680)
+- CVE-2024-33600: nscd: Avoid null pointer crashes after notfound response (bug 31678)
+- CVE-2024-33600: nscd: Do not send missing not-found response in addgetnetgrentX (bug 31678)
+- CVE-2024-33599: nscd: Stack-based buffer overflow in netgroup cache (bug 31677)
+- i386: ulp update for SSE2 --disable-multi-arch configurations
+- nptl: Fix tst-cancel30 on kernels without ppoll_time64 support
+- login: structs utmp, utmpx, lastlog _TIME_BITS independence (bug 30701)
+- login: Check default sizes of structs utmp, utmpx, lastlog
+- sparc: Remove 64 bit check on sparc32 wordsize (BZ 27574)
+- iconv: ISO-2022-CN-EXT: fix out-of-bound writes when writing escape sequence (CVE-2024-2961)
+- powerpc: Fix ld.so address determination for PCREL mode (bug 31640)
+- AArch64: Check kernel version for SVE ifuncs
+- aarch64: fix check for SVE support in assembler
+- aarch64: correct CFI in rawmemchr (bug 31113)
+- AArch64: Remove Falkor memcpy
+- AArch64: Add memset_zva64
+- AArch64: Cleanup emag memset
+- AArch64: Cleanup ifuncs
+- AArch64: Add support for MOPS memcpy/memmove/memset
+- Add HWCAP2_MOPS from Linux 6.5 to AArch64 bits/hwcap.h
+- LoongArch: Correct {__ieee754, _}_scalb -> {__ieee754, _}_scalbf
+- linux: Use rseq area unconditionally in sched_getcpu (bug 31479)
+- S390: Do not clobber r7 in clone [BZ #31402]
+
* Mon Apr 29 2024 chengyechun - 2.38-27
- Type:CVE
- ID:CVE-2024-33599 CVE-2024-33600 CVE-2024-33601 CVE-2024-33602