From 4ee3f7b30c13b20e6523b0fc12a1f15a2383dca3 Mon Sep 17 00:00:00 2001 From: wangshuo Date: Tue, 2 Jun 2020 14:35:42 +0800 Subject: [PATCH 1/2] fix array overflow in backtrace on PowerPC --- ...ack-corruption-from-range-reduction-.patch | 143 +++ ...ow-in-backtrace-on-PowerPC-bug-25423.patch | 68 ++ backport-Kunpeng-patches.patch | 1088 +++++++++++++++++ glibc.spec | 16 +- 4 files changed, 1313 insertions(+), 2 deletions(-) create mode 100644 Avoid-ldbl-96-stack-corruption-from-range-reduction-.patch create mode 100644 backport-CVE-2020-1751-Fix-array-overflow-in-backtrace-on-PowerPC-bug-25423.patch create mode 100644 backport-Kunpeng-patches.patch diff --git a/Avoid-ldbl-96-stack-corruption-from-range-reduction-.patch b/Avoid-ldbl-96-stack-corruption-from-range-reduction-.patch new file mode 100644 index 0000000..b318796 --- /dev/null +++ b/Avoid-ldbl-96-stack-corruption-from-range-reduction-.patch @@ -0,0 +1,143 @@ +From 9333498794cde1d5cca518badf79533a24114b6f Mon Sep 17 00:00:00 2001 +From: Joseph Myers +Date: Wed, 12 Feb 2020 23:31:56 +0000 +Subject: [PATCH] Avoid ldbl-96 stack corruption from range reduction of + pseudo-zero (bug 25487). + +Bug 25487 reports stack corruption in ldbl-96 sinl on a pseudo-zero +argument (a representation where all the significand bits, including +the explicit high bit, are zero, but the exponent is not zero, which +is not a valid representation for the long double type). + +Although this is not a valid long double representation, existing +practice in this area (see bug 4586, originally marked invalid but +subsequently fixed) is that we still seek to avoid invalid memory +accesses as a result, in case of programs that treat arbitrary binary +data as long double representations, although the invalid +representations of the ldbl-96 format do not need to be consistently +handled the same as any particular valid representation. 
+ +This patch makes the range reduction detect pseudo-zero and unnormal +representations that would otherwise go to __kernel_rem_pio2, and +returns a NaN for them instead of continuing with the range reduction +process. (Pseudo-zero and unnormal representations whose unbiased +exponent is less than -1 have already been safely returned from the +function before this point without going through the rest of range +reduction.) Pseudo-zero representations would previously result in +the value passed to __kernel_rem_pio2 being all-zero, which is +definitely unsafe; unnormal representations would previously result in +a value passed whose high bit is zero, which might well be unsafe +since that is not a form of input expected by __kernel_rem_pio2. + +Tested for x86_64. + +backport to openeuler. + +--- + NEWS | 4 +++ + sysdeps/ieee754/ldbl-96/Makefile | 3 +- + sysdeps/ieee754/ldbl-96/e_rem_pio2l.c | 12 +++++++ + sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c | 41 ++++++++++++++++++++++ + 4 files changed, 59 insertions(+), 1 deletion(-) + create mode 100644 sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c + +diff --git a/NEWS b/NEWS +index 2b681ed7..eb31aca6 100644 +--- a/NEWS ++++ b/NEWS +@@ -246,6 +246,10 @@ Security related changes: + addresses for loaded libraries and thus bypass ASLR for a setuid + program. Reported by Marcin Kościelnicki. + ++ CVE-2020-10029: Trigonometric functions on x86 targets suffered from stack ++ corruption when they were passed a pseudo-zero argument. Reported by Guido ++ Vranken / ForAllSecure Mayhem. ++ + The following bugs are resolved with this release: + + [1190] stdio: fgetc()/fread() behaviour is not POSIX compliant +diff --git a/sysdeps/ieee754/ldbl-96/Makefile b/sysdeps/ieee754/ldbl-96/Makefile +index 790f670e..99c596e3 100644 +--- a/sysdeps/ieee754/ldbl-96/Makefile ++++ b/sysdeps/ieee754/ldbl-96/Makefile +@@ -17,5 +17,6 @@ + # . 
+ + ifeq ($(subdir),math) +-tests += test-canonical-ldbl-96 test-totalorderl-ldbl-96 ++tests += test-canonical-ldbl-96 test-totalorderl-ldbl-96 test-sinl-pseudo ++CFLAGS-test-sinl-pseudo.c += -fstack-protector-all + endif +diff --git a/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c b/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c +index f67805f2..b0b899bc 100644 +--- a/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c ++++ b/sysdeps/ieee754/ldbl-96/e_rem_pio2l.c +@@ -210,6 +210,18 @@ __ieee754_rem_pio2l (long double x, long double *y) + return 0; + } + ++ if ((i0 & 0x80000000) == 0) ++ { ++ /* Pseudo-zero and unnormal representations are not valid ++ representations of long double. We need to avoid stack ++ corruption in __kernel_rem_pio2, which expects input in a ++ particular normal form, but those representations do not need ++ to be consistently handled like any particular floating-point ++ value. */ ++ y[1] = y[0] = __builtin_nanl (""); ++ return 0; ++ } ++ + /* Split the 64 bits of the mantissa into three 24-bit integers + stored in a double array. */ + exp = j0 - 23; +diff --git a/sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c b/sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c +new file mode 100644 +index 00000000..abbce861 +--- /dev/null ++++ b/sysdeps/ieee754/ldbl-96/test-sinl-pseudo.c +@@ -0,0 +1,41 @@ ++/* Test sinl for pseudo-zeros and unnormals for ldbl-96 (bug 25487). ++ Copyright (C) 2020 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <math.h> ++#include <math_ldbl.h> ++#include <stdint.h> ++ ++static int ++do_test (void) ++{ ++ for (int i = 0; i < 64; i++) ++ { ++ uint64_t sig = i == 63 ? 0 : 1ULL << i; ++ long double ld; ++ SET_LDOUBLE_WORDS (ld, 0x4141, ++ sig >> 32, sig & 0xffffffffULL); ++ /* The requirement is that no stack overflow occurs when the ++ pseudo-zero or unnormal goes through range reduction. */ ++ volatile long double ldr; ++ ldr = sinl (ld); ++ (void) ldr; ++ } ++ return 0; ++} ++ ++#include <support/test-driver.c> +-- +2.19.1 + diff --git a/backport-CVE-2020-1751-Fix-array-overflow-in-backtrace-on-PowerPC-bug-25423.patch b/backport-CVE-2020-1751-Fix-array-overflow-in-backtrace-on-PowerPC-bug-25423.patch new file mode 100644 index 0000000..4bb478d --- /dev/null +++ b/backport-CVE-2020-1751-Fix-array-overflow-in-backtrace-on-PowerPC-bug-25423.patch @@ -0,0 +1,68 @@ +From d93769405996dfc11d216ddbe415946617b5a494 Mon Sep 17 00:00:00 2001 +From: Andreas Schwab +Date: Mon, 20 Jan 2020 17:01:50 +0100 +Subject: [PATCH] Fix array overflow in backtrace on PowerPC (bug 25423) + +When unwinding through a signal frame the backtrace function on PowerPC +didn't check array bounds when storing the frame address. Fixes commit +d400dcac5e ("PowerPC: fix backtrace to handle signal trampolines"). + +Signed-off-by: wuxu.wu +--- + debug/tst-backtrace5.c | 12 ++++++++++++ + sysdeps/powerpc/powerpc32/backtrace.c | 2 ++ + sysdeps/powerpc/powerpc64/backtrace.c | 2 ++ + 3 files changed, 16 insertions(+) + +diff --git a/debug/tst-backtrace5.c b/debug/tst-backtrace5.c +index e7ce410..b2f4616 100644 +--- a/debug/tst-backtrace5.c ++++ b/debug/tst-backtrace5.c +@@ -89,6 +89,18 @@ handle_signal (int signum) + } + /* Symbol names are not available for static functions, so we do not + check do_test. */ ++ ++ /* Check that backtrace does not return more than what fits in the array ++ (bug 25423). 
*/ ++ for (int j = 0; j < NUM_FUNCTIONS; j++) ++ { ++ n = backtrace (addresses, j); ++ if (n > j) ++ { ++ FAIL (); ++ return; ++ } ++ } + } + + NO_INLINE int +diff --git a/sysdeps/powerpc/powerpc32/backtrace.c b/sysdeps/powerpc/powerpc32/backtrace.c +index 7c2d472..d1456c8 100644 +--- a/sysdeps/powerpc/powerpc32/backtrace.c ++++ b/sysdeps/powerpc/powerpc32/backtrace.c +@@ -114,6 +114,8 @@ __backtrace (void **array, int size) + } + if (gregset) + { ++ if (count + 1 == size) ++ break; + array[++count] = (void*)((*gregset)[PT_NIP]); + current = (void*)((*gregset)[PT_R1]); + } +diff --git a/sysdeps/powerpc/powerpc64/backtrace.c b/sysdeps/powerpc/powerpc64/backtrace.c +index 65c260a..8a53a10 100644 +--- a/sysdeps/powerpc/powerpc64/backtrace.c ++++ b/sysdeps/powerpc/powerpc64/backtrace.c +@@ -87,6 +87,8 @@ __backtrace (void **array, int size) + if (is_sigtramp_address (current->return_address)) + { + struct signal_frame_64 *sigframe = (struct signal_frame_64*) current; ++ if (count + 1 == size) ++ break; + array[++count] = (void*) sigframe->uc.uc_mcontext.gp_regs[PT_NIP]; + current = (void*) sigframe->uc.uc_mcontext.gp_regs[PT_R1]; + } +-- +1.8.3.1 + diff --git a/backport-Kunpeng-patches.patch b/backport-Kunpeng-patches.patch new file mode 100644 index 0000000..7b8fee0 --- /dev/null +++ b/backport-Kunpeng-patches.patch @@ -0,0 +1,1088 @@ +From 0dfa5db2106d75db595e83f064352fb89d92986e Mon Sep 17 00:00:00 2001 +From: wangbin224 +Date: Sat, 28 Mar 2020 19:14:41 +0800 +Subject: [PATCH] glibc: backport Kunpeng patches + +backport Kunpeng patches + +Signed-off-by: wangbin224 +--- + manual/tunables.texi | 2 +- + sysdeps/aarch64/memcmp.S | 4 +- + sysdeps/aarch64/memrchr.S | 15 +- + sysdeps/aarch64/multiarch/Makefile | 2 +- + sysdeps/aarch64/multiarch/ifunc-impl-list.c | 54 +- + sysdeps/aarch64/multiarch/memcpy.c | 9 +- + sysdeps/aarch64/multiarch/memcpy_kunpeng.S | 576 ------------------ + sysdeps/aarch64/multiarch/memmove.c | 11 +- + sysdeps/aarch64/multiarch/memset.c | 14 +- + 
sysdeps/aarch64/multiarch/memset_kunpeng.S | 58 +- + sysdeps/aarch64/strcpy.S | 6 +- + sysdeps/aarch64/strnlen.S | 4 +- + .../unix/sysv/linux/aarch64/cpu-features.c | 4 +- + .../unix/sysv/linux/aarch64/cpu-features.h | 7 +- + 14 files changed, 86 insertions(+), 680 deletions(-) + delete mode 100755 sysdeps/aarch64/multiarch/memcpy_kunpeng.S + +diff --git a/manual/tunables.texi b/manual/tunables.texi +index bb4819bd..124b39b6 100644 +--- a/manual/tunables.texi ++++ b/manual/tunables.texi +@@ -333,7 +333,7 @@ This tunable is specific to powerpc, powerpc64 and powerpc64le. + The @code{glibc.tune.cpu=xxx} tunable allows the user to tell @theglibc{} to + assume that the CPU is @code{xxx} where xxx may have one of these values: + @code{generic}, @code{falkor}, @code{thunderxt88}, @code{thunderx2t99}, +-@code{thunderx2t99p1}. ++@code{thunderx2t99p1}, @code{kunpeng}. + + This tunable is specific to aarch64. + @end deftp +diff --git a/sysdeps/aarch64/memcmp.S b/sysdeps/aarch64/memcmp.S +index 04129d83..a2138616 100644 +--- a/sysdeps/aarch64/memcmp.S ++++ b/sysdeps/aarch64/memcmp.S +@@ -1,6 +1,6 @@ + /* memcmp - compare memory + +- Copyright (C) 2013-2019 Free Software Foundation, Inc. ++ Copyright (C) 2013-2018 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + +@@ -16,7 +16,7 @@ + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see +- . */ ++ . */ + + #include + +diff --git a/sysdeps/aarch64/memrchr.S b/sysdeps/aarch64/memrchr.S +index 9095304b..0565168a 100644 +--- a/sysdeps/aarch64/memrchr.S ++++ b/sysdeps/aarch64/memrchr.S +@@ -16,8 +16,8 @@ + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see +- . */ +- ++ . */ ++ + #include + + /* Assumptions: +@@ -61,7 +61,7 @@ + * things occur in the original string, counting trailing zeros allows to + * identify exactly which byte has matched. 
+ */ +- ++ + ENTRY (__memrchr) + /* Do not dereference srcin if no bytes to compare. */ + cbz cntin, L(zero_length) +@@ -101,7 +101,7 @@ ENTRY (__memrchr) + addp vend.16b, vend.16b, vend.16b /* 128->64 */ + mov synd, vend.2d[0] + /* Clear the (32-soff)*2 upper bits */ +- lsl tmp, soff, #1 ++ lsl tmp, soff, #1 + lsl synd, synd, tmp + lsr synd, synd, tmp + /* The first block can also be the last */ +@@ -135,16 +135,16 @@ L(end): + b.hi L(tail) + + L(masklast): +- /* Clear the (32 - ((cntrem + (32-soff)) % 32)) * 2 lower bits */ ++ /* Clear the (32 - ((cntrem + (32-soff)) % 32)) * 2 lower bits */ + add tmp, cntrem, soff + and tmp, tmp, #31 + sub tmp, tmp, #32 +- neg tmp, tmp, lsl #1 ++ neg tmp, tmp, lsl #1 + lsr synd, synd, tmp + lsl synd, synd, tmp + + L(tail): +- /* Compensate the last post-increment*/ ++ /* Compensate the last post-increment*/ + add seek_dst, seek_dst, #32 + /* Check that we have found a character */ + cmp synd, #0 +@@ -163,4 +163,3 @@ L(zero_length): + END (__memrchr) + weak_alias (__memrchr, memrchr) + libc_hidden_builtin_def (memrchr) +- +diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile +index 90529d40..722ed824 100644 +--- a/sysdeps/aarch64/multiarch/Makefile ++++ b/sysdeps/aarch64/multiarch/Makefile +@@ -1,4 +1,4 @@ + ifeq ($(subdir),string) +-sysdep_routines += memcpy_kunpeng memcpy_generic memcpy_thunderx memcpy_thunderx2 \ ++sysdep_routines += memcpy_generic memcpy_thunderx memcpy_thunderx2 \ + memcpy_falkor memmove_falkor memset_generic memset_falkor memset_kunpeng + endif +diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c +index bef9b06d..0026dbba 100644 +--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c ++++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c +@@ -1,5 +1,5 @@ + /* Enumerate available IFUNC implementations of a function. AARCH64 version. +- Copyright (C) 2017-2019 Free Software Foundation, Inc. 
++ Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or +@@ -25,36 +25,34 @@ + #include + + /* Maximum number of IFUNC implementations. */ +-#define MAX_IFUNC 5 ++#define MAX_IFUNC 4 + + size_t + __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + size_t max) + { +- assert(max >= MAX_IFUNC); +- +- size_t i = 0; +- +- INIT_ARCH(); +- +- /* Support sysdeps/aarch64/multiarch/memcpy.c and memmove.c. */ +- IFUNC_IMPL(i, name, memcpy, +- IFUNC_IMPL_ADD(array, i, memcpy, 1, __memcpy_thunderx) +- IFUNC_IMPL_ADD(array, i, memcpy, 1, __memcpy_thunderx2) +- IFUNC_IMPL_ADD(array, i, memcpy, 1, __memcpy_falkor) +- IFUNC_IMPL_ADD(array, i, memcpy, 1, __memcpy_kunpeng) +- IFUNC_IMPL_ADD(array, i, memcpy, 1, __memcpy_generic)) +- IFUNC_IMPL(i, name, memmove, +- IFUNC_IMPL_ADD(array, i, memmove, 1, __memmove_thunderx) +- IFUNC_IMPL_ADD(array, i, memmove, 1, __memmove_falkor) +- IFUNC_IMPL_ADD(array, i, memmove, 1, __memmove_kunpeng) +- IFUNC_IMPL_ADD(array, i, memmove, 1, __memmove_generic)) +- IFUNC_IMPL(i, name, memset, +- /* Enable this on non-falkor processors too so that other cores +- can do a comparative analysis with __memset_generic. */ +- IFUNC_IMPL_ADD(array, i, memset, (zva_size == 64), __memset_falkor) +- IFUNC_IMPL_ADD(array, i, memset, 1, __memset_generic) +- IFUNC_IMPL_ADD(array, i, memset, 1, __memset_kunpeng)) +- +- return i; ++ assert (max >= MAX_IFUNC); ++ ++ size_t i = 0; ++ ++ INIT_ARCH (); ++ ++ /* Support sysdeps/aarch64/multiarch/memcpy.c and memmove.c. 
*/ ++ IFUNC_IMPL (i, name, memcpy, ++ IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_thunderx) ++ IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_thunderx2) ++ IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_falkor) ++ IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic)) ++ IFUNC_IMPL (i, name, memmove, ++ IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_thunderx) ++ IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_falkor) ++ IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic)) ++ IFUNC_IMPL (i, name, memset, ++ /* Enable this on non-falkor processors too so that other cores ++ can do a comparative analysis with __memset_generic. */ ++ IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_falkor) ++ IFUNC_IMPL_ADD (array, i, memset, 1, __memset_kunpeng) ++ IFUNC_IMPL_ADD (array, i, memset, 1, __memset_generic)) ++ ++ return i; + } +diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c +index 150e1ca9..2d358a83 100644 +--- a/sysdeps/aarch64/multiarch/memcpy.c ++++ b/sysdeps/aarch64/multiarch/memcpy.c +@@ -1,5 +1,5 @@ + /* Multiple versions of memcpy. AARCH64 version. +- Copyright (C) 2017-2019 Free Software Foundation, Inc. ++ Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or +@@ -32,14 +32,11 @@ extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden; + extern __typeof (__redirect_memcpy) __memcpy_thunderx attribute_hidden; + extern __typeof (__redirect_memcpy) __memcpy_thunderx2 attribute_hidden; + extern __typeof (__redirect_memcpy) __memcpy_falkor attribute_hidden; +-extern __typeof (__redirect_memcpy) __memcpy_kunpeng attribute_hidden; + + libc_ifunc (__libc_memcpy, +- IS_KUNPENG920(midr) +- ?__memcpy_kunpeng +- : (IS_THUNDERX (midr) ++ (IS_THUNDERX (midr) + ? __memcpy_thunderx +- : (IS_FALKOR (midr) || IS_PHECDA (midr) ++ : (IS_FALKOR (midr) || IS_PHECDA (midr) || IS_KUNPENG920 (midr) + ? 
__memcpy_falkor + : (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr) + ? __memcpy_thunderx2 +diff --git a/sysdeps/aarch64/multiarch/memcpy_kunpeng.S b/sysdeps/aarch64/multiarch/memcpy_kunpeng.S +deleted file mode 100755 +index 2102478a..00000000 +--- a/sysdeps/aarch64/multiarch/memcpy_kunpeng.S ++++ /dev/null +@@ -1,576 +0,0 @@ +-/* A Kunpeng Optimized memcpy implementation for AARCH64. +- Copyright (C) 2018-2019 Free Software Foundation, Inc. +- +- This file is part of the GNU C Library. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#include +- +-/* Assumptions: +- * +- * ARMv8-a, AArch64, unaligned accesses. 
+- * +- */ +- +-#define dstin x0 +-#define src x1 +-#define count x2 +-#define dst x3 +-#define srcend x4 +-#define dstend x5 +-#define tmp2 x6 +-#define tmp3 x7 +-#define tmp3w w7 +-#define A_l x6 +-#define A_lw w6 +-#define A_h x7 +-#define A_hw w7 +-#define B_l x8 +-#define B_lw w8 +-#define B_h x9 +-#define C_l x10 +-#define C_h x11 +-#define D_l x12 +-#define D_h x13 +-#define E_l src +-#define E_h count +-#define F_l srcend +-#define F_h dst +-#define G_l count +-#define G_h dst +-#define tmp1 x14 +- +-#define A_q q0 +-#define B_q q1 +-#define C_q q2 +-#define D_q q3 +-#define E_q q4 +-#define F_q q5 +-#define G_q q6 +-#define H_q q7 +-#define I_q q16 +-#define J_q q17 +- +-#define A_v v0 +-#define B_v v1 +-#define C_v v2 +-#define D_v v3 +-#define E_v v4 +-#define F_v v5 +-#define G_v v6 +-#define H_v v7 +-#define I_v v16 +-#define J_v v17 +- +-#ifndef MEMMOVE +-# define MEMMOVE memmove +-#endif +-#ifndef MEMCPY +-# define MEMCPY memcpy +-#endif +- +-#if IS_IN (libc) +- +-#undef MEMCPY +-#define MEMCPY __memcpy_kunpeng +-#undef MEMMOVE +-#define MEMMOVE __memmove_kunpeng +- +- +-/* Overlapping large forward memmoves use a loop that copies backwards. +- Otherwise memcpy is used. Small moves branch to memcopy16 directly. +- The longer memcpy cases fall through to the memcpy head. +-*/ +- +-ENTRY_ALIGN (MEMMOVE, 6) +- +- DELOUSE (0) +- DELOUSE (1) +- DELOUSE (2) +- +- sub tmp1, dstin, src +- cmp count, 512 +- ccmp tmp1, count, 2, hi +- b.lo L(move_long) +- cmp count, 96 +- ccmp tmp1, count, 2, hi +- b.lo L(move_middle) +- +-END (MEMMOVE) +-libc_hidden_builtin_def (MEMMOVE) +- +- +-/* Copies are split into 3 main cases: small copies of up to 16 bytes, +- medium copies of 17..96 bytes which are fully unrolled. Large copies +- of more than 96 bytes align the destination and use load-and-merge +- approach in the case src and dst addresses are unaligned not evenly, +- so that, actual loads and stores are always aligned. 
+- Large copies use the loops processing 64 bytes per iteration for +- unaligned case and 128 bytes per iteration for aligned ones. +-*/ +- +-#define MEMCPY_PREFETCH_LDR 640 +- +- .p2align 4 +-ENTRY (MEMCPY) +- +- DELOUSE (0) +- DELOUSE (1) +- DELOUSE (2) +- +- add srcend, src, count +- cmp count, 16 +- b.ls L(memcopy16) +- add dstend, dstin, count +- cmp count, 96 +- b.hi L(memcopy_long) +- +- /* Medium copies: 17..96 bytes. */ +- ldr A_q, [src], #16 +- and tmp1, src, 15 +- ldr E_q, [srcend, -16] +- cmp count, 64 +- b.gt L(memcpy_copy96) +- cmp count, 48 +- b.le L(bytes_17_to_48) +- /* 49..64 bytes */ +- ldp B_q, C_q, [src] +- str E_q, [dstend, -16] +- stp A_q, B_q, [dstin] +- str C_q, [dstin, 32] +- ret +- +-L(bytes_17_to_48): +- /* 17..48 bytes*/ +- cmp count, 32 +- b.gt L(bytes_32_to_48) +- /* 17..32 bytes*/ +- str A_q, [dstin] +- str E_q, [dstend, -16] +- ret +- +-L(bytes_32_to_48): +- /* 32..48 */ +- ldr B_q, [src] +- str A_q, [dstin] +- str E_q, [dstend, -16] +- str B_q, [dstin, 16] +- ret +- +- .p2align 4 +- /* Small copies: 0..16 bytes. */ +-L(memcopy16): +- cmp count, 8 +- b.lo L(bytes_0_to_8) +- ldr A_l, [src] +- ldr A_h, [srcend, -8] +- add dstend, dstin, count +- str A_l, [dstin] +- str A_h, [dstend, -8] +- ret +- .p2align 4 +- +-L(bytes_0_to_8): +- tbz count, 2, L(bytes_0_to_3) +- ldr A_lw, [src] +- ldr A_hw, [srcend, -4] +- add dstend, dstin, count +- str A_lw, [dstin] +- str A_hw, [dstend, -4] +- ret +- +- /* Copy 0..3 bytes. Use a branchless sequence that copies the same +- byte 3 times if count==1, or the 2nd byte twice if count==2. */ +-L(bytes_0_to_3): +- cbz count, 1f +- lsr tmp1, count, 1 +- ldrb A_lw, [src] +- ldrb A_hw, [srcend, -1] +- add dstend, dstin, count +- ldrb B_lw, [src, tmp1] +- strb B_lw, [dstin, tmp1] +- strb A_hw, [dstend, -1] +- strb A_lw, [dstin] +-1: +- ret +- +- .p2align 4 +- +-L(memcpy_copy96): +- /* Copying 65..96 bytes. A_q (first 16 bytes) and +- E_q(last 16 bytes) are already loaded. 
The size +- is large enough to benefit from aligned loads */ +- bic src, src, 15 +- ldp B_q, C_q, [src] +- /* Loaded 64 bytes, second 16-bytes chunk can be +- overlapping with the first chunk by tmp1 bytes. +- Stored 16 bytes. */ +- sub dst, dstin, tmp1 +- add count, count, tmp1 +- /* The range of count being [65..96] becomes [65..111] +- after tmp [0..15] gets added to it, +- count now is +48 */ +- cmp count, 80 +- b.gt L(copy96_medium) +- ldr D_q, [src, 32] +- stp B_q, C_q, [dst, 16] +- str D_q, [dst, 48] +- str A_q, [dstin] +- str E_q, [dstend, -16] +- ret +- +- .p2align 4 +-L(copy96_medium): +- ldp D_q, G_q, [src, 32] +- cmp count, 96 +- b.gt L(copy96_large) +- stp B_q, C_q, [dst, 16] +- stp D_q, G_q, [dst, 48] +- str A_q, [dstin] +- str E_q, [dstend, -16] +- ret +- +-L(copy96_large): +- ldr F_q, [src, 64] +- str B_q, [dst, 16] +- stp C_q, D_q, [dst, 32] +- stp G_q, F_q, [dst, 64] +- str A_q, [dstin] +- str E_q, [dstend, -16] +- ret +- +- .p2align 4 +-L(memcopy_long): +- cmp count, 2048 +- b.ls L(copy2048_large) +- ldr A_q, [src], #16 +- and tmp1, src, 15 +- bic src, src, 15 +- ldp B_q, C_q, [src], #32 +- sub dst, dstin, tmp1 +- add count, count, tmp1 +- add dst, dst, 16 +- and tmp1, dst, 15 +- ldp D_q, E_q, [src], #32 +- str A_q, [dstin] +- +- /* Already loaded 64+16 bytes. 
Check if at +- least 64 more bytes left */ +- subs count, count, 64+64+16 +- b.lt L(loop128_exit0) +- cmp count, MEMCPY_PREFETCH_LDR + 64 + 32 +- b.lt L(loop128) +- cbnz tmp1, L(dst_unaligned) +- sub count, count, MEMCPY_PREFETCH_LDR + 64 + 32 +- +- .p2align 4 +- +-L(loop128_prefetch): +- prfm pldl1strm, [src, MEMCPY_PREFETCH_LDR] +- ldp F_q, G_q, [src], #32 +- stp B_q, C_q, [dst], #32 +- ldp H_q, I_q, [src], #32 +- prfm pldl1strm, [src, MEMCPY_PREFETCH_LDR] +- ldp B_q, C_q, [src], #32 +- stp D_q, E_q, [dst], #32 +- ldp D_q, E_q, [src], #32 +- stp F_q, G_q, [dst], #32 +- stp H_q, I_q, [dst], #32 +- subs count, count, 128 +- b.ge L(loop128_prefetch) +- +- add count, count, MEMCPY_PREFETCH_LDR + 64 + 32 +- .p2align 4 +-L(loop128): +- ldp F_q, G_q, [src], #32 +- ldp H_q, I_q, [src], #32 +- stp B_q, C_q, [dst], #32 +- stp D_q, E_q, [dst], #32 +- subs count, count, 64 +- b.lt L(loop128_exit1) +- ldp B_q, C_q, [src], #32 +- ldp D_q, E_q, [src], #32 +- stp F_q, G_q, [dst], #32 +- stp H_q, I_q, [dst], #32 +- subs count, count, 64 +- b.ge L(loop128) +-L(loop128_exit0): +- ldp F_q, G_q, [srcend, -64] +- ldp H_q, I_q, [srcend, -32] +- stp B_q, C_q, [dst], #32 +- stp D_q, E_q, [dst] +- stp F_q, G_q, [dstend, -64] +- stp H_q, I_q, [dstend, -32] +- ret +-L(loop128_exit1): +- ldp B_q, C_q, [srcend, -64] +- ldp D_q, E_q, [srcend, -32] +- stp F_q, G_q, [dst], #32 +- stp H_q, I_q, [dst] +- stp B_q, C_q, [dstend, -64] +- stp D_q, E_q, [dstend, -32] +- ret +- +-L(copy2048_large): +- and tmp1, dstin, 15 +- bic dst, dstin, 15 +- ldp D_l, D_h, [src] +- sub src, src, tmp1 +- add count, count, tmp1 /* Count is now 16 too large. */ +- ldp A_l, A_h, [src, 16] +- stp D_l, D_h, [dstin] +- ldp B_l, B_h, [src, 32] +- ldp C_l, C_h, [src, 48] +- ldp D_l, D_h, [src, 64]! +- subs count, count, 128 + 16 /* Test and readjust count. 
*/ +- b.ls L(last64) +- +-L(loop64): +- stp A_l, A_h, [dst, 16] +- ldp A_l, A_h, [src, 16] +- stp B_l, B_h, [dst, 32] +- ldp B_l, B_h, [src, 32] +- stp C_l, C_h, [dst, 48] +- ldp C_l, C_h, [src, 48] +- stp D_l, D_h, [dst, 64] +- ldp D_l, D_h, [src, 64] +- add dst, dst, 64 +- add src, src, 64 +- subs count, count, 64 +- b.hi L(loop64) +- +- /* Write the last full set of 64 bytes. The remainder is at most 64 +- bytes, so it is safe to always copy 64 bytes from the end even if +- there is just 1 byte left. */ +-L(last64): +- ldp E_l, E_h, [srcend, -64] +- stp A_l, A_h, [dst, 16] +- ldp A_l, A_h, [srcend, -48] +- stp B_l, B_h, [dst, 32] +- ldp B_l, B_h, [srcend, -32] +- stp C_l, C_h, [dst, 48] +- ldp C_l, C_h, [srcend, -16] +- stp D_l, D_h, [dst, 64] +- stp E_l, E_h, [dstend, -64] +- stp A_l, A_h, [dstend, -48] +- stp B_l, B_h, [dstend, -32] +- stp C_l, C_h, [dstend, -16] +- ret +- +- +-L(dst_unaligned_tail): +- ldp C_q, D_q, [srcend, -64] +- ldp E_q, F_q, [srcend, -32] +- stp A_q, B_q, [dst], #32 +- stp H_q, I_q, [dst], #16 +- str G_q, [dst, tmp1] +- stp C_q, D_q, [dstend, -64] +- stp E_q, F_q, [dstend, -32] +- ret +- +-L(dst_unaligned): +- /* For the unaligned store case the code loads two +- aligned chunks and then merges them using ext +- instruction. This can be up to 30% faster than +- the the simple unaligned store access. +- +- Current state: tmp1 = dst % 16; C_q, D_q, E_q +- contains data yet to be stored. src and dst points +- to next-to-be-processed data. A_q, B_q contains +- data already stored before, count = bytes left to +- be load decremented by 64. +- +- The control is passed here if at least 64 bytes left +- to be loaded. The code does two aligned loads and then +- extracts (16-tmp1) bytes from the first register and +- tmp1 bytes from the next register forming the value +- for the aligned store. +- +- As ext instruction can only have it's index encoded +- as immediate. 15 code chunks process each possible +- index value. 
Computed goto is used to reach the +- required code. */ +- +- /* Store the 16 bytes to dst and align dst for further +- operations, several bytes will be stored at this +- address once more */ +- +- ldp F_q, G_q, [src], #32 +- stp B_q, C_q, [dst], #32 +- bic dst, dst, 15 +- sub count, count, 32 +- adrp tmp2, L(ext_table) +- add tmp2, tmp2, :lo12:L(ext_table) +- add tmp2, tmp2, tmp1, LSL #2 +- ldr tmp3w, [tmp2] +- add tmp2, tmp2, tmp3w, SXTW +- br tmp2 +- +-.p2align 4 +- /* to make the loop in each chunk 16-bytes aligned */ +- nop +-#define EXT_CHUNK(shft) \ +-L(ext_size_ ## shft):;\ +- ext A_v.16b, C_v.16b, D_v.16b, 16-shft;\ +- ext B_v.16b, D_v.16b, E_v.16b, 16-shft;\ +- ext H_v.16b, E_v.16b, F_v.16b, 16-shft;\ +-1:;\ +- stp A_q, B_q, [dst], #32;\ +- prfm pldl1strm, [src, MEMCPY_PREFETCH_LDR];\ +- ldp C_q, D_q, [src], #32;\ +- ext I_v.16b, F_v.16b, G_v.16b, 16-shft;\ +- stp H_q, I_q, [dst], #32;\ +- ext A_v.16b, G_v.16b, C_v.16b, 16-shft;\ +- ext B_v.16b, C_v.16b, D_v.16b, 16-shft;\ +- ldp F_q, G_q, [src], #32;\ +- ext H_v.16b, D_v.16b, F_v.16b, 16-shft;\ +- subs count, count, 64;\ +- b.ge 1b;\ +-2:;\ +- ext I_v.16b, F_v.16b, G_v.16b, 16-shft;\ +- b L(dst_unaligned_tail); +- +-EXT_CHUNK(1) +-EXT_CHUNK(2) +-EXT_CHUNK(3) +-EXT_CHUNK(4) +-EXT_CHUNK(5) +-EXT_CHUNK(6) +-EXT_CHUNK(7) +-EXT_CHUNK(8) +-EXT_CHUNK(9) +-EXT_CHUNK(10) +-EXT_CHUNK(11) +-EXT_CHUNK(12) +-EXT_CHUNK(13) +-EXT_CHUNK(14) +-EXT_CHUNK(15) +- +-.p2align 4 +-L(move_long): +-1: +- add srcend, src, count +- add dstend, dstin, count +- +- and tmp1, dstend, 15 +- ldr D_q, [srcend, -16] +- sub srcend, srcend, tmp1 +- sub count, count, tmp1 +- ldp A_q, B_q, [srcend, -32] +- str D_q, [dstend, -16] +- ldp C_q, D_q, [srcend, -64]! +- sub dstend, dstend, tmp1 +- subs count, count, 128 +- b.ls 2f +- +-.p2align 4 +-1: +- subs count, count, 64 +- stp A_q, B_q, [dstend, -32] +- ldp A_q, B_q, [srcend, -32] +- stp C_q, D_q, [dstend, -64]! +- ldp C_q, D_q, [srcend, -64]! 
+- b.hi 1b +- +- /* Write the last full set of 64 bytes. The remainder is at most 64 +- bytes, so it is safe to always copy 64 bytes from the start even if +- there is just 1 byte left. */ +-2: +- ldp E_q, F_q, [src, 32] +- ldp G_q, H_q, [src] +- stp A_q, B_q, [dstend, -32] +- stp C_q, D_q, [dstend, -64] +- stp E_q, F_q, [dstin, 32] +- stp G_q, H_q, [dstin] +-3: ret +- +- +-.p2align 4 +-L(move_middle): +- cbz tmp1, 3f +- add srcend, src, count +- prfm PLDL1STRM, [srcend, -64] +- add dstend, dstin, count +- and tmp1, dstend, 15 +- ldr D_q, [srcend, -16] +- sub srcend, srcend, tmp1 +- sub count, count, tmp1 +- ldr A_q, [srcend, -16] +- str D_q, [dstend, -16] +- ldr B_q, [srcend, -32] +- ldr C_q, [srcend, -48] +- ldr D_q, [srcend, -64]! +- sub dstend, dstend, tmp1 +- subs count, count, 128 +- b.ls 2f +- +-1: +- str A_q, [dstend, -16] +- ldr A_q, [srcend, -16] +- str B_q, [dstend, -32] +- ldr B_q, [srcend, -32] +- str C_q, [dstend, -48] +- ldr C_q, [srcend, -48] +- str D_q, [dstend, -64]! +- ldr D_q, [srcend, -64]! +- subs count, count, 64 +- b.hi 1b +- +- /* Write the last full set of 64 bytes. The remainder is at most 64 +- bytes, so it is safe to always copy 64 bytes from the start even if +- there is just 1 byte left. */ +-2: +- ldr G_q, [src, 48] +- str A_q, [dstend, -16] +- ldr A_q, [src, 32] +- str B_q, [dstend, -32] +- ldr B_q, [src, 16] +- str C_q, [dstend, -48] +- ldr C_q, [src] +- str D_q, [dstend, -64] +- str G_q, [dstin, 48] +- str A_q, [dstin, 32] +- str B_q, [dstin, 16] +- str C_q, [dstin] +-3: ret +- +- +-END (MEMCPY) +- .section .rodata +- .p2align 4 +- +-L(ext_table): +- /* The first entry is for the alignment of 0 and is never +- actually used (could be any value). */ +- .word 0 +- .word L(ext_size_1) -. +- .word L(ext_size_2) -. +- .word L(ext_size_3) -. +- .word L(ext_size_4) -. +- .word L(ext_size_5) -. +- .word L(ext_size_6) -. +- .word L(ext_size_7) -. +- .word L(ext_size_8) -. +- .word L(ext_size_9) -. +- .word L(ext_size_10) -. 
+- .word L(ext_size_11) -. +- .word L(ext_size_12) -. +- .word L(ext_size_13) -. +- .word L(ext_size_14) -. +- .word L(ext_size_15) -. +- +-libc_hidden_builtin_def (MEMCPY) +-#endif +diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c +index 0d8c85b4..e69d8162 100644 +--- a/sysdeps/aarch64/multiarch/memmove.c ++++ b/sysdeps/aarch64/multiarch/memmove.c +@@ -1,5 +1,5 @@ + /* Multiple versions of memmove. AARCH64 version. +- Copyright (C) 2017-2019 Free Software Foundation, Inc. ++ Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or +@@ -31,16 +31,13 @@ extern __typeof (__redirect_memmove) __libc_memmove; + extern __typeof (__redirect_memmove) __memmove_generic attribute_hidden; + extern __typeof (__redirect_memmove) __memmove_thunderx attribute_hidden; + extern __typeof (__redirect_memmove) __memmove_falkor attribute_hidden; +-extern __typeof (__redirect_memmove) __memmove_kunpeng attribute_hidden; +- ++ + libc_ifunc (__libc_memmove, +- (IS_KUNPENG920(midr) +- ?__memmove_kunpeng +- :(IS_THUNDERX (midr) ++ (IS_THUNDERX (midr) + ? __memmove_thunderx + : (IS_FALKOR (midr) || IS_PHECDA (midr) + ? __memmove_falkor +- : __memmove_generic)))); ++ : __memmove_generic))); + + # undef memmove + strong_alias (__libc_memmove, memmove); +diff --git a/sysdeps/aarch64/multiarch/memset.c b/sysdeps/aarch64/multiarch/memset.c +index 0f7ad0c8..f7ae291e 100644 +--- a/sysdeps/aarch64/multiarch/memset.c ++++ b/sysdeps/aarch64/multiarch/memset.c +@@ -1,5 +1,5 @@ + /* Multiple versions of memset. AARCH64 version. +- Copyright (C) 2017-2019 Free Software Foundation, Inc. ++ Copyright (C) 2017-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or +@@ -29,15 +29,15 @@ + extern __typeof (__redirect_memset) __libc_memset; + + extern __typeof (__redirect_memset) __memset_falkor attribute_hidden; +-extern __typeof (__redirect_memset) __memset_generic attribute_hidden; + extern __typeof (__redirect_memset) __memset_kunpeng attribute_hidden; ++extern __typeof (__redirect_memset) __memset_generic attribute_hidden; + + libc_ifunc (__libc_memset, +- IS_KUNPENG920(midr) +- ?__memset_kunpeng +- :((IS_FALKOR (midr) || IS_PHECDA (midr)) && zva_size == 64 +- ?__memset_falkor +- :__memset_generic)); ++ IS_KUNPENG920 (midr) ++ ?__memset_kunpeng ++ : ((IS_FALKOR (midr) || IS_PHECDA (midr)) && zva_size == 64 ++ ? __memset_falkor ++ : __memset_generic)); + + # undef memset + strong_alias (__libc_memset, memset); +diff --git a/sysdeps/aarch64/multiarch/memset_kunpeng.S b/sysdeps/aarch64/multiarch/memset_kunpeng.S +index 22a3d4a7..a03441ae 100644 +--- a/sysdeps/aarch64/multiarch/memset_kunpeng.S ++++ b/sysdeps/aarch64/multiarch/memset_kunpeng.S +@@ -1,4 +1,5 @@ +-/* Copyright (C) 2012-2019 Free Software Foundation, Inc. ++/* Optimized memset for Huawei Kunpeng processor. ++ Copyright (C) 2012-2019 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + +@@ -14,7 +15,7 @@ + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see +- . */ ++ . */ + + #include + #include +@@ -35,7 +36,7 @@ ENTRY_ALIGN (MEMSET, 6) + + dup v0.16B, valw + add dstend, dstin, count +- ++ + cmp count, 128 + b.hs L(set_long) + +@@ -44,7 +45,7 @@ ENTRY_ALIGN (MEMSET, 6) + + /* Set 16..127 bytes. */ + str q0, [dstin] +- tbnz count, 6, L(set112) ++ tbnz count, 6, L(set127) + str q0, [dstend, -16] + tbz count, 5, 1f + str q0, [dstin, 16] +@@ -53,26 +54,14 @@ ENTRY_ALIGN (MEMSET, 6) + + .p2align 4 + /* Set 64..127 bytes. Write 64 bytes from the start and +- 32 bytes from the end. 
*/ +-L(set112): +- ands tmp1, dstin, 15 +- bne 2f +- str q0, [dstin, 16] +- stp q0, q0, [dstin, 32]//finish 64 +- tbz count, 5, 1f +- stp q0, q0, [dstin, 64] //大于96, finish 96 +-1: stp q0, q0, [dstend, -32] ++ 64 bytes from the end. */ ++L(set127): ++ stp q0, q0, [dstin, 16] ++ str q0, [dstin, 48] ++ stp q0, q0, [dstend, -64] ++ stp q0, q0, [dstend, -32] + ret +- .p2align 4 +-2: bic dst, dstin, 15//回退到16对齐 +- stp q0,q0, [dst, 16] +- str q0, [dst, 48] +- tbz count, 5, 3f //大于96 +- stp q0, q0, [dst, 64] +-3: stp q0, q0, [dstend, -48]//finish 64~80 +- str q0, [dstend, -16]//finish 96 +- ret +- ++ + .p2align 4 + /* Set 0..15 bytes. */ + L(less16): +@@ -90,10 +79,9 @@ L(less8): + tbz count, 1, 3f + str h0, [dstend, -2] + 3: ret +- ++ + .p2align 4 +-L(set_long): +- and valw, valw, 255 ++L(set_long): + bic dst, dstin, 15 + str q0, [dstin] + sub count, dstend, dst /* Count is 16 too large. */ +@@ -103,19 +91,21 @@ L(set_long): + stp q0, q0, [dst, 64]! + subs count, count, 64 + b.lo 1f +- stp q0, q0, [dst, 32] ++ stp q0, q0, [dst, 32] + stp q0, q0, [dst, 64]! + subs count, count, 64 + b.lo 1f +- stp q0, q0, [dst, 32] ++ stp q0, q0, [dst, 32] + stp q0, q0, [dst, 64]! + subs count, count, 64 +- b.hs 1b +- +-1: tbz count, 5, 2f +- str q0, [dst, 32] +- str q0, [dst, 48] +-2: stp q0, q0, [dstend, -32] ++ b.lo 1f ++ stp q0, q0, [dst, 32] ++ stp q0, q0, [dst, 64]! ++ subs count, count, 64 ++ b.hs 1b ++ ++1: stp q0, q0, [dstend, -64] ++ stp q0, q0, [dstend, -32] + ret + + END (MEMSET) +diff --git a/sysdeps/aarch64/strcpy.S b/sysdeps/aarch64/strcpy.S +index 290bcf8d..a64c5980 100644 +--- a/sysdeps/aarch64/strcpy.S ++++ b/sysdeps/aarch64/strcpy.S +@@ -1,5 +1,5 @@ + /* strcpy/stpcpy - copy a string returning pointer to start/end. +- Copyright (C) 2013-2019 Free Software Foundation, Inc. ++ Copyright (C) 2013-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or +@@ -14,7 +14,7 @@ + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see +- . */ ++ . */ + + /* To build as stpcpy, define BUILD_STPCPY before compiling this file. + +@@ -232,7 +232,7 @@ L(entry_no_page_cross): + #ifdef __AARCH64EB__ + rev64 datav.16b, datav.16b + #endif +- /* loc */ ++ /* calculate the loc value */ + cmeq datav.16b, datav.16b, #0 + mov data1, datav.d[0] + mov data2, datav.d[1] +diff --git a/sysdeps/aarch64/strnlen.S b/sysdeps/aarch64/strnlen.S +index a57753b0..0a42f404 100644 +--- a/sysdeps/aarch64/strnlen.S ++++ b/sysdeps/aarch64/strnlen.S +@@ -1,6 +1,6 @@ + /* strnlen - calculate the length of a string with limit. + +- Copyright (C) 2013-2019 Free Software Foundation, Inc. ++ Copyright (C) 2013-2018 Free Software Foundation, Inc. + + This file is part of the GNU C Library. + +@@ -16,7 +16,7 @@ + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see +- . */ ++ . */ + + #include + +diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c +index b152c4e3..e60485b0 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c ++++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c +@@ -1,6 +1,6 @@ + /* Initialize CPU feature data. AArch64 version. + This file is part of the GNU C Library. +- Copyright (C) 2017-2019 Free Software Foundation, Inc. ++ Copyright (C) 2017-2018 Free Software Foundation, Inc. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public +@@ -36,7 +36,7 @@ static struct cpu_list cpu_list[] = { + {"thunderx2t99", 0x431F0AF0}, + {"thunderx2t99p1", 0x420F5160}, + {"phecda", 0x680F0000}, +- {"kunpeng920", 0x481FD010}, ++ {"kunpeng920", 0x481FD010}, + {"generic", 0x0} + }; + +diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h +index 4faeed7a..ed77cde7 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h ++++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h +@@ -1,6 +1,6 @@ + /* Initialize CPU feature data. AArch64 version. + This file is part of the GNU C Library. +- Copyright (C) 2017-2019 Free Software Foundation, Inc. ++ Copyright (C) 2017-2018 Free Software Foundation, Inc. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public +@@ -51,8 +51,9 @@ + + #define IS_PHECDA(midr) (MIDR_IMPLEMENTOR(midr) == 'h' \ + && MIDR_PARTNUM(midr) == 0x000) +-#define IS_KUNPENG920(midr) (MIDR_IMPLEMENTOR(midr) == 'H' \ +- && MIDR_PARTNUM(midr) == 0xd01) ++ ++#define IS_KUNPENG920(midr) (MIDR_IMPLEMENTOR(midr) == 'H' \ ++ && MIDR_PARTNUM(midr) == 0xd01) + + struct cpu_features + { +-- +2.19.1 + diff --git a/glibc.spec b/glibc.spec index 1dc2725..3cd5dc8 100644 --- a/glibc.spec +++ b/glibc.spec @@ -26,7 +26,7 @@ # - Run smoke tests with valgrind to verify dynamic loader. # - Default: Always run valgrind tests if there is architecture support. 
############################################################################## -%bcond_with testsuite +%bcond_without testsuite %bcond_without benchtests %bcond_with bootstrap %bcond_without werror @@ -59,7 +59,7 @@ ############################################################################## Name: glibc Version: 2.28 -Release: 36 +Release: 39 Summary: The GNU libc libraries License: %{all_license} URL: http://www.gnu.org/software/glibc/ @@ -74,6 +74,9 @@ Source6: LicenseList Source7: LanguageList Patch0: Fix-use-after-free-in-glob-when-expanding-user-bug-2.patch +Patch1: backport-Kunpeng-patches.patch +Patch2: Avoid-ldbl-96-stack-corruption-from-range-reduction-.patch +Patch3: backport-CVE-2020-1751-Fix-array-overflow-in-backtrace-on-PowerPC-bug-25423.patch Provides: ldconfig rtld(GNU_HASH) bundled(gnulib) @@ -918,6 +921,15 @@ fi %changelog +* Sat May 23 2020 liqingqing - 2.28-39 +- Fix array overflow in backtrace on PowerPC (bug 25423) + +* Tue Apr 28 2020 liqingqing - 2.28-38 +- Avoid ldbl-96 stack corruption from range reduction of pseudo-zero (bug 25487) + +* Thu Apr 16 2020 wangbin - 2.28-37 +- backport Kunpeng patches + * Thu Mar 19 2020 yuxiangyang - 2.28-36 - fix build src.rpm error -- Gitee From c64f222a7f3c8669b34d26799c3126502edeeccc Mon Sep 17 00:00:00 2001 From: wangshuo Date: Thu, 2 Jul 2020 20:45:41 +0800 Subject: [PATCH 2/2] add filelist and backport many patch --- Do-not-use-gettimeofday-in-random-id.patch | 62 +++ ...-offset-compare-condition-check-BZ-2.patch | 52 +++ ...state-after-second-wchar_t-output-Bu.patch | 251 +++++++++++ glibc.spec | 419 ++++++++++++------ nptl-Don-t-madvise-user-provided-stack.patch | 41 ++ ...nding-setxid-request-also-in-detache.patch | 52 +++ turn-REP_STOSB_THRESHOLD-from-2k-to-1M.patch | 25 ++ ...P-on-__x86_shared_non_temporal_thres.patch | 50 +++ ...with-vpxor-to-clear-a-vector-registe.patch | 43 ++ 9 files changed, 865 insertions(+), 130 deletions(-) create mode 100644 Do-not-use-gettimeofday-in-random-id.patch 
create mode 100644 Fix-avx2-strncmp-offset-compare-condition-check-BZ-2.patch create mode 100644 Reset-converter-state-after-second-wchar_t-output-Bu.patch create mode 100644 nptl-Don-t-madvise-user-provided-stack.patch create mode 100644 nptl-wait-for-pending-setxid-request-also-in-detache.patch create mode 100644 turn-REP_STOSB_THRESHOLD-from-2k-to-1M.patch create mode 100644 x86-64-Use-RDX_LP-on-__x86_shared_non_temporal_thres.patch create mode 100644 x86_64-Use-xmmN-with-vpxor-to-clear-a-vector-registe.patch diff --git a/Do-not-use-gettimeofday-in-random-id.patch b/Do-not-use-gettimeofday-in-random-id.patch new file mode 100644 index 0000000..2b1b2f2 --- /dev/null +++ b/Do-not-use-gettimeofday-in-random-id.patch @@ -0,0 +1,62 @@ +From 359653aaacad463d916323f03c0ac3c47405aafa Mon Sep 17 00:00:00 2001 +From: Adhemerval Zanella +Date: Wed, 16 Jan 2019 18:10:56 +0000 +Subject: [PATCH] Do not use HP_TIMING_NOW for random bits + +This patch removes the HP_TIMING_BITS usage for fast random bits and replace +with clock_gettime (CLOCK_MONOTONIC). It has unspecified starting time and +nano-second accuracy, so its randomness is significantly better than +gettimeofday. + +Althoug it should incur in more overhead (specially for architecture that +support hp-timing), the symbol is also common implemented as a vDSO. + +Checked on aarch64-linux-gnu, x86_64-linux-gnu, and i686-linux-gnu. I also +checked on a i686-gnu build. + + * include/random-bits.h: New file. + * resolv/res_mkquery.c [HP_TIMING_AVAIL] (RANDOM_BITS, + (__res_context_mkquery): Remove usage hp-timing usage and replace with + random_bits. + * resolv/res_send.c [HP_TIMING_AVAIL] (nameserver_offset): Likewise. + * sysdeps/posix/tempname.c [HP_TIMING_AVAIL] (__gen_tempname): + Likewise. 
+ +note that this patch is just parts of the origin one to adapt glibc-2.28  + + +--- + resolv/res_mkquery.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/resolv/res_mkquery.c b/resolv/res_mkquery.c +index 213abeef..7ba40640 100644 +--- a/resolv/res_mkquery.c ++++ b/resolv/res_mkquery.c +@@ -95,6 +95,7 @@ + + #include + #include ++#include + #if HP_TIMING_AVAIL + # define RANDOM_BITS(Var) { uint64_t v64; HP_TIMING_NOW (v64); Var = v64; } + #endif +@@ -124,9 +125,12 @@ __res_context_mkquery (struct resolv_context *ctx, int op, const char *dname, + #ifdef RANDOM_BITS + RANDOM_BITS (randombits); + #else +- struct timeval tv; +- __gettimeofday (&tv, NULL); +- randombits = (tv.tv_sec << 8) ^ tv.tv_usec; ++ struct timespec tv; ++ clock_gettime (CLOCK_MONOTONIC, &tv); ++ /* Shuffle the lower bits to minimize the clock bias. */ ++ uint32_t ret = tv.tv_nsec ^ tv.tv_sec; ++ ret ^= (ret << 24) | (ret >> 8); ++ randombits = ret; + #endif + + hp->id = randombits; +-- +2.19.1 + diff --git a/Fix-avx2-strncmp-offset-compare-condition-check-BZ-2.patch b/Fix-avx2-strncmp-offset-compare-condition-check-BZ-2.patch new file mode 100644 index 0000000..a613744 --- /dev/null +++ b/Fix-avx2-strncmp-offset-compare-condition-check-BZ-2.patch @@ -0,0 +1,52 @@ +From 75870237ff3bb363447b03f4b0af100227570910 Mon Sep 17 00:00:00 2001 +From: Sunil K Pandey +Date: Fri, 12 Jun 2020 08:57:16 -0700 +Subject: [PATCH] Fix avx2 strncmp offset compare condition check [BZ #25933] + +strcmp-avx2.S: In avx2 strncmp function, strings are compared in +chunks of 4 vector size(i.e. 32x4=128 byte for avx2). After first 4 +vector size comparison, code must check whether it already passed +the given offset. This patch implement avx2 offset check condition +for strncmp function, if both string compare same for first 4 vector +size. 
+--- + sysdeps/x86_64/multiarch/strcmp-avx2.S | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S +index 5f88a68262..d42b04b54f 100644 +--- a/sysdeps/x86_64/multiarch/strcmp-avx2.S ++++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S +@@ -591,7 +591,14 @@ L(loop_cross_page_2_vec): + movl $(PAGE_SIZE / (VEC_SIZE * 4) - 1), %esi + + testq %rdi, %rdi ++# ifdef USE_AS_STRNCMP ++ /* At this point, if %rdi value is 0, it already tested ++ VEC_SIZE*4+%r10 byte starting from %rax. This label ++ checks whether strncmp maximum offset reached or not. */ ++ je L(string_nbyte_offset_check) ++# else + je L(back_to_loop) ++# endif + tzcntq %rdi, %rcx + addq %r10, %rcx + /* Adjust for number of bytes skipped. */ +@@ -627,6 +634,14 @@ L(loop_cross_page_2_vec): + VZEROUPPER + ret + ++# ifdef USE_AS_STRNCMP ++L(string_nbyte_offset_check): ++ leaq (VEC_SIZE * 4)(%r10), %r10 ++ cmpq %r10, %r11 ++ jbe L(zero) ++ jmp L(back_to_loop) ++# endif ++ + .p2align 4 + L(cross_page_loop): + /* Check one byte/dword at a time. */ +-- +2.19.1 + diff --git a/Reset-converter-state-after-second-wchar_t-output-Bu.patch b/Reset-converter-state-after-second-wchar_t-output-Bu.patch new file mode 100644 index 0000000..b616737 --- /dev/null +++ b/Reset-converter-state-after-second-wchar_t-output-Bu.patch @@ -0,0 +1,251 @@ +From c580e6466d6da8262820cdbad19f32c5546226cf Mon Sep 17 00:00:00 2001 +From: Carlos O'Donell +Date: Fri, 27 Mar 2020 17:03:36 -0400 +Subject: [PATCH] Reset converter state after second wchar_t output (Bug 25734) + +An input BIG5-HKSCS character may be converted into at most 2 wchar_t +characters. After outputting the second wchar_t character (which was +saved in the converter state) we must reset the state. If we fail +to reset the state we will be stuck continually copying that +character to the output even if we have further input to consider. 
+ +We add a new test case that covers the 4 BIG5-HKSCS characters +that may become 2 wchar_t characters. + +Reviewed-by: Tom Honermann +--- + iconvdata/Makefile | 17 ++- + iconvdata/big5hkscs.c | 3 + + iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c | 160 ++++++++++++++++++++++ + 3 files changed, 176 insertions(+), 4 deletions(-) + create mode 100644 iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c + +diff --git a/iconvdata/Makefile b/iconvdata/Makefile +index c83962f351b..4ec2741cdce 100644 +--- a/iconvdata/Makefile ++++ b/iconvdata/Makefile +@@ -73,7 +73,7 @@ modules.so := $(addsuffix .so, $(modules)) + ifeq (yes,$(build-shared)) + tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \ + tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \ +- bug-iconv10 bug-iconv11 bug-iconv12 ++ bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 + ifeq ($(have-thread-library),yes) + tests += bug-iconv3 + endif +@@ -275,16 +275,21 @@ endif + endif + endif + +-include ../Rules +- + ifeq ($(run-built-tests),yes) +-LOCALES := de_DE.UTF-8 ++LOCALES := \ ++ de_DE.UTF-8 \ ++ zh_HK.BIG5-HKSCS \ ++ $(NULL) ++ + include ../gen-locales.mk + + $(objpfx)bug-iconv6.out: $(gen-locales) + $(objpfx)tst-iconv7.out: $(gen-locales) ++$(objpfx)tst-iconv-big5-hkscs-to-2ucs4.out: $(gen-locales) + endif + ++include ../Rules ++ + # Set libof-* for each routine. + cpp-srcs-left := $(modules) $(generated-modules) $(libJIS-routines) \ + $(libKSC-routines) $(libGB-routines) $(libCNS-routines) \ +@@ -340,3 +345,7 @@ tst-tables-clean: + + $(objpfx)gconv-modules: gconv-modules + cat $(sysdeps-gconv-modules) $^ > $@ ++ ++# Test requires BIG5HKSCS. 
++$(objpfx)tst-iconv-big5-hkscs-to-2ucs4.out: $(objpfx)gconv-modules \ ++ $(addprefix $(objpfx),$(modules.so)) +diff --git a/iconvdata/big5hkscs.c b/iconvdata/big5hkscs.c +index 01fcfeba76b..ef325119b18 100644 +--- a/iconvdata/big5hkscs.c ++++ b/iconvdata/big5hkscs.c +@@ -17895,6 +17895,9 @@ static struct + else \ + ++inptr; \ + } \ ++ else \ ++ /* Clear the queue and proceed to output the saved character. */ \ ++ *statep = 0; \ + \ + put32 (outptr, ch); \ + outptr += 4; \ +diff --git a/iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c b/iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c +new file mode 100644 +index 00000000000..8389adebf27 +--- /dev/null ++++ b/iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c +@@ -0,0 +1,160 @@ ++/* Verify the BIG5HKSCS outputs that generate 2 wchar_t's (Bug 25734). ++ Copyright (C) 2020 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* A few BIG5-HKSCS characters map in two unicode code points. ++ They are: ++ /x88/x62 => ++ /x88/x64 => ++ /x88/xa3 => ++ /x88/xa5 => ++ Each of these is special cased in iconvdata/big5hkscs.c. ++ This test ensures that we correctly reset the shift state after ++ outputting any of these characters. 
We do this by converting ++ each them followed by converting an ASCII character. If we fail ++ to reset the shift state (bug 25734) then we'll see the last ++ character in the queue output again. */ ++ ++/* Each test has name, input bytes, and expected wide character ++ output. */ ++struct testdata { ++ const char *name; ++ const char input[3]; ++ wchar_t expected[3]; ++}; ++ ++/* In BIG5-HKSCS (2008) there are 4 characters that generate multiple ++ wide characters. */ ++struct testdata tests[4] = { ++ /* X => X */ ++ { "", "\x88\x62\x58", { 0x00CA, 0x0304, 0x0058 } }, ++ /* X => X */ ++ { "", "\x88\x64\x58", { 0x00CA, 0x030C, 0x0058 } }, ++ /* X => X */ ++ { "", "\x88\xa3\x58", { 0x00EA, 0x0304, 0x0058 } }, ++ /* X => X */ ++ { "", "\x88\xa5\x58", { 0x00EA, 0x030C, 0x0058 } } ++}; ++ ++/* Each test is of the form: ++ - Translate first code sequence (two bytes) ++ - Translate second (zero bytes) ++ - Translate the third (one byte). */ ++static int ++check_conversion (struct testdata test) ++{ ++ int err = 0; ++ wchar_t wc; ++ mbstate_t st; ++ size_t ret; ++ const char *mbs = test.input; ++ int consumed = 0; ++ /* Input is always 3 bytes long. */ ++ int inlen = 3; ++ ++ memset (&st, 0, sizeof (st)); ++ /* First conversion: Consumes first 2 bytes. */ ++ ret = mbrtowc (&wc, mbs, inlen - consumed, &st); ++ if (ret != 2) ++ { ++ printf ("error: First conversion consumed only %zd bytes.\n", ret); ++ err++; ++ } ++ /* Advance the two consumed bytes. */ ++ mbs += ret; ++ consumed += ret; ++ if (wc != test.expected[0]) ++ { ++ printf ("error: Result of first conversion was wrong.\n"); ++ err++; ++ } ++ /* Second conversion: Consumes 0 bytes. */ ++ ret = mbrtowc (&wc, mbs, inlen - consumed, &st); ++ if (ret != 0) ++ { ++ printf ("error: Second conversion consumed only %zd bytes.\n", ret); ++ err++; ++ } ++ /* Advance the zero consumed bytes. 
*/ ++ mbs += ret; ++ consumed += ret; ++ if (wc != test.expected[1]) ++ { ++ printf ("error: Result of second conversion was wrong.\n"); ++ err++; ++ } ++ /* After the second conversion the state of the converter should be ++ in the initial state. It is in the initial state because the two ++ input BIG5-HKSCS bytes have been consumed and the 2 wchar_t's have ++ been output. */ ++ if (mbsinit (&st) == 0) ++ { ++ printf ("error: Converter not in initial state.\n"); ++ err++; ++ } ++ /* Third conversion: Consumes 1 byte (it's an ASCII character). */ ++ ret = mbrtowc (&wc, mbs, inlen - consumed, &st); ++ if (ret != 1) ++ { ++ printf ("error: Third conversion consumed only %zd bytes.\n", ret); ++ err++; ++ } ++ /* Advance the one byte. */ ++ mbs += ret; ++ consumed += ret; ++ if (wc != test.expected[2]) ++ { ++ printf ("error: Result of third conversion was wrong.\n"); ++ err++; ++ } ++ /* Return 0 if we saw no errors. */ ++ return err; ++} ++ ++static int ++do_test (void) ++{ ++ int err = 0; ++ int ret; ++ /* Testing BIG5-HKSCS. */ ++ setlocale (LC_ALL, "zh_HK.BIG5-HKSCS"); ++ ++ /* Run all the special conversions. */ ++ for (int i = 0; i < (sizeof (tests) / sizeof (struct testdata)); i++) ++ { ++ printf ("Running test for %s\n", tests[i].name); ++ ret = check_conversion (tests[i]); ++ if (ret > 0) ++ printf ("Test %s failed.\n", tests[i].name); ++ err += ret; ++ } ++ ++ /* Fail if any conversion had an error. 
*/ ++ if (err > 0) ++ FAIL_EXIT1 ("One or more conversions failed."); ++ ++ return 0; ++} ++ ++#include +-- +2.19.1 + diff --git a/glibc.spec b/glibc.spec index 3cd5dc8..c1a4503 100644 --- a/glibc.spec +++ b/glibc.spec @@ -59,7 +59,7 @@ ############################################################################## Name: glibc Version: 2.28 -Release: 39 +Release: 40 Summary: The GNU libc libraries License: %{all_license} URL: http://www.gnu.org/software/glibc/ @@ -77,6 +77,14 @@ Patch0: Fix-use-after-free-in-glob-when-expanding-user-bug-2.patch Patch1: backport-Kunpeng-patches.patch Patch2: Avoid-ldbl-96-stack-corruption-from-range-reduction-.patch Patch3: backport-CVE-2020-1751-Fix-array-overflow-in-backtrace-on-PowerPC-bug-25423.patch +Patch4: Do-not-use-gettimeofday-in-random-id.patch +Patch5: Reset-converter-state-after-second-wchar_t-output-Bu.patch +Patch6: Fix-avx2-strncmp-offset-compare-condition-check-BZ-2.patch +Patch7: nptl-wait-for-pending-setxid-request-also-in-detache.patch +Patch8: x86-64-Use-RDX_LP-on-__x86_shared_non_temporal_thres.patch +Patch9: x86_64-Use-xmmN-with-vpxor-to-clear-a-vector-registe.patch +Patch10: nptl-Don-t-madvise-user-provided-stack.patch +Patch11: turn-REP_STOSB_THRESHOLD-from-2k-to-1M.patch Provides: ldconfig rtld(GNU_HASH) bundled(gnulib) @@ -161,6 +169,7 @@ Summary: All language packs for %{name}. 
Requires: %{name} = %{version}-%{release} Requires: %{name}-common = %{version}-%{release} Provides: %{name}-langpack = %{version}-%{release} +Obsoletes: %{name}-minimal-langpack = 2.28 %{lua: -- List the Symbol provided by all-langpacks @@ -168,7 +177,7 @@ lang_provides = {} for line in io.lines(rpm.expand("%{SOURCE7}")) do print(rpm.expand([[ Provides:]]..line..[[ = %{version}-%{release} -Obsoletes:]]..line..[[ +Obsoletes:]]..line..[[ = 2.28 ]])) end } @@ -212,8 +221,8 @@ Provides: %{name}-headers = %{version}-%{release} Provides: %{name}-headers(%{_target_cpu}) Provides: %{name}-headers%{_isa} = %{version}-%{release} -Obsoletes: %{name}-static -Obsoletes: %{name}-headers +Obsoletes: %{name}-static = 2.28 +Obsoletes: %{name}-headers = 2.28 %description devel The glibc-devel package contains the object files necessary for developing @@ -248,7 +257,7 @@ Provides: nss_db = %{version}-%{release} Provides: nss_db%{_isa} = %{version}-%{release} Provides: nss_hesiod = %{version}-%{release} Provides: nss_hesiod%{_isa} = %{version}-%{release} -Obsoletes: nss_db nss_hesiod +Obsoletes: nss_db = 2.28, nss_hesiod = 2.28 %description -n nss_modules This package contains nss_db and nss_hesiod. The former uses hash-indexed files @@ -299,19 +308,47 @@ to run microbenchmark tests on the system. %package debugutils Summary: debug files for %{name} Requires: %{name} = %{version}-%{release} -Requires: %{name}-debuginfo = %{version}-%{release} - -Provides: %{name}-debuginfo = %{version}-%{release} -Provides: %{name}-debuginfo%{_isa} = %{version}-%{release} Provides: %{name}-utils = %{version}-%{release} Provides: %{name}-utils%{_isa} = %{version}-%{release} -Obsoletes: %{name}-utils +Obsoletes: %{name}-utils = 2.28 %description debugutils -This package provides many static files for debug. Besides, It contain memusage, -a memory usage profiler, mtrace, a memory leak tracer and xtrace, a function -call tracer, all of which is not necessory for you. 
+This package provides memusage, a memory usage profiler, mtrace, a memory leak +tracer and xtrace, a function call tracer, none of which are necessary for normal use. + +############################################################################## +# glibc debuginfo sub-package +############################################################################## +%if 0%{?_enable_debug_packages} +%define debug_package %{nil} +%define __debug_install_post %{nil} +%global __debug_package 1 + +%undefine _debugsource_packages +%undefine _debuginfo_subpackages +%undefine _unique_debug_names +%undefine _unique_debug_srcs + +%package debuginfo +Summary: Debug information for %{name} +AutoReqProv: no + +%description debuginfo +This package provides debug information for package %{name}. +Debug information is useful when developing applications that use this +package or when debugging this package. + +%package debugsource +Summary: Debug source for %{name} +AutoReqProv: no + +%description debugsource +This package provides debug sources for package %{name}. +Debug sources are useful when developing applications that use this +package or when debugging this package. 
+ +%endif # 0%{?_enable_debug_packages} ############################################################################## # glibc help sub-package @@ -344,6 +381,7 @@ touch locale/programs/*-kw.h %build BuildFlags="-O2 -g" +BuildFlags="$BuildFlags -DNDEBUG" reference=" \ "-Wp,-D_GLIBCXX_ASSERTIONS" \ "-fasynchronous-unwind-tables" \ @@ -607,7 +645,216 @@ for i in $RPM_BUILD_ROOT%{_prefix}/bin/{xtrace,memusage}; do -e 's~='\''/\\\$LIB/libmemusage.so~='\''%{_prefix}/\\$LIB/libmemusage.so~' \ -i $i done + +touch master.filelist +touch glibc.filelist +touch common.filelist +touch devel.filelist +touch nscd.filelist +touch nss_modules.filelist +touch nss-devel.filelist +touch libnsl.filelist +touch debugutils.filelist +touch benchtests.filelist +touch debuginfo.filelist + +{ + find $RPM_BUILD_ROOT \( -type f -o -type l \) \ + \( \ + -name etc -printf "%%%%config " -o \ + -name gconv-modules \ + -printf "%%%%verify(not md5 size mtime) %%%%config(noreplace) " -o \ + -name gconv-modules.cache \ + -printf "%%%%verify(not md5 size mtime) " \ + , \ + ! -path "*/lib/debug/*" -printf "/%%P\n" \) + + find $RPM_BUILD_ROOT -type d \ + \( -path '*%{_prefix}/share/locale' -prune -o \ + \( -path '*%{_prefix}/share/*' \ +%if %{with docs} + ! -path '*%{_infodir}' -o \ +%endif + -path "*%{_prefix}/include/*" \ + \) -printf "%%%%dir /%%P\n" \) +} | { + sed -e '\,.*/share/locale/\([^/_]\+\).*/LC_MESSAGES/.*\.mo,d' \ + -e '\,.*/share/i18n/locales/.*,d' \ + -e '\,.*/share/i18n/charmaps/.*,d' \ + -e '\,.*/etc/\(localtime\|nsswitch.conf\|ld\.so\.conf\|ld\.so\.cache\|default\|rpc\|gai\.conf\),d' \ + -e '\,.*/%{_libdir}/lib\(pcprofile\|memusage\)\.so,d' \ + -e '\,.*/bin/\(memusage\|mtrace\|xtrace\|pcprofiledump\),d' +} | sort > master.filelist + +chmod 0444 master.filelist + +############################################################################## +# glibc - The GNU C Library (glibc) core package. 
+############################################################################## +cat master.filelist \ + | grep -v \ + -e '%{_infodir}' \ + -e '%{_libdir}/lib.*_p.a' \ + -e '%{_prefix}/include' \ + -e '%{_libdir}/lib.*\.a' \ + -e '%{_libdir}/.*\.o' \ + -e '%{_libdir}/lib.*\.so' \ + -e 'nscd' \ + -e '%{_prefix}/bin' \ + -e '%{_prefix}/lib/locale' \ + -e '%{_prefix}/sbin/[^gi]' \ + -e '%{_prefix}/share' \ + -e '/var/db/Makefile' \ + -e '/libnss_.*\.so[0-9.]*$' \ + -e '/libnsl' \ + -e 'glibc-benchtests' \ + -e 'aux-cache' \ + -e 'build-locale-archive' \ + > glibc.filelist + +for module in compat files dns; do + cat master.filelist \ + | grep -E \ + -e "/libnss_$module(\.so\.[0-9.]+|-[0-9.]+\.so)$" \ + >> glibc.filelist +done +grep -e "libmemusage.so" -e "libpcprofile.so" master.filelist >> glibc.filelist + +############################################################################## +# glibc "common" sub-package +############################################################################## +grep '%{_prefix}/bin' master.filelist > common.filelist +grep '%{_prefix}/sbin/[^gi]' master.filelist \ + | grep -v 'nscd' >> common.filelist + +grep '%{_prefix}/share' master.filelist \ + | grep -v \ + -e '%{_prefix}/share/info/libc.info.*' \ + -e '%%dir %{prefix}/share/info' \ + -e '%%dir %{prefix}/share' \ + >> common.filelist + +echo '%{_prefix}/sbin/build-locale-archive' >> common.filelist + +############################################################################### +# glibc "devel" sub-package +############################################################################### +%if %{with docs} +grep '%{_infodir}' master.filelist | grep -v '%{_infodir}/dir' > devel.filelist +%endif + +grep '%{_libdir}/lib.*\.a' master.filelist \ + | grep '/lib\(\(c\|pthread\|nldbl\|mvec\)_nonshared\|g\|ieee\|mcheck\)\.a$' \ + >> devel.filelist + +grep '%{_libdir}/.*\.o' < master.filelist >> devel.filelist +grep '%{_libdir}/lib.*\.so' < master.filelist >> devel.filelist + +sed -i -e 
'\,libmemusage.so,d' \ + -e '\,libpcprofile.so,d' \ + -e '\,/libnss_[a-z]*\.so$,d' \ + devel.filelist + +grep '%{_prefix}/include' < master.filelist >> devel.filelist + +grep '%{_libdir}/lib.*\.a' < master.filelist \ + | grep -v '/lib\(\(c\|pthread\|nldbl\|mvec\)_nonshared\|g\|ieee\|mcheck\)\.a$' \ + >> devel.filelist + + +############################################################################## +# glibc "nscd" sub-package +############################################################################## +echo '%{_prefix}/sbin/nscd' > nscd.filelist + +############################################################################## +# nss modules sub-package +############################################################################## +grep -E "/libnss_(db|hesiod)(\.so\.[0-9.]+|-[0-9.]+\.so)$" \ +master.filelist > nss_modules.filelist + +############################################################################## +# nss-devel sub-package +############################################################################## +grep '/libnss_[a-z]*\.so$' master.filelist > nss-devel.filelist + +############################################################################## +# libnsl subpackage +############################################################################## +grep '/libnsl-[0-9.]*.so$' master.filelist > libnsl.filelist +test $(wc -l < libnsl.filelist) -eq 1 + +############################################################################## +# glibc debugutils sub-package +############################################################################## +cat > debugutils.filelist < benchtests.filelist +# ... and the makefile. +for b in %{SOURCE4} %{SOURCE5}; do + echo "%{_prefix}/libexec/glibc-benchtests/$(basename $b)" >> benchtests.filelist +done +# ... and finally, the comparison scripts. 
+echo "%{_prefix}/libexec/glibc-benchtests/benchout.schema.json" >> benchtests.filelist +echo "%{_prefix}/libexec/glibc-benchtests/compare_bench.py*" >> benchtests.filelist +echo "%{_prefix}/libexec/glibc-benchtests/import_bench.py*" >> benchtests.filelist +echo "%{_prefix}/libexec/glibc-benchtests/validate_benchout.py*" >> benchtests.filelist %endif # 0%{?_enable_debug_packages} + +############################################################################## +# glibc debuginfo sub-package +############################################################################## +touch debuginfo_additional.filelist +find_debuginfo_args='--strict-build-id -i' +%ifarch %{x86_arches} +find_debuginfo_args="$find_debuginfo_args \ + -l common.filelist \ + -l debugutils.filelist \ + -l nscd.filelist \ + -p '.*/(sbin|libexec)/.*' \ + -o debuginfo_additional.filelist \ + -l nss_modules.filelist \ + -l libnsl.filelist \ + -l glibc.filelist \ +%if %{with benchtests} + -l benchtests.filelist +%endif + " +%endif + +/usr/lib/rpm/find-debuginfo.sh $find_debuginfo_args -o debuginfo.filelist + +%ifarch %{x86_arches} +sed -i '\#^$RPM_BUILD_ROOT%{_prefix}/src/debug/#d' debuginfo_additional.filelist +cat debuginfo_additional.filelist >> debuginfo.filelist +find $RPM_BUILD_ROOT%{_prefix}/src/debug \ + \( -type d -printf '%%%%dir ' \) , \ + -printf '%{_prefix}/src/debug/%%P\n' >> debuginfo.filelist + +add_dir=%{_prefix}/lib/debug%{_libdir} +find $RPM_BUILD_ROOT$add_dir -name "*.a" -printf "$add_dir/%%P\n" >> debuginfo.filelist +%endif # %{x86_arches} + +remove_dir="%{_prefix}/src/debug" +remove_dir="$remove_dir $(echo %{_prefix}/lib/debug{,/%{_lib},/bin,/sbin})" +remove_dir="$remove_dir $(echo %{_prefix}/lib/debug%{_prefix}{,/%{_lib},/libexec,/bin,/sbin})" + +for d in $(echo $remove_dir | sed 's/ /\n/g'); do + sed -i "\|^%%dir $d/\?$|d" debuginfo.filelist +done %endif # %{with benchtests} ############################################################################## # Run the glibc testsuite @@ 
-734,68 +981,35 @@ fi ############################################################################## # Files list ############################################################################## -%files +%files -f glibc.filelist +%dir %{_prefix}/%{_lib}/audit %verify(not md5 size mtime) %config(noreplace) /etc/nsswitch.conf %verify(not md5 size mtime) %config(noreplace) /etc/ld.so.conf %verify(not md5 size mtime) %config(noreplace) /etc/rpc -%verify(not md5 size mtime) %config(noreplace) /usr/lib64/gconv/gconv-modules -%verify(not md5 size mtime) /usr/lib64/gconv/gconv-modules.cache %dir /etc/ld.so.conf.d %dir %{_prefix}/libexec/getconf -%{_prefix}/libexec/getconf/* %dir %{_libdir}/gconv -%{_libdir}/gconv/*.so -%dir %{_libdir}/audit -%{_libdir}/audit/* %dir %attr(0700,root,root) /var/cache/ldconfig %attr(0600,root,root) %verify(not md5 size mtime) %ghost %config(missingok,noreplace) /var/cache/ldconfig/aux-cache %attr(0644,root,root) %verify(not md5 size mtime) %ghost %config(missingok,noreplace) /etc/ld.so.cache %attr(0644,root,root) %verify(not md5 size mtime) %ghost %config(missingok,noreplace) /etc/gai.conf -%{_sbindir}/glibc* -%{_sbindir}/iconvconfig -/lib/* -%{_libdir}/libmemusage.so -%{_libdir}/libpcprofile.so -/sbin/ldconfig -/%{_lib}/*.* -%exclude /%{_lib}/libnss_db* -%exclude /%{_lib}/libnss_hesiod* -%exclude /%{_lib}/libnsl* -%exclude /lib/systemd +%{!?_licensedir:%global license %%doc} %license COPYING COPYING.LIB LICENSES -%files common -%dir %{_prefix}/share/i18n -%dir %{_prefix}/share/i18n/charmaps -%dir %{_prefix}/share/i18n/locales +%files -f common.filelist common %attr(0644,root,root) %verify(not md5 size mtime) %{_prefix}/lib/locale/locale-archive.tmpl %attr(0644,root,root) %verify(not md5 size mtime mode) %ghost %config(missingok,noreplace) %{_prefix}/lib/locale/locale-archive -%{_prefix}/lib/locale/C.utf8 +%dir %{_prefix}/lib/locale +%dir %{_prefix}/lib/locale/C.utf8 +%{_prefix}/lib/locale/C.utf8/* %{_prefix}/lib/locale/zh_CN.utf8 
%{_prefix}/lib/locale/en_US.utf8 %{_prefix}/share/locale/zh_CN %{_prefix}/share/locale/en_GB -%{_prefix}/bin/catchsegv -%{_prefix}/bin/gencat -%{_prefix}/bin/getconf -%{_prefix}/bin/getent -%{_prefix}/bin/iconv -%{_prefix}/bin/ldd -%{_prefix}/bin/locale -%{_prefix}/bin/localedef -%{_prefix}/bin/makedb -%{_prefix}/bin/pldd -%{_prefix}/bin/sotruss -%{_prefix}/bin/sprof -%{_prefix}/bin/tzselect %dir %attr(755,root,root) /etc/default %verify(not md5 size mtime) %config(noreplace) /etc/default/nss -%{_prefix}/share/locale/locale.alias -%{_sbindir}/build-locale-archive -%{_sbindir}/zdump -%{_sbindir}/zic -%files all-langpacks -f libc.lang +%files -f libc.lang all-langpacks %{_prefix}/lib/locale %exclude %{_prefix}/lib/locale/locale-archive %exclude %{_prefix}/lib/locale/locale-archive.tmpl @@ -806,61 +1020,14 @@ fi %exclude %{_prefix}/share/locale/en_GB %files locale-source +%dir %{_prefix}/share/i18n/locales %{_prefix}/share/i18n/locales/* +%dir %{_prefix}/share/i18n/charmaps %{_prefix}/share/i18n/charmaps/* -%files devel -%{_infodir}/* -%{_libdir}/*.a -%{_libdir}/*.o -%{_libdir}/*.so -%{_prefix}/include/*.h -%dir %{_prefix}/include/arpa -%dir %{_prefix}/include/bits -%dir %{_prefix}/include/bits/types -%dir %{_prefix}/include/gnu -%dir %{_prefix}/include/net -%dir %{_prefix}/include/netash -%dir %{_prefix}/include/netatalk -%dir %{_prefix}/include/netax25 -%dir %{_prefix}/include/neteconet -%dir %{_prefix}/include/netinet -%dir %{_prefix}/include/netipx -%dir %{_prefix}/include/netiucv -%dir %{_prefix}/include/netpacket -%dir %{_prefix}/include/netrom -%dir %{_prefix}/include/netrose -%dir %{_prefix}/include/nfs -%dir %{_prefix}/include/protocols -%dir %{_prefix}/include/rpc -%dir %{_prefix}/include/scsi -%dir %{_prefix}/include/sys -%{_prefix}/include/arpa/* -%{_prefix}/include/bits/* -%{_prefix}/include/gnu/* -%{_prefix}/include/net/* -%{_prefix}/include/netash/* -%{_prefix}/include/netatalk/* -%{_prefix}/include/netax25/* -%{_prefix}/include/neteconet/* 
-%{_prefix}/include/netinet/* -%{_prefix}/include/netipx/* -%{_prefix}/include/netiucv/* -%{_prefix}/include/netpacket/* -%{_prefix}/include/netrom/* -%{_prefix}/include/netrose/* -%{_prefix}/include/nfs/* -%{_prefix}/include/protocols/* -%{_prefix}/include/rpc/* -%{_prefix}/include/scsi/* -%{_prefix}/include/sys/* -%exclude %{_libdir}/libmemusage.so -%exclude %{_libdir}/libpcprofile.so -%exclude %{_libdir}/libnss* -%exclude %{_prefix}/bin/rpcgen -%exclude %{_prefix}/include/rpcsvc/* - -%files -n nscd +%files -f devel.filelist devel + +%files -f nscd.filelist -n nscd %config(noreplace) /etc/nscd.conf %dir %attr(0755,root,root) /var/run/nscd %dir %attr(0755,root,root) /var/db/nscd @@ -878,50 +1045,42 @@ fi %attr(0600,root,root) %verify(not md5 size mtime) %ghost %config(missingok,noreplace) /var/db/nscd/hosts %attr(0600,root,root) %verify(not md5 size mtime) %ghost %config(missingok,noreplace) /var/db/nscd/services %ghost %config(missingok,noreplace) /etc/sysconfig/nscd -%{_sbindir}/nscd -%files -n nss_modules +%files -f nss_modules.filelist -n nss_modules /var/db/Makefile -/%{_lib}/libnss_db* -/%{_lib}/libnss_hesiod* -%files nss-devel -%{_libdir}/libnss* +%files -f nss-devel.filelist nss-devel + +%files -f libnsl.filelist -n libnsl +/%{_lib}/libnsl.so.1 -%files -n libnsl -/%{_lib}/libnsl* +%files -f debugutils.filelist debugutils %if %{with benchtests} -%files benchtests -%{_prefix}/libexec/glibc-benchtests/* +%files -f benchtests.filelist benchtests %endif -%files debugutils -%if %{without bootstrap} -%{_prefix}/bin/memusage -%{_prefix}/bin/memusagestat -%endif -%{_prefix}/bin/mtrace -%{_prefix}/bin/pcprofiledump -%{_prefix}/bin/xtrace -%{_prefix}/lib/debug/usr/bin/*.debug -%{_prefix}/lib/debug/usr/lib64/*.a +%if 0%{?_enable_debug_packages} +%files -f debuginfo.filelist debuginfo +%files debugsource +%endif %files help #Doc of glibc package %doc README NEWS INSTALL elf/rtld-debugger-interface.txt - #Doc of common sub-package %doc documentation/README.timezone %doc 
documentation/gai.conf - #Doc of nss_modules sub-package %doc hesiod/README.hesiod - %changelog -* Sat May 23 2020 liqingqing - 2.28-39 +* Thu Jul 2 2020 Wang Shuo - 2.28-40 +- add filelist to improve the scalability +- backport many patch for bugfix + +* Sat May 30 2020 liqingqing - 2.28-39 - Fix array overflow in backtrace on PowerPC (bug 25423) * Tue Apr 28 2020 liqingqing - 2.28-38 diff --git a/nptl-Don-t-madvise-user-provided-stack.patch b/nptl-Don-t-madvise-user-provided-stack.patch new file mode 100644 index 0000000..777095a --- /dev/null +++ b/nptl-Don-t-madvise-user-provided-stack.patch @@ -0,0 +1,41 @@ +From 087942251f26d5fd5802b8d14e47d460263a0c4d Mon Sep 17 00:00:00 2001 +From: Szabolcs Nagy +Date: Wed, 24 Jun 2020 07:47:15 +0100 +Subject: [PATCH] nptl: Don't madvise user provided stack + +User provided stack should not be released nor madvised at +thread exit because it's owned by the user. + +If the memory is shared or file based then MADV_DONTNEED +can have unwanted effects. With memory tagging on aarch64 +linux the tags are dropped and thus it may invalidate +pointers. 
+ +Tested on aarch64-linux-gnu with MTE, it fixes + +FAIL: nptl/tst-stack3 +FAIL: nptl/tst-stack3-mem + +--- + nptl/pthread_create.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c +index 179f07a1..00931c19 100644 +--- a/nptl/pthread_create.c ++++ b/nptl/pthread_create.c +@@ -573,8 +573,9 @@ START_THREAD_DEFN + } + #endif + +- advise_stack_range (pd->stackblock, pd->stackblock_size, (uintptr_t) pd, +- pd->guardsize); ++ if (!pd->user_stack) ++ advise_stack_range (pd->stackblock, pd->stackblock_size, (uintptr_t) pd, ++ pd->guardsize); + + if (__glibc_unlikely (pd->cancelhandling & SETXID_BITMASK)) + { +-- +2.19.1 + diff --git a/nptl-wait-for-pending-setxid-request-also-in-detache.patch b/nptl-wait-for-pending-setxid-request-also-in-detache.patch new file mode 100644 index 0000000..750dadb --- /dev/null +++ b/nptl-wait-for-pending-setxid-request-also-in-detache.patch @@ -0,0 +1,52 @@ +From 4cab20fa49b3ea3e3454fdc4f13bf3828d8efd19 Mon Sep 17 00:00:00 2001 +From: Andreas Schwab +Date: Thu, 7 May 2020 15:50:09 +0200 +Subject: [PATCH] nptl: wait for pending setxid request also in detached thread + (bug 25942) + +There is a race between __nptl_setxid and exiting detached thread, which +causes a deadlock on stack_cache_lock. The deadlock happens in this +state: + +T1: setgroups -> __nptl_setxid (holding stack_cache_lock, waiting on cmdp->cntr == 0) +T2 (detached, exiting): start_thread -> __deallocate_stack (waiting on stack_cache_lock) +more threads waiting on stack_cache_lock in pthread_create + +For non-detached threads, start_thread waits for its own setxid handler to +finish before exiting. Do this for detached threads as well. 
+--- + nptl/pthread_create.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c +index afd379e89a..a43089065c 100644 +--- a/nptl/pthread_create.c ++++ b/nptl/pthread_create.c +@@ -552,11 +552,7 @@ START_THREAD_DEFN + advise_stack_range (pd->stackblock, pd->stackblock_size, (uintptr_t) pd, + pd->guardsize); + +- /* If the thread is detached free the TCB. */ +- if (IS_DETACHED (pd)) +- /* Free the TCB. */ +- __free_tcb (pd); +- else if (__glibc_unlikely (pd->cancelhandling & SETXID_BITMASK)) ++ if (__glibc_unlikely (pd->cancelhandling & SETXID_BITMASK)) + { + /* Some other thread might call any of the setXid functions and expect + us to reply. In this case wait until we did that. */ +@@ -572,6 +568,11 @@ START_THREAD_DEFN + pd->setxid_futex = 0; + } + ++ /* If the thread is detached free the TCB. */ ++ if (IS_DETACHED (pd)) ++ /* Free the TCB. */ ++ __free_tcb (pd); ++ + /* We cannot call '_exit' here. '_exit' will terminate the process. + + The 'exit' implementation in the kernel will signal when the +-- +2.19.1 + diff --git a/turn-REP_STOSB_THRESHOLD-from-2k-to-1M.patch b/turn-REP_STOSB_THRESHOLD-from-2k-to-1M.patch new file mode 100644 index 0000000..5b766cf --- /dev/null +++ b/turn-REP_STOSB_THRESHOLD-from-2k-to-1M.patch @@ -0,0 +1,25 @@ +From 44314a556239a7524b5a6451025737c1bdbb1cd0 Mon Sep 17 00:00:00 2001 +From: Wang Shuo +Date: Thu, 21 May 2020 11:23:06 +0800 +Subject: [PATCH] turn REP_STOSB_THRESHOLD from 2k to 1M + +--- + sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +index dcd63c92..92c08eed 100644 +--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +@@ -65,7 +65,7 @@ + Enhanced REP STOSB. 
Since the stored value is fixed, larger register + size has minimal impact on threshold. */ + #ifndef REP_STOSB_THRESHOLD +-# define REP_STOSB_THRESHOLD 2048 ++# define REP_STOSB_THRESHOLD 1048576 + #endif + + #ifndef SECTION +-- +2.19.1 + diff --git a/x86-64-Use-RDX_LP-on-__x86_shared_non_temporal_thres.patch b/x86-64-Use-RDX_LP-on-__x86_shared_non_temporal_thres.patch new file mode 100644 index 0000000..82f8476 --- /dev/null +++ b/x86-64-Use-RDX_LP-on-__x86_shared_non_temporal_thres.patch @@ -0,0 +1,50 @@ +From 55c7bcc71b84123d5d4bd2814366a6b05fcf8ebd Mon Sep 17 00:00:00 2001 +From: "H.J. Lu" +Date: Sat, 9 May 2020 12:04:23 -0700 +Subject: [PATCH] x86-64: Use RDX_LP on __x86_shared_non_temporal_threshold [BZ + #25966] + +Since __x86_shared_non_temporal_threshold is defined as + +long int __x86_shared_non_temporal_threshold; + +and long int is 4 bytes for x32, use RDX_LP to compare against +__x86_shared_non_temporal_threshold in assembly code. +--- + sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S +index c763b7d871..74953245aa 100644 +--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S +@@ -244,7 +244,7 @@ L(return): + ret + + L(movsb): +- cmpq __x86_shared_non_temporal_threshold(%rip), %rdx ++ cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP + jae L(more_8x_vec) + cmpq %rsi, %rdi + jb 1f +@@ -402,7 +402,7 @@ L(more_8x_vec): + addq %r8, %rdx + #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc) + /* Check non-temporal store threshold. 
*/ +- cmpq __x86_shared_non_temporal_threshold(%rip), %rdx ++ cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP + ja L(large_forward) + #endif + L(loop_4x_vec_forward): +@@ -454,7 +454,7 @@ L(more_8x_vec_backward): + subq %r8, %rdx + #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc) + /* Check non-temporal store threshold. */ +- cmpq __x86_shared_non_temporal_threshold(%rip), %rdx ++ cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP + ja L(large_backward) + #endif + L(loop_4x_vec_backward): +-- +2.19.1 + diff --git a/x86_64-Use-xmmN-with-vpxor-to-clear-a-vector-registe.patch b/x86_64-Use-xmmN-with-vpxor-to-clear-a-vector-registe.patch new file mode 100644 index 0000000..a308561 --- /dev/null +++ b/x86_64-Use-xmmN-with-vpxor-to-clear-a-vector-registe.patch @@ -0,0 +1,43 @@ +From a35a59036ebae3efcdf5e8167610e0656fca9770 Mon Sep 17 00:00:00 2001 +From: "H.J. Lu" +Date: Thu, 11 Jun 2020 12:41:18 -0700 +Subject: [PATCH] x86_64: Use %xmmN with vpxor to clear a vector register + +Since "vpxor %xmmN, %xmmN, %xmmN" clears the whole vector register, use +%xmmN, instead of %ymmN, with vpxor to clear a vector register. +--- + sysdeps/x86_64/multiarch/strcmp-avx2.S | 4 ++-- + sysdeps/x86_64/multiarch/strrchr-avx2.S | 2 +- + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S +index 48d03a9f46..5f88a68262 100644 +--- a/sysdeps/x86_64/multiarch/strcmp-avx2.S ++++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S +@@ -91,8 +91,8 @@ ENTRY (STRCMP) + # endif + movl %edi, %eax + xorl %edx, %edx +- /* Make %ymm7 all zeros in this function. */ +- vpxor %ymm7, %ymm7, %ymm7 ++ /* Make %xmm7 (%ymm7) all zeros in this function. 
*/ ++ vpxor %xmm7, %xmm7, %xmm7 + orl %esi, %eax + andl $(PAGE_SIZE - 1), %eax + cmpl $(PAGE_SIZE - (VEC_SIZE * 4)), %eax +diff --git a/sysdeps/x86_64/multiarch/strrchr-avx2.S b/sysdeps/x86_64/multiarch/strrchr-avx2.S +index 23077b4c45..146bdd51d0 100644 +--- a/sysdeps/x86_64/multiarch/strrchr-avx2.S ++++ b/sysdeps/x86_64/multiarch/strrchr-avx2.S +@@ -44,7 +44,7 @@ ENTRY (STRRCHR) + movl %edi, %ecx + /* Broadcast CHAR to YMM4. */ + VPBROADCAST %xmm4, %ymm4 +- vpxor %ymm0, %ymm0, %ymm0 ++ vpxor %xmm0, %xmm0, %xmm0 + + /* Check if we may cross page boundary with one vector load. */ + andl $(2 * VEC_SIZE - 1), %ecx +-- +2.19.1 + -- Gitee