From dfc9af6c3a7f4e6b0ddd924fc888292358100e7c Mon Sep 17 00:00:00 2001
From: Qingqing Li <liqingqing3@huawei.com>
Date: Fri, 28 Mar 2025 14:44:31 +0800
Subject: [PATCH] Sync from glibc upstream 2.38 branch. Below is the patch
 list: - math: Improve layout of exp/exp10 data - AArch64: Use
 prefer_sve_ifuncs for SVE memset - AArch64: Add SVE memset - math: Improve
 layout of expf data - AArch64: Remove zva_128 from memset - AArch64: Optimize
 memset - AArch64: Improve generic strlen - assert: Add test for CVE-2025-0395

(cherry picked from commit a6a6276229d415c277b108ed8e6ef4f2fe517bae)
---
 AArch64-Add-SVE-memset.patch                  | 200 ++++++++++++
 AArch64-Improve-generic-strlen.patch          |  92 ++++++
 AArch64-Optimize-memset.patch                 | 287 ++++++++++++++++++
 AArch64-Remove-zva_128-from-memset.patch      |  65 ++++
 ...Use-prefer_sve_ifuncs-for-SVE-memset.patch |  29 ++
 assert-Add-test-for-CVE-2025-0395.patch       | 132 ++++++++
 glibc.spec                                    |  20 +-
 math-Improve-layout-of-exp-exp10-data.patch   |  39 +++
 math-Improve-layout-of-expf-data.patch        |  34 +++
 9 files changed, 897 insertions(+), 1 deletion(-)
 create mode 100644 AArch64-Add-SVE-memset.patch
 create mode 100644 AArch64-Improve-generic-strlen.patch
 create mode 100644 AArch64-Optimize-memset.patch
 create mode 100644 AArch64-Remove-zva_128-from-memset.patch
 create mode 100644 AArch64-Use-prefer_sve_ifuncs-for-SVE-memset.patch
 create mode 100644 assert-Add-test-for-CVE-2025-0395.patch
 create mode 100644 math-Improve-layout-of-exp-exp10-data.patch
 create mode 100644 math-Improve-layout-of-expf-data.patch

diff --git a/AArch64-Add-SVE-memset.patch b/AArch64-Add-SVE-memset.patch
new file mode 100644
index 0000000..502acd4
--- /dev/null
+++ b/AArch64-Add-SVE-memset.patch
@@ -0,0 +1,200 @@
+From 52c2b1556f773d9a75d030160e0e273a5ea84502 Mon Sep 17 00:00:00 2001
+From: Wilco Dijkstra <wilco.dijkstra@arm.com>
+Date: Tue, 24 Dec 2024 18:01:59 +0000
+Subject: [PATCH] AArch64: Add SVE memset
+
+Add SVE memset based on the generic memset with a predicated store for sizes < 16.
+Unaligned memsets of 128-1024 are improved by ~20% on average by using aligned
+stores for the last 64 bytes.  Performance of random memset benchmark improves
+by ~2% on Neoverse V1.
+
+Reviewed-by: Yury Khrustalev <yury.khrustalev@arm.com>
+(cherry picked from commit 163b1bbb76caba4d9673c07940c5930a1afa7548)
+---
+ sysdeps/aarch64/multiarch/Makefile           |   1 +
+ sysdeps/aarch64/multiarch/ifunc-impl-list.c  |   3 +-
+ sysdeps/aarch64/multiarch/memset.c           |   4 +
+ sysdeps/aarch64/multiarch/memset_sve_zva64.S | 123 +++++++++++++++++++
+ 4 files changed, 130 insertions(+), 1 deletion(-)
+ create mode 100644 sysdeps/aarch64/multiarch/memset_sve_zva64.S
+
+diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile
+index e4720b7468..214b6137b0 100644
+--- a/sysdeps/aarch64/multiarch/Makefile
++++ b/sysdeps/aarch64/multiarch/Makefile
+@@ -14,6 +14,7 @@ sysdep_routines += \
+   memset_generic \
+   memset_kunpeng \
+   memset_mops \
++  memset_sve_zva64 \
+   memset_zva64 \
+   strlen_asimd \
+   strlen_generic \
+diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
+index 73038ac810..2fa6baa319 100644
+--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c
++++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
+@@ -56,7 +56,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ 	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_emag)
+ 	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_kunpeng)
+ #if HAVE_AARCH64_SVE_ASM
+-	      IFUNC_IMPL_ADD (array, i, memset, sve && zva_size == 256, __memset_a64fx)
++	      IFUNC_IMPL_ADD (array, i, memset, sve && !bti && zva_size == 256, __memset_a64fx)
++	      IFUNC_IMPL_ADD (array, i, memset, sve && zva_size == 64, __memset_sve_zva64)
+ #endif
+ 	      IFUNC_IMPL_ADD (array, i, memset, mops, __memset_mops)
+ 	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_generic))
+diff --git a/sysdeps/aarch64/multiarch/memset.c b/sysdeps/aarch64/multiarch/memset.c
+index 6deb6865e5..89fde57f42 100644
+--- a/sysdeps/aarch64/multiarch/memset.c
++++ b/sysdeps/aarch64/multiarch/memset.c
+@@ -34,6 +34,7 @@ extern __typeof (__redirect_memset) __memset_kunpeng attribute_hidden;
+ extern __typeof (__redirect_memset) __memset_a64fx attribute_hidden;
+ extern __typeof (__redirect_memset) __memset_generic attribute_hidden;
+ extern __typeof (__redirect_memset) __memset_mops attribute_hidden;
++extern __typeof (__redirect_memset) __memset_sve_zva64 attribute_hidden;
+ 
+ static inline __typeof (__redirect_memset) *
+ select_memset_ifunc (void)
+@@ -47,6 +48,9 @@ select_memset_ifunc (void)
+     {
+       if (IS_A64FX (midr) && zva_size == 256)
+ 	return __memset_a64fx;
++
++      if (zva_size == 64)
++	return __memset_sve_zva64;
+     }
+ 
+   if (IS_KUNPENG920 (midr))
+diff --git a/sysdeps/aarch64/multiarch/memset_sve_zva64.S b/sysdeps/aarch64/multiarch/memset_sve_zva64.S
+new file mode 100644
+index 0000000000..7fb40fdd9e
+--- /dev/null
++++ b/sysdeps/aarch64/multiarch/memset_sve_zva64.S
+@@ -0,0 +1,123 @@
++/* Optimized memset for SVE.
++   Copyright (C) 2025 Free Software Foundation, Inc.
++
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library.  If not, see
++   <https://www.gnu.org/licenses/>.  */
++
++#include <sysdep.h>
++
++/* Assumptions:
++ *
++ * ARMv8-a, AArch64, Advanced SIMD, SVE, unaligned accesses.
++ * ZVA size is 64.
++ */
++
++#if HAVE_AARCH64_SVE_ASM
++
++.arch armv8.2-a+sve
++
++#define dstin	x0
++#define val	x1
++#define valw	w1
++#define count	x2
++#define dst	x3
++#define dstend	x4
++#define zva_val	x5
++#define vlen	x5
++#define off	x3
++#define dstend2 x5
++
++ENTRY (__memset_sve_zva64)
++	dup	v0.16B, valw
++	cmp	count, 16
++	b.lo	L(set_16)
++	/* At least 16 bytes: dstend points just past the region.  */
++	add	dstend, dstin, count
++	cmp	count, 64
++	b.hs	L(set_128)
++
++	/* Set 16..63 bytes.  */
++	mov	off, 16
++	and	off, off, count, lsr 1	/* 16 if count >= 32, else 0.  */
++	sub	dstend2, dstend, off
++	str	q0, [dstin]
++	str	q0, [dstin, off]
++	str	q0, [dstend2, -16]
++	str	q0, [dstend, -16]
++	ret
++	/* Set 0..15 bytes with a single predicated store.  */
++	.p2align 4
++L(set_16):
++	whilelo p0.b, xzr, count
++	st1b	z0.b, p0, [dstin]
++	ret
++	/* Set 64..128 bytes from both ends; the stores may overlap.  */
++	.p2align 4
++L(set_128):
++	bic	dst, dstin, 15		/* Dstin rounded down to 16.  */
++	cmp	count, 128
++	b.hi	L(set_long)
++	stp	q0, q0, [dstin]
++	stp	q0, q0, [dstin, 32]
++	stp	q0, q0, [dstend, -64]
++	stp	q0, q0, [dstend, -32]
++	ret
++	/* Over 128 bytes.  DC ZVA needs count >= 256 and a zero byte value.  */
++	.p2align 4
++L(set_long):
++	cmp	count, 256
++	b.lo	L(no_zva)
++	tst	valw, 255
++	b.ne	L(no_zva)
++	/* Fill from dstin to at least the next 64-byte boundary.  */
++	str	q0, [dstin]
++	str	q0, [dst, 16]
++	bic	dst, dstin, 31
++	stp	q0, q0, [dst, 32]
++	bic	dst, dstin, 63
++	sub	count, dstend, dst	/* Count is now 64 too large.  */
++	sub	count, count, 128	/* Adjust count and bias for loop.  */
++
++	sub	x8, dstend, 1		/* Write last bytes before ZVA loop.  */
++	bic	x8, x8, 15
++	stp	q0, q0, [x8, -48]
++	str	q0, [x8, -16]
++	str	q0, [dstend, -16]
++	/* DC ZVA each following 64-byte block (ZVA size assumed 64).  */
++	.p2align 4
++L(zva64_loop):
++	add	dst, dst, 64
++	dc	zva, dst
++	subs	count, count, 64
++	b.hi	L(zva64_loop)
++	ret
++	/* Store loop: 64 bytes per pass, then the final 64 bytes.  */
++L(no_zva):
++	str	q0, [dstin]
++	sub	count, dstend, dst	/* Count is 16 too large.  */
++	sub	count, count, 64 + 16	/* Adjust count and bias for loop.  */
++L(no_zva_loop):
++	stp	q0, q0, [dst, 16]
++	stp	q0, q0, [dst, 48]
++	add	dst, dst, 64
++	subs	count, count, 64
++	b.hi	L(no_zva_loop)
++	stp	q0, q0, [dstend, -64]
++	stp	q0, q0, [dstend, -32]
++	ret
++
++END (__memset_sve_zva64)
++#endif
+-- 
+2.27.0
+
diff --git a/AArch64-Improve-generic-strlen.patch b/AArch64-Improve-generic-strlen.patch
new file mode 100644
index 0000000..7868ad4
--- /dev/null
+++ b/AArch64-Improve-generic-strlen.patch
@@ -0,0 +1,92 @@
+From 9ca74b8ad1968d935815bdc2f1f1c7e9f2e32f70 Mon Sep 17 00:00:00 2001
+From: Wilco Dijkstra <wilco.dijkstra@arm.com>
+Date: Wed, 7 Aug 2024 14:43:47 +0100
+Subject: [PATCH] AArch64: Improve generic strlen
+
+Improve performance by handling another 16 bytes before entering the loop.
+Use ADDHN in the loop to avoid SHRN+FMOV when it terminates.  Change final
+size computation to avoid increasing latency.  On Neoverse V1 performance
+of the random strlen benchmark improves by 4.6%.
+
+Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
+(cherry picked from commit 3dc426b642dcafdbc11a99f2767e081d086f5fc7)
+---
+ sysdeps/aarch64/strlen.S | 39 +++++++++++++++++++++++++++------------
+ 1 file changed, 27 insertions(+), 12 deletions(-)
+
+diff --git a/sysdeps/aarch64/strlen.S b/sysdeps/aarch64/strlen.S
+index 133ef93342..352fb40d3a 100644
+--- a/sysdeps/aarch64/strlen.S
++++ b/sysdeps/aarch64/strlen.S
+@@ -1,4 +1,5 @@
+-/* Copyright (C) 2012-2023 Free Software Foundation, Inc.
++/* Generic optimized strlen using SIMD.
++   Copyright (C) 2012-2024 Free Software Foundation, Inc.
+ 
+    This file is part of the GNU C Library.
+ 
+@@ -56,36 +57,50 @@ ENTRY (STRLEN)
+ 	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
+ 	fmov	synd, dend
+ 	lsr	synd, synd, shift
+-	cbz	synd, L(loop)
++	cbz	synd, L(next16)
+ 
+ 	rbit	synd, synd
+ 	clz	result, synd
+ 	lsr	result, result, 2
+ 	ret
+ 
++L(next16):
++	ldr	data, [src, 16]
++	cmeq	vhas_nul.16b, vdata.16b, 0
++	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
++	fmov	synd, dend
++	cbz	synd, L(loop)
++	add	src, src, 16
++#ifndef __AARCH64EB__
++	rbit	synd, synd
++#endif
++	sub	result, src, srcin
++	clz	tmp, synd
++	add	result, result, tmp, lsr 2
++	ret
++
+ 	.p2align 5
+ L(loop):
+-	ldr	data, [src, 16]
++	ldr	data, [src, 32]!
+ 	cmeq	vhas_nul.16b, vdata.16b, 0
+-	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
++	addhn	vend.8b, vhas_nul.8h, vhas_nul.8h
+ 	fmov	synd, dend
+ 	cbnz	synd, L(loop_end)
+-	ldr	data, [src, 32]!
++	ldr	data, [src, 16]
+ 	cmeq	vhas_nul.16b, vdata.16b, 0
+-	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
++	addhn	vend.8b, vhas_nul.8h, vhas_nul.8h
+ 	fmov	synd, dend
+ 	cbz	synd, L(loop)
+-	sub	src, src, 16
++	add	src, src, 16
+ L(loop_end):
+-	shrn	vend.8b, vhas_nul.8h, 4		/* 128->64 */
+-	sub	result, src, srcin
+-	fmov	synd, dend
++	sub	result, shift, src, lsl 2	/* (srcin - src) << 2.  */
+ #ifndef __AARCH64EB__
+ 	rbit	synd, synd
++	sub	result, result, 3
+ #endif
+-	add	result, result, 16
+ 	clz	tmp, synd
+-	add	result, result, tmp, lsr 2
++	sub	result, tmp, result
++	lsr	result, result, 2
+ 	ret
+ 
+ END (STRLEN)
+-- 
+2.27.0
+
diff --git a/AArch64-Optimize-memset.patch b/AArch64-Optimize-memset.patch
new file mode 100644
index 0000000..663f4ef
--- /dev/null
+++ b/AArch64-Optimize-memset.patch
@@ -0,0 +1,287 @@
+From 95aa21432ccbf77225abd485d98df36ba760ff80 Mon Sep 17 00:00:00 2001
+From: Wilco Dijkstra <wilco.dijkstra@arm.com>
+Date: Mon, 9 Sep 2024 15:26:47 +0100
+Subject: [PATCH] AArch64: Optimize memset
+
+Improve small memsets by avoiding branches and use overlapping stores.
+Use DC ZVA for zero fills over 128 bytes.  Remove unnecessary code for ZVA sizes
+other than 64 and 128.  Performance of random memset benchmark improves by 24%
+on Neoverse N1.
+
+Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
+(cherry picked from commit cec3aef32412779e207f825db0d057ebb4628ae8)
+---
+ sysdeps/aarch64/memset.S | 195 +++++++++++++++++----------------------
+ 1 file changed, 84 insertions(+), 111 deletions(-)
+
+diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S
+index bbfb7184c3..caafb019e2 100644
+--- a/sysdeps/aarch64/memset.S
++++ b/sysdeps/aarch64/memset.S
+@@ -1,4 +1,5 @@
+-/* Copyright (C) 2012-2023 Free Software Foundation, Inc.
++/* Generic optimized memset using SIMD.
++   Copyright (C) 2012-2024 Free Software Foundation, Inc.
+ 
+    This file is part of the GNU C Library.
+ 
+@@ -17,7 +18,6 @@
+    <https://www.gnu.org/licenses/>.  */
+ 
+ #include <sysdep.h>
+-#include "memset-reg.h"
+ 
+ #ifndef MEMSET
+ # define MEMSET memset
+@@ -25,130 +25,132 @@
+ 
+ /* Assumptions:
+  *
+- * ARMv8-a, AArch64, unaligned accesses
++ * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
+  *
+  */
+ 
+-ENTRY (MEMSET)
++#define dstin	x0
++#define val	x1
++#define valw	w1
++#define count	x2
++#define dst	x3
++#define dstend	x4
++#define zva_val	x5
++#define off	x3
++#define dstend2	x5
+ 
++ENTRY (MEMSET)
+ 	PTR_ARG (0)
+ 	SIZE_ARG (2)
+ 
+ 	dup	v0.16B, valw
++	cmp	count, 16
++	b.lo	L(set_small)
++
+ 	add	dstend, dstin, count
++	cmp	count, 64
++	b.hs	L(set_128)
+ 
+-	cmp	count, 96
+-	b.hi	L(set_long)
+-	cmp	count, 16
+-	b.hs	L(set_medium)
+-	mov	val, v0.D[0]
++	/* Set 16..63 bytes.  */
++	mov	off, 16
++	and	off, off, count, lsr 1
++	sub	dstend2, dstend, off
++	str	q0, [dstin]
++	str	q0, [dstin, off]
++	str	q0, [dstend2, -16]
++	str	q0, [dstend, -16]
++	ret
+ 
++	.p2align 4
+ 	/* Set 0..15 bytes.  */
+-	tbz	count, 3, 1f
+-	str	val, [dstin]
+-	str	val, [dstend, -8]
+-	ret
+-	nop
+-1:	tbz	count, 2, 2f
+-	str	valw, [dstin]
+-	str	valw, [dstend, -4]
++L(set_small):
++	add	dstend, dstin, count
++	cmp	count, 4
++	b.lo	2f
++	lsr	off, count, 3
++	sub	dstend2, dstend, off, lsl 2
++	str	s0, [dstin]
++	str	s0, [dstin, off, lsl 2]
++	str	s0, [dstend2, -4]
++	str	s0, [dstend, -4]
+ 	ret
++
++	/* Set 0..3 bytes.  */
+ 2:	cbz	count, 3f
++	lsr	off, count, 1
+ 	strb	valw, [dstin]
+-	tbz	count, 1, 3f
+-	strh	valw, [dstend, -2]
++	strb	valw, [dstin, off]
++	strb	valw, [dstend, -1]
+ 3:	ret
+ 
+-	/* Set 17..96 bytes.  */
+-L(set_medium):
+-	str	q0, [dstin]
+-	tbnz	count, 6, L(set96)
+-	str	q0, [dstend, -16]
+-	tbz	count, 5, 1f
+-	str	q0, [dstin, 16]
+-	str	q0, [dstend, -32]
+-1:	ret
+-
+ 	.p2align 4
+-	/* Set 64..96 bytes.  Write 64 bytes from the start and
+-	   32 bytes from the end.  */
+-L(set96):
+-	str	q0, [dstin, 16]
++L(set_128):
++	bic	dst, dstin, 15
++	cmp	count, 128
++	b.hi	L(set_long)
++	stp	q0, q0, [dstin]
+ 	stp	q0, q0, [dstin, 32]
++	stp	q0, q0, [dstend, -64]
+ 	stp	q0, q0, [dstend, -32]
+ 	ret
+ 
+-	.p2align 3
+-	nop
++	.p2align 4
+ L(set_long):
+-	and	valw, valw, 255
+-	bic	dst, dstin, 15
+ 	str	q0, [dstin]
+-	cmp	count, 256
+-	ccmp	valw, 0, 0, cs
+-	b.eq	L(try_zva)
+-L(no_zva):
+-	sub	count, dstend, dst	/* Count is 16 too large.  */
+-	sub	dst, dst, 16		/* Dst is biased by -32.  */
+-	sub	count, count, 64 + 16	/* Adjust count and bias for loop.  */
+-1:	stp	q0, q0, [dst, 32]
+-	stp	q0, q0, [dst, 64]!
+-L(tail64):
+-	subs	count, count, 64
+-	b.hi	1b
+-2:	stp	q0, q0, [dstend, -64]
++	str	q0, [dst, 16]
++	tst	valw, 255
++	b.ne	L(no_zva)
++#ifndef ZVA64_ONLY
++	mrs	zva_val, dczid_el0
++	and	zva_val, zva_val, 31
++	cmp	zva_val, 4		/* ZVA size is 64 bytes.  */
++	b.ne	L(zva_128)
++#endif
++	stp	q0, q0, [dst, 32]
++	bic	dst, dstin, 63
++	sub	count, dstend, dst	/* Count is now 64 too large.  */
++	sub	count, count, 64 + 64	/* Adjust count and bias for loop.  */
++
++	/* Write last bytes before ZVA loop.  */
++	stp	q0, q0, [dstend, -64]
+ 	stp	q0, q0, [dstend, -32]
++
++	.p2align 4
++L(zva64_loop):
++	add	dst, dst, 64
++	dc	zva, dst
++	subs	count, count, 64
++	b.hi	L(zva64_loop)
+ 	ret
+ 
+-L(try_zva):
+-#ifndef ZVA64_ONLY
+ 	.p2align 3
+-	mrs	tmp1, dczid_el0
+-	tbnz	tmp1w, 4, L(no_zva)
+-	and	tmp1w, tmp1w, 15
+-	cmp	tmp1w, 4	/* ZVA size is 64 bytes.  */
+-	b.ne	 L(zva_128)
+-	nop
+-#endif
+-	/* Write the first and last 64 byte aligned block using stp rather
+-	   than using DC ZVA.  This is faster on some cores.
+-	 */
+-	.p2align 4
+-L(zva_64):
+-	str	q0, [dst, 16]
++L(no_zva):
++	sub	count, dstend, dst	/* Count is 32 too large.  */
++	sub	count, count, 64 + 32	/* Adjust count and bias for loop.  */
++L(no_zva_loop):
+ 	stp	q0, q0, [dst, 32]
+-	bic	dst, dst, 63
+ 	stp	q0, q0, [dst, 64]
+-	stp	q0, q0, [dst, 96]
+-	sub	count, dstend, dst	/* Count is now 128 too large.	*/
+-	sub	count, count, 128+64+64	/* Adjust count and bias for loop.  */
+-	add	dst, dst, 128
+-1:	dc	zva, dst
+ 	add	dst, dst, 64
+ 	subs	count, count, 64
+-	b.hi	1b
+-	stp	q0, q0, [dst, 0]
+-	stp	q0, q0, [dst, 32]
++	b.hi	L(no_zva_loop)
+ 	stp	q0, q0, [dstend, -64]
+ 	stp	q0, q0, [dstend, -32]
+ 	ret
+ 
+ #ifndef ZVA64_ONLY
+-	.p2align 3
++	.p2align 4
+ L(zva_128):
+-	cmp	tmp1w, 5	/* ZVA size is 128 bytes.  */
+-	b.ne	L(zva_other)
++	cmp	zva_val, 5		/* ZVA size is 128 bytes.  */
++	b.ne	L(no_zva)
+ 
+-	str	q0, [dst, 16]
+ 	stp	q0, q0, [dst, 32]
+ 	stp	q0, q0, [dst, 64]
+ 	stp	q0, q0, [dst, 96]
+ 	bic	dst, dst, 127
+ 	sub	count, dstend, dst	/* Count is now 128 too large.	*/
+-	sub	count, count, 128+128	/* Adjust count and bias for loop.  */
+-	add	dst, dst, 128
+-1:	dc	zva, dst
+-	add	dst, dst, 128
++	sub	count, count, 128 + 128	/* Adjust count and bias for loop.  */
++1:	add	dst, dst, 128
++	dc	zva, dst
+ 	subs	count, count, 128
+ 	b.hi	1b
+ 	stp	q0, q0, [dstend, -128]
+@@ -156,35 +158,6 @@ L(zva_128):
+ 	stp	q0, q0, [dstend, -64]
+ 	stp	q0, q0, [dstend, -32]
+ 	ret
+-
+-L(zva_other):
+-	mov	tmp2w, 4
+-	lsl	zva_lenw, tmp2w, tmp1w
+-	add	tmp1, zva_len, 64	/* Max alignment bytes written.	 */
+-	cmp	count, tmp1
+-	blo	L(no_zva)
+-
+-	sub	tmp2, zva_len, 1
+-	add	tmp1, dst, zva_len
+-	add	dst, dst, 16
+-	subs	count, tmp1, dst	/* Actual alignment bytes to write.  */
+-	bic	tmp1, tmp1, tmp2	/* Aligned dc zva start address.  */
+-	beq	2f
+-1:	stp	q0, q0, [dst], 64
+-	stp	q0, q0, [dst, -32]
+-	subs	count, count, 64
+-	b.hi	1b
+-2:	mov	dst, tmp1
+-	sub	count, dstend, tmp1	/* Remaining bytes to write.  */
+-	subs	count, count, zva_len
+-	b.lo	4f
+-3:	dc	zva, dst
+-	add	dst, dst, zva_len
+-	subs	count, count, zva_len
+-	b.hs	3b
+-4:	add	count, count, zva_len
+-	sub	dst, dst, 32		/* Bias dst for tail loop.  */
+-	b	L(tail64)
+ #endif
+ 
+ END (MEMSET)
+-- 
+2.27.0
+
diff --git a/AArch64-Remove-zva_128-from-memset.patch b/AArch64-Remove-zva_128-from-memset.patch
new file mode 100644
index 0000000..aa8bc76
--- /dev/null
+++ b/AArch64-Remove-zva_128-from-memset.patch
@@ -0,0 +1,65 @@
+From 5fe151d86a19bc3dc791fd2d92efeb6c6e11cf64 Mon Sep 17 00:00:00 2001
+From: Wilco Dijkstra <wilco.dijkstra@arm.com>
+Date: Mon, 25 Nov 2024 18:43:08 +0000
+Subject: [PATCH] AArch64: Remove zva_128 from memset
+
+Remove ZVA 128 support from memset - the new memset no longer
+guarantees count >= 256, which can result in underflow and a
+crash if ZVA size is 128 ([1]).  Since only one CPU uses a ZVA
+size of 128 and its memcpy implementation was removed in commit
+e162ab2bf1b82c40f29e1925986582fa07568ce8, remove this special
+case too.
+
+[1] https://sourceware.org/pipermail/libc-alpha/2024-November/161626.html
+
+Reviewed-by: Andrew Pinski <quic_apinski@quicinc.com>
+(cherry picked from commit a08d9a52f967531a77e1824c23b5368c6434a72d)
+---
+ sysdeps/aarch64/memset.S | 25 +------------------------
+ 1 file changed, 1 insertion(+), 24 deletions(-)
+
+diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S
+index caafb019e2..71814d0b2f 100644
+--- a/sysdeps/aarch64/memset.S
++++ b/sysdeps/aarch64/memset.S
+@@ -104,7 +104,7 @@ L(set_long):
+ 	mrs	zva_val, dczid_el0
+ 	and	zva_val, zva_val, 31
+ 	cmp	zva_val, 4		/* ZVA size is 64 bytes.  */
+-	b.ne	L(zva_128)
++	b.ne	L(no_zva)
+ #endif
+ 	stp	q0, q0, [dst, 32]
+ 	bic	dst, dstin, 63
+@@ -137,28 +137,5 @@ L(no_zva_loop):
+ 	stp	q0, q0, [dstend, -32]
+ 	ret
+ 
+-#ifndef ZVA64_ONLY
+-	.p2align 4
+-L(zva_128):
+-	cmp	zva_val, 5		/* ZVA size is 128 bytes.  */
+-	b.ne	L(no_zva)
+-
+-	stp	q0, q0, [dst, 32]
+-	stp	q0, q0, [dst, 64]
+-	stp	q0, q0, [dst, 96]
+-	bic	dst, dst, 127
+-	sub	count, dstend, dst	/* Count is now 128 too large.	*/
+-	sub	count, count, 128 + 128	/* Adjust count and bias for loop.  */
+-1:	add	dst, dst, 128
+-	dc	zva, dst
+-	subs	count, count, 128
+-	b.hi	1b
+-	stp	q0, q0, [dstend, -128]
+-	stp	q0, q0, [dstend, -96]
+-	stp	q0, q0, [dstend, -64]
+-	stp	q0, q0, [dstend, -32]
+-	ret
+-#endif
+-
+ END (MEMSET)
+ libc_hidden_builtin_def (MEMSET)
+-- 
+2.27.0
+
diff --git a/AArch64-Use-prefer_sve_ifuncs-for-SVE-memset.patch b/AArch64-Use-prefer_sve_ifuncs-for-SVE-memset.patch
new file mode 100644
index 0000000..b92230b
--- /dev/null
+++ b/AArch64-Use-prefer_sve_ifuncs-for-SVE-memset.patch
@@ -0,0 +1,29 @@
+From 097299ffa904b327fce83770fa6a522e4393ddb3 Mon Sep 17 00:00:00 2001
+From: Wilco Dijkstra <wilco.dijkstra@arm.com>
+Date: Thu, 27 Feb 2025 16:28:52 +0000
+Subject: [PATCH] AArch64: Use prefer_sve_ifuncs for SVE memset
+
+Use prefer_sve_ifuncs for SVE memset just like memcpy.
+
+Reviewed-by: Yury Khrustalev <yury.khrustalev@arm.com>
+(cherry picked from commit 0f044be1dae5169d0e57f8d487b427863aeadab4)
+---
+ sysdeps/aarch64/multiarch/memset.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/sysdeps/aarch64/multiarch/memset.c b/sysdeps/aarch64/multiarch/memset.c
+index 89fde57f42..ce5d35a20e 100644
+--- a/sysdeps/aarch64/multiarch/memset.c
++++ b/sysdeps/aarch64/multiarch/memset.c
+@@ -49,7 +49,7 @@ select_memset_ifunc (void)
+       if (IS_A64FX (midr) && zva_size == 256)
+ 	return __memset_a64fx;
+ 
+-      if (zva_size == 64)
++      if (prefer_sve_ifuncs && zva_size == 64)
+ 	return __memset_sve_zva64;
+     }
+ 
+-- 
+2.27.0
+
diff --git a/assert-Add-test-for-CVE-2025-0395.patch b/assert-Add-test-for-CVE-2025-0395.patch
new file mode 100644
index 0000000..2670800
--- /dev/null
+++ b/assert-Add-test-for-CVE-2025-0395.patch
@@ -0,0 +1,132 @@
+From f984e2d7e8299726891a1a497a3c36cd5542a0bf Mon Sep 17 00:00:00 2001
+From: Siddhesh Poyarekar <siddhesh@sourceware.org>
+Date: Fri, 31 Jan 2025 12:16:30 -0500
+Subject: [PATCH] assert: Add test for CVE-2025-0395
+
+Use the __progname symbol to override the program name to induce the
+failure that CVE-2025-0395 describes.
+
+This is related to BZ #32582
+
+Signed-off-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
+Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
+(cherry picked from commit cdb9ba84191ce72e86346fb8b1d906e7cd930ea2)
+---
+ assert/Makefile                  |  1 +
+ assert/tst-assert-sa-2025-0001.c | 92 ++++++++++++++++++++++++++++++++
+ 2 files changed, 93 insertions(+)
+ create mode 100644 assert/tst-assert-sa-2025-0001.c
+
+diff --git a/assert/Makefile b/assert/Makefile
+index 67f4e6a570..b0fc9fc4d2 100644
+--- a/assert/Makefile
++++ b/assert/Makefile
+@@ -38,6 +38,7 @@ tests := \
+   test-assert-perr \
+   tst-assert-c++ \
+   tst-assert-g++ \
++  tst-assert-sa-2025-0001 \
+   # tests
+ 
+ ifeq ($(have-cxx-thread_local),yes)
+diff --git a/assert/tst-assert-sa-2025-0001.c b/assert/tst-assert-sa-2025-0001.c
+new file mode 100644
+index 0000000000..102cb0078d
+--- /dev/null
++++ b/assert/tst-assert-sa-2025-0001.c
+@@ -0,0 +1,92 @@
++/* Test for CVE-2025-0395.
++   Copyright The GNU Toolchain Authors.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <https://www.gnu.org/licenses/>.  */
++
++/* Test that a large enough __progname does not result in a buffer overflow
++   when printing an assertion failure.  This was CVE-2025-0395.  */
++#include <assert.h>
++#include <inttypes.h>
++#include <signal.h>
++#include <stdbool.h>
++#include <string.h>
++#include <sys/mman.h>
++#include <support/check.h>
++#include <support/support.h>
++#include <support/xstdio.h>
++#include <support/xunistd.h>
++
++extern const char *__progname;
++
++int
++do_test (int argc, char **argv)
++{
++  /* Oversize __progname, guard writable maps, then trip the assert.  */
++  support_need_proc ("Reads /proc/self/maps to add guards to writable maps.");
++  ignore_stderr ();
++
++  /* XXX assumes that the assert is on a 2 digit line number.  */
++  const char *prompt = ": %s:99: do_test: Assertion `argc < 1' failed.\n";
++  /* RET is the length of the message without the program name.  */
++  int ret = fprintf (stderr, prompt, __FILE__);
++  if (ret < 0)
++    FAIL_EXIT1 ("fprintf failed: %m\n");
++
++  size_t pagesize = getpagesize ();
++  size_t namesize = pagesize - 1 - ret;
++
++  /* Alter the progname so that the assert message fills the entire page.  */
++  char progname[namesize];
++  memset (progname, 'A', namesize - 1);
++  progname[namesize - 1] = '\0';
++  __progname = progname;
++  /* Walk the process mappings; PREV_TO is the end of the previous one.  */
++  FILE *f = xfopen ("/proc/self/maps", "r");
++  char *line = NULL;
++  size_t len = 0;
++  uintptr_t prev_to = 0;
++
++  /* Pad the beginning of every writable mapping with a PROT_NONE map.  This
++     ensures that the mmap in the assert_fail path never ends up below a
++     writable map and will terminate immediately in case of a buffer
++     overflow.  */
++  while (xgetline (&line, &len, f))
++    {
++      uintptr_t from, to;
++      char perm[4];
++      /* Parse "from-to perms".  NOTE(review): sscanf result is unchecked.  */
++      sscanf (line, "%" SCNxPTR "-%" SCNxPTR " %c%c%c%c ",
++	      &from, &to,
++	      &perm[0], &perm[1], &perm[2], &perm[3]);
++
++      bool writable = (memchr (perm, 'w', 4) != NULL);
++      /* Only guard writable maps with more than a page of gap below.  */
++      if (prev_to != 0 && from - prev_to > pagesize && writable)
++	xmmap ((void *) from - pagesize, pagesize, PROT_NONE,
++	       MAP_ANONYMOUS | MAP_PRIVATE, 0);
++      /* NOTE(review): no MAP_FIXED; presumably the address is a hint.  */
++      prev_to = to;
++    }
++
++  xfclose (f);
++  /* Presumably argc >= 1 here, so this fails and raises SIGABRT.  */
++  assert (argc < 1);
++  return 0;
++}
++
++#define EXPECTED_SIGNAL SIGABRT
++#define TEST_FUNCTION_ARGV do_test
++#include <support/test-driver.c>
+-- 
+2.27.0
+
diff --git a/glibc.spec b/glibc.spec
index 181eccd..da5b102 100644
--- a/glibc.spec
+++ b/glibc.spec
@@ -67,7 +67,7 @@
 ##############################################################################
 Name: 	 	glibc
 Version: 	2.38
-Release: 	56
+Release: 	57
 Summary: 	The GNU libc libraries
 License:	%{all_license}
 URL: 		http://www.gnu.org/software/glibc/
@@ -278,6 +278,14 @@ Patch188: backport-x86-Disable-non-temporal-memset-on-Skylake-Server.patch
 Patch189: backport-Use-Avoid_Non_Temporal_Memset-to-control-non-tem.patch
 Patch190: backport-Add-Avoid_STOSB-tunable-to-allow-NT-memset-witho.patch
 Patch191: backport-x86-Enable-non-temporal-memset-for-Hygon-processors.patch 
+Patch192: assert-Add-test-for-CVE-2025-0395.patch
+Patch193: AArch64-Improve-generic-strlen.patch
+Patch194: AArch64-Optimize-memset.patch
+Patch195: AArch64-Remove-zva_128-from-memset.patch
+Patch196: math-Improve-layout-of-expf-data.patch
+Patch197: AArch64-Add-SVE-memset.patch
+Patch198: AArch64-Use-prefer_sve_ifuncs-for-SVE-memset.patch
+Patch199: math-Improve-layout-of-exp-exp10-data.patch
 
 #openEuler patch list
 Patch9000: turn-default-value-of-x86_rep_stosb_threshold_form_2K_to_1M.patch
@@ -1502,6 +1510,16 @@ fi
 %endif
 
 %changelog
+* Fri Mar 28 2025 Qingqing Li <liqingqing3@huawei.com> - 2.38-57
+- math: Improve layout of exp/exp10 data
+- AArch64: Use prefer_sve_ifuncs for SVE memset
+- AArch64: Add SVE memset
+- math: Improve layout of expf data
+- AArch64: Remove zva_128 from memset
+- AArch64: Optimize memset
+- AArch64: Improve generic strlen
+- assert: Add test for CVE-2025-0395
+
 * Wed Mar 12 2025 xiajimei  <xiejiamei@hygon.cn> - 2.38-56
 - x86: Enable non-temporal memset for Hygon processors
 - x86: Add `Avoid_STOSB` tunable to allow NT memset without ERMS
diff --git a/math-Improve-layout-of-exp-exp10-data.patch b/math-Improve-layout-of-exp-exp10-data.patch
new file mode 100644
index 0000000..a0da041
--- /dev/null
+++ b/math-Improve-layout-of-exp-exp10-data.patch
@@ -0,0 +1,39 @@
+From 5a08d049dc5037e89eb95bb1506652f0043fa39e Mon Sep 17 00:00:00 2001
+From: Wilco Dijkstra <wilco.dijkstra@arm.com>
+Date: Fri, 13 Dec 2024 15:43:07 +0000
+Subject: [PATCH] math: Improve layout of exp/exp10 data
+
+GCC aligns global data to 16 bytes if their size is >= 16 bytes.  This patch
+changes the exp_data struct slightly so that the fields are better aligned
+and without gaps.  As a result on targets that support them, more load-pair
+instructions are used in exp.
+
+The exp benchmark improves 2.5%, "144bits" by 7.2%, "768bits" by 12.7% on
+Neoverse V2.
+
+Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
+(cherry picked from commit 5afaf99edb326fd9f36eb306a828d129a3a1d7f7)
+---
+ sysdeps/ieee754/dbl-64/math_config.h | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/sysdeps/ieee754/dbl-64/math_config.h b/sysdeps/ieee754/dbl-64/math_config.h
+index 19af33fd86..52b720ecd1 100644
+--- a/sysdeps/ieee754/dbl-64/math_config.h
++++ b/sysdeps/ieee754/dbl-64/math_config.h
+@@ -195,10 +195,11 @@ check_uflow (double x)
+ extern const struct exp_data
+ {
+   double invln2N;
+-  double shift;
+   double negln2hiN;
+   double negln2loN;
+   double poly[4]; /* Last four coefficients.  */
++  double shift;
++
+   double exp2_shift;
+   double exp2_poly[EXP2_POLY_ORDER];
+   uint64_t tab[2*(1 << EXP_TABLE_BITS)];
+-- 
+2.27.0
+
diff --git a/math-Improve-layout-of-expf-data.patch b/math-Improve-layout-of-expf-data.patch
new file mode 100644
index 0000000..aa3ee5e
--- /dev/null
+++ b/math-Improve-layout-of-expf-data.patch
@@ -0,0 +1,34 @@
+From 3de5112326a4274c97f154f3d335c11965ee960c Mon Sep 17 00:00:00 2001
+From: Wilco Dijkstra <wilco.dijkstra@arm.com>
+Date: Wed, 24 Jul 2024 15:17:47 +0100
+Subject: [PATCH] math: Improve layout of expf data
+
+GCC aligns global data to 16 bytes if their size is >= 16 bytes.  This patch
+changes the exp2f_data struct slightly so that the fields are better aligned.
+As a result on targets that support them, load-pair instructions accessing
+poly_scaled and invln2_scaled are now 16-byte aligned.
+
+Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
+(cherry picked from commit 44fa9c1080fe6a9539f0d2345b9d2ae37b8ee57a)
+---
+ sysdeps/ieee754/flt-32/math_config.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/sysdeps/ieee754/flt-32/math_config.h b/sysdeps/ieee754/flt-32/math_config.h
+index d1b06a1a90..5904eb9bac 100644
+--- a/sysdeps/ieee754/flt-32/math_config.h
++++ b/sysdeps/ieee754/flt-32/math_config.h
+@@ -166,9 +166,9 @@ extern const struct exp2f_data
+   uint64_t tab[1 << EXP2F_TABLE_BITS];
+   double shift_scaled;
+   double poly[EXP2F_POLY_ORDER];
+-  double shift;
+   double invln2_scaled;
+   double poly_scaled[EXP2F_POLY_ORDER];
++  double shift;
+ } __exp2f_data attribute_hidden;
+ 
+ #define LOGF_TABLE_BITS 4
+-- 
+2.27.0
+
-- 
Gitee