diff --git a/0001-x86-Set-preferred-CPU-features-on-the-KH-40000-and-K.patch b/0001-x86-Set-preferred-CPU-features-on-the-KH-40000-and-K.patch
new file mode 100644
index 0000000000000000000000000000000000000000..003a4d5e1571985eb4cb2e756be8b19328576a03
--- /dev/null
+++ b/0001-x86-Set-preferred-CPU-features-on-the-KH-40000-and-K.patch
@@ -0,0 +1,102 @@
+From 907fce7a0bf5598d8181181ce9b82786113a066c Mon Sep 17 00:00:00 2001
+From: May
+Date: Wed, 15 Jan 2025 17:57:10 +0800
+Subject: [PATCH 1/3] x86: Set preferred CPU features on the KH-40000 and
+ KX-7000 Zhaoxin processors
+
+Fix code formatting under the Zhaoxin branch and add comments for
+the different Zhaoxin models.
+
+Unaligned AVX loads are slower on the KH-40000 and KX-7000, so disable
+the AVX_Fast_Unaligned_Load feature.
+
+Enable the Prefer_No_VZEROUPPER and Fast_Unaligned_Load features to
+use the sse2_unaligned versions of memset, strcpy and strcat.
+
+Signed-off-by: May
+---
+ sysdeps/x86/cpu-features.c | 51 ++++++++++++++++++++++++++------------
+ 1 file changed, 35 insertions(+), 16 deletions(-)
+
+diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
+index badf0888..43b5f562 100644
+--- a/sysdeps/x86/cpu-features.c
++++ b/sysdeps/x86/cpu-features.c
+@@ -907,39 +907,58 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
+ 
+       model += extended_model;
+       if (family == 0x6)
+-      {
+-        if (model == 0xf || model == 0x19)
+-        {
++        {
++          /* Tuning for older Zhaoxin processors.  */
++          if (model == 0xf || model == 0x19)
++            {
+               CPU_FEATURE_UNSET (cpu_features, AVX);
+               CPU_FEATURE_UNSET (cpu_features, AVX2);
+ 
+-          cpu_features->preferred[index_arch_Slow_SSE4_2]
+-            |= bit_arch_Slow_SSE4_2;
++              cpu_features->preferred[index_arch_Slow_SSE4_2]
++                |= bit_arch_Slow_SSE4_2;
+ 
++              /* Unaligned AVX loads are slower.  */
+               cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
+-            &= ~bit_arch_AVX_Fast_Unaligned_Load;
+-        }
+-      }
++                &= ~bit_arch_AVX_Fast_Unaligned_Load;
++            }
++        }
+       else if (family == 0x7)
+-      {
+-        if (model == 0x1b)
++        {
++          switch (model)
+             {
++              /* Wudaokou microarch tuning.  */
++              case 0x1b:
+                 CPU_FEATURE_UNSET (cpu_features, AVX);
+                 CPU_FEATURE_UNSET (cpu_features, AVX2);
+ 
+                 cpu_features->preferred[index_arch_Slow_SSE4_2]
+-              |= bit_arch_Slow_SSE4_2;
++                  |= bit_arch_Slow_SSE4_2;
+ 
+                 cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
+-              &= ~bit_arch_AVX_Fast_Unaligned_Load;
+-        }
+-        else if (model == 0x3b)
+-        {
++                  &= ~bit_arch_AVX_Fast_Unaligned_Load;
++                break;
++
++              /* Lujiazui microarch tuning.  */
++              case 0x3b:
+                 CPU_FEATURE_UNSET (cpu_features, AVX);
+                 CPU_FEATURE_UNSET (cpu_features, AVX2);
+ 
+                 cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
+-              &= ~bit_arch_AVX_Fast_Unaligned_Load;
++                  &= ~bit_arch_AVX_Fast_Unaligned_Load;
++                break;
++
++              /* Yongfeng and Shijidadao microarch tuning.  */
++              case 0x5b:
++              case 0x6b:
++                cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
++                  &= ~bit_arch_AVX_Fast_Unaligned_Load;
++
++                /* To use sse2_unaligned versions of memset, strcpy and
++                   strcat.  */
++                cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
++                  |= (bit_arch_Prefer_No_VZEROUPPER
++                      | bit_arch_Fast_Unaligned_Load);
++                break;
+             }
+         }
+     }
+-- 
+2.27.0
+
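As a reading aid, here is a minimal, self-contained C model of the tuning patch 1 applies for models 0x5b/0x6b. The bit values, the single-word preferred field and the toy selector are illustrative assumptions, not glibc's generated index_arch_*/bit_arch_* constants; real dispatch happens in glibc's ifunc resolvers at load time.

#include <stdio.h>

/* Illustrative stand-ins for glibc's generated constants.  */
#define bit_arch_AVX_Fast_Unaligned_Load  (1u << 0)
#define bit_arch_Prefer_No_VZEROUPPER     (1u << 1)
#define bit_arch_Fast_Unaligned_Load      (1u << 2)

struct cpu_features_model
{
  unsigned int preferred;  /* one word here; glibc uses an indexed array */
};

/* Mirrors the case 0x5b/0x6b tuning: drop the unaligned-AVX preference
   and prefer the sse2_unaligned string routines instead.  */
static void
tune_zhaoxin (struct cpu_features_model *cf)
{
  cf->preferred &= ~bit_arch_AVX_Fast_Unaligned_Load;
  cf->preferred |= (bit_arch_Prefer_No_VZEROUPPER
                    | bit_arch_Fast_Unaligned_Load);
}

/* Toy selector showing how a preference bit steers dispatch.  */
static const char *
select_memset (const struct cpu_features_model *cf)
{
  return (cf->preferred & bit_arch_Prefer_No_VZEROUPPER)
         ? "__memset_sse2_unaligned" : "__memset_avx2_unaligned";
}

int
main (void)
{
  struct cpu_features_model cf = { bit_arch_AVX_Fast_Unaligned_Load };
  tune_zhaoxin (&cf);
  puts (select_memset (&cf));  /* __memset_sse2_unaligned */
  return 0;
}

The point is only the shape of the mechanism: init code sets preference bits once per CPU model, and every string-function selector later branches on them.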
diff --git a/0002-x86_64-Optimize-large-size-copy-in-memmove-ssse3.patch b/0002-x86_64-Optimize-large-size-copy-in-memmove-ssse3.patch
new file mode 100644
index 0000000000000000000000000000000000000000..7194d687feb8c427427f6e17eb40c7f65be91da2
--- /dev/null
+++ b/0002-x86_64-Optimize-large-size-copy-in-memmove-ssse3.patch
@@ -0,0 +1,77 @@
+From cce571375e6a31b3ff8cdf6017e677c777d2e9d5 Mon Sep 17 00:00:00 2001
+From: May
+Date: Wed, 15 Jan 2025 17:59:55 +0800
+Subject: [PATCH 2/3] x86_64: Optimize large size copy in memmove-ssse3
+
+This patch optimizes large size copy using normal stores when src > dst
+and the regions overlap, matching the logic in memmove-vec-unaligned-erms.S.
+
+memmove-ssse3 currently uses '__x86_shared_cache_size_half' as the
+non-temporal threshold; this patch updates that value to
+'__x86_shared_non_temporal_threshold'. The
+'__x86_shared_non_temporal_threshold' is cpu-specific, and different
+CPUs will have different values based on the related nt-benchmark
+results, whereas in memmove-ssse3 the non-temporal threshold was
+'__x86_shared_cache_size_half', which is an unreasonable choice.
+
+The performance does not change drastically, showing overall
+improvements without any major regressions or gains.
+
+Results on Zhaoxin KX-7000:
+
+bench-memcpy geometric_mean(N=20) New / Original: 0.999
+bench-memcpy-random geometric_mean(N=20) New / Original: 0.999
+bench-memcpy-large geometric_mean(N=20) New / Original: 0.978
+bench-memmove geometric_mean(N=20) New / Original: 1.000
+bench-memmove-large geometric_mean(N=20) New / Original: 0.962
+
+Results on Intel Core i5-6600K:
+
+bench-memcpy geometric_mean(N=20) New / Original: 1.001
+bench-memcpy-random geometric_mean(N=20) New / Original: 0.999
+bench-memcpy-large geometric_mean(N=20) New / Original: 1.001
+bench-memmove geometric_mean(N=20) New / Original: 0.995
+bench-memmove-large geometric_mean(N=20) New / Original: 0.936
+
+Signed-off-by: May
+---
+ sysdeps/x86_64/multiarch/memmove-ssse3.S | 14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3.S b/sysdeps/x86_64/multiarch/memmove-ssse3.S
+index 460b0ec0..69561628 100644
+--- a/sysdeps/x86_64/multiarch/memmove-ssse3.S
++++ b/sysdeps/x86_64/multiarch/memmove-ssse3.S
+@@ -151,13 +151,10 @@ L(more_2x_vec):
+ 	   loop.  */
+ 	movups	%xmm0, (%rdi)
+ 
+-# ifdef SHARED_CACHE_SIZE_HALF
+-	cmp	$SHARED_CACHE_SIZE_HALF, %RDX_LP
+-# else
+-	cmp	__x86_shared_cache_size_half(%rip), %rdx
+-# endif
++	cmp	__x86_shared_non_temporal_threshold(%rip), %rdx
+ 	ja	L(large_memcpy)
+ 
++L(loop_fwd):
+ 	leaq	-64(%rdi, %rdx), %r8
+ 	andq	$-16, %rdi
+ 	movl	$48, %edx
+@@ -199,6 +196,13 @@ L(large_memcpy):
+ 	movups	-64(%r9, %rdx), %xmm10
+ 	movups	-80(%r9, %rdx), %xmm11
+ 
++	/* Check if src and dst overlap. If they do, use cacheable
++	   writes to potentially gain positive interference between
++	   the loads during the memmove.  */
++	subq	%rdi, %r9
++	cmpq	%rdx, %r9
++	jb	L(loop_fwd)
++
+ 	sall	$5, %ecx
+ 	leal	(%rcx, %rcx, 2), %r8d
+ 	leaq	-96(%rdi, %rdx), %rcx
+-- 
+2.27.0
+
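The overlap test that patch 2 adds is easier to read in C. The sketch below models only the large-size forward path: NT_THRESHOLD is an illustrative constant standing in for the runtime value of __x86_shared_non_temporal_threshold, the plain loops stand in for the hand-written SSSE3 and movntdq code, and the backward-copy case (dst overlapping ahead of src) is assumed to have been routed elsewhere before this point, as it is in the real code.

#include <stddef.h>
#include <stdint.h>

#define NT_THRESHOLD (1UL << 22)  /* illustrative; glibc reads it at runtime */

/* Stand-in for the cacheable forward copy loop (L(loop_fwd)).  */
static void
copy_fwd_cacheable (unsigned char *dst, const unsigned char *src, size_t len)
{
  for (size_t i = 0; i < len; i++)
    dst[i] = src[i];
}

/* Stand-in for the non-temporal (movntdq) copy path.  */
static void
copy_nontemporal (unsigned char *dst, const unsigned char *src, size_t len)
{
  for (size_t i = 0; i < len; i++)
    dst[i] = src[i];
}

/* Models "subq %rdi, %r9; cmpq %rdx, %r9; jb L(loop_fwd)": the unsigned
   difference src - dst is below len exactly when src > dst and the
   regions overlap, in which case cacheable stores are preferred so the
   trailing loads can hit the lines just stored.  */
void
memmove_large_sketch (unsigned char *dst, const unsigned char *src, size_t len)
{
  if (len > NT_THRESHOLD
      && (uintptr_t) src - (uintptr_t) dst >= len)
    copy_nontemporal (dst, src, len);
  else
    copy_fwd_cacheable (dst, src, len);
}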
diff --git a/0003-x86-Set-default-non_temporal_threshold-for-Zhaoxin-p.patch b/0003-x86-Set-default-non_temporal_threshold-for-Zhaoxin-p.patch
new file mode 100644
index 0000000000000000000000000000000000000000..9e37aa4216762ca0e304c4849b514fd9f842c8d9
--- /dev/null
+++ b/0003-x86-Set-default-non_temporal_threshold-for-Zhaoxin-p.patch
@@ -0,0 +1,50 @@
+From 43745f60e5ab28e861204d43b9d93cf6685200b5 Mon Sep 17 00:00:00 2001
+From: May
+Date: Wed, 15 Jan 2025 19:00:48 +0800
+Subject: [PATCH 3/3] x86: Set default non_temporal_threshold for Zhaoxin
+ processors
+
+Currently, 'non_temporal_threshold' is set to
+'non_temporal_threshold_lowbound' on Zhaoxin processors without ERMS.
+The default 'non_temporal_threshold_lowbound' is too small for the
+KH-40000 and KX-7000 Zhaoxin processors; this patch updates the value
+to 'shared / cachesize_non_temporal_divisor'.
+
+Signed-off-by: May
+---
+ sysdeps/x86/cpu-features.c | 1 +
+ sysdeps/x86/dl-cacheinfo.h | 6 ++++--
+ 2 files changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
+index 43b5f562..f752ebd2 100644
+--- a/sysdeps/x86/cpu-features.c
++++ b/sysdeps/x86/cpu-features.c
+@@ -949,6 +949,7 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
+ 
+               /* Yongfeng and Shijidadao microarch tuning.  */
+               case 0x5b:
++                cpu_features->cachesize_non_temporal_divisor = 2;
+               case 0x6b:
+                 cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
+                   &= ~bit_arch_AVX_Fast_Unaligned_Load;
+diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
+index cd4d0351..2c5b6d69 100644
+--- a/sysdeps/x86/dl-cacheinfo.h
++++ b/sysdeps/x86/dl-cacheinfo.h
+@@ -769,8 +769,10 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
+   /* If no ERMS, we use the per-thread L3 chunking. Normal cacheable stores run
+      a higher risk of actually thrashing the cache as they don't have a HW LRU
+      hint. As well, their performance in highly parallel situations is
+-     noticeably worse.  */
+-  if (!CPU_FEATURE_USABLE_P (cpu_features, ERMS))
++     noticeably worse. Zhaoxin processors are an exception; the lowbound is not
++     suitable for them based on actual test data.  */
++  if (!CPU_FEATURE_USABLE_P (cpu_features, ERMS)
++      && cpu_features->basic.kind != arch_kind_zhaoxin)
+     non_temporal_threshold = non_temporal_threshold_lowbound;
+   /* SIZE_MAX >> 4 because memmove-vec-unaligned-erms right-shifts the value of
+      'x86_non_temporal_threshold' by `LOG_4X_MEMCPY_THRESH` (4) and it is best
+-- 
+2.27.0
+
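The decision shape patch 3 leaves behind in dl_init_cacheinfo can be sketched in a few lines of C. The numbers are assumptions for illustration: glibc computes 'shared' from CPUID cache information, and cachesize_non_temporal_divisor defaults to 4, with patch 3 setting it to 2 for model 0x5b.

#include <stdbool.h>
#include <stdio.h>

#define NT_LOWBOUND (1UL << 20)  /* stand-in for non_temporal_threshold_lowbound */

/* Sketch of the threshold selection after patch 3: start from
   shared / divisor, and fall back to the lowbound only on non-Zhaoxin
   CPUs that lack ERMS.  */
static unsigned long
nt_threshold (unsigned long shared, unsigned long divisor,
              bool has_erms, bool is_zhaoxin)
{
  unsigned long threshold = shared / divisor;
  if (!has_erms && !is_zhaoxin)
    threshold = NT_LOWBOUND;
  return threshold;
}

int
main (void)
{
  /* Illustrative KH-40000-like case: 32 MiB shared cache, divisor 2,
     no ERMS; the Zhaoxin exception keeps the larger threshold.  */
  printf ("%lu\n", nt_threshold (32UL << 20, 2, false, true));
  return 0;
}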
diff --git a/glibc.spec b/glibc.spec
index 2bbcbcfb577fa15f9e872853c5744c06cade9f83..5fa5d7af0c48687a29ac34723873eb8fb51b1c5a 100644
--- a/glibc.spec
+++ b/glibc.spec
@@ -1,4 +1,4 @@
-%define anolis_release 5
+%define anolis_release 6
 
 %bcond_without testsuite
 %bcond_without benchtests
@@ -156,6 +156,10 @@
 Patch3044: 0016-LoongArch-Simplify-the-autoconf-check-for-static-PIE.patch
 Patch3045: 0017-nptl-Add-thread_pointer.h-for-LoongArch.patch
 Patch3046: 0018-nptl-fix-__builtin_thread_pointer-detection-on-Loong.patch
+Patch3047: 0001-x86-Set-preferred-CPU-features-on-the-KH-40000-and-K.patch
+Patch3048: 0002-x86_64-Optimize-large-size-copy-in-memmove-ssse3.patch
+Patch3049: 0003-x86-Set-default-non_temporal_threshold-for-Zhaoxin-p.patch
+
 BuildRequires: audit-libs-devel >= 1.1.3 libcap-devel systemtap-sdt-devel
 BuildRequires: procps-ng util-linux gawk sed >= 3.95 gettext
 BuildRequires: python3 python3-devel
@@ -1108,6 +1112,9 @@ update_gconv_modules_cache ()
 %{_libdir}/libpthread_nonshared.a
 
 %changelog
+* Wed Jan 15 2025 MayShao - 2.38-6
+- x86: Set preferred CPU features and default NT threshold for Zhaoxin processors
+
 * Fri Dec 20 2024 Zhao Hang - 2.38-5
 - fix CVE-2024-33602