diff --git a/AArch64-Update-A64FX-memset-not-to-degrade-at-16KB.patch b/AArch64-Update-A64FX-memset-not-to-degrade-at-16KB.patch new file mode 100644 index 0000000000000000000000000000000000000000..f7ce84d2d510140196eab38d24c48361bbc93f2f --- /dev/null +++ b/AArch64-Update-A64FX-memset-not-to-degrade-at-16KB.patch @@ -0,0 +1,39 @@ +From 23777232c23f80809613bdfa329f63aadf992922 Mon Sep 17 00:00:00 2001 +From: Naohiro Tamura via Libc-alpha +Date: Fri, 27 Aug 2021 05:03:04 +0000 +Subject: [PATCH] AArch64: Update A64FX memset not to degrade at 16KB + +This patch updates unroll8 code so as not to degrade at the peak +performance 16KB for both FX1000 and FX700. + +The 2 instructions inserted at the beginning of the unroll8 loop, +cmp and branch, are a workaround that was found heuristically. + +Reviewed-by: Wilco Dijkstra +--- + sysdeps/aarch64/multiarch/memset_a64fx.S | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/sysdeps/aarch64/multiarch/memset_a64fx.S b/sysdeps/aarch64/multiarch/memset_a64fx.S +index 7bf759b..f7dfdaa 100644 +--- a/sysdeps/aarch64/multiarch/memset_a64fx.S ++++ b/sysdeps/aarch64/multiarch/memset_a64fx.S +@@ -96,7 +96,14 @@ L(vl_agnostic): // VL Agnostic + L(unroll8): + sub count, count, tmp1 + .p2align 4 +-1: st1b_unroll 0, 7 ++ // The 2 instructions at the beginning of the following loop, ++ // cmp and branch, are a workaround so as not to degrade at ++ // the peak performance 16KB. ++ // It was found heuristically, and the branch condition, b.ne, ++ // is chosen intentionally so that it never jumps. 
++1: cmp xzr, xzr ++ b.ne 1b ++ st1b_unroll 0, 7 + add dst, dst, tmp1 + subs count, count, tmp1 + b.hi 1b +-- +1.8.3.1 + diff --git a/glibc.spec b/glibc.spec index 275f792ce958fdd626a2ba1a727e34b1198624f0..80db7b85202dc7e0af499ff22f57f870ae578ae3 100644 --- a/glibc.spec +++ b/glibc.spec @@ -65,7 +65,7 @@ ############################################################################## Name: glibc Version: 2.34 -Release: 18 +Release: 19 Summary: The GNU libc libraries License: %{all_license} URL: http://www.gnu.org/software/glibc/ @@ -118,6 +118,7 @@ Patch31: mtrace-Fix-output-with-PIE-and-ASLR-BZ-22716.patch Patch32: rtld-copy-terminating-null-in-tunables_strdup-bug-28.patch Patch33: Use-__executable_start-as-the-lowest-address-for-pro.patch Patch34: x86-64-Use-testl-to-check-__x86_string_control.patch +Patch35: AArch64-Update-A64FX-memset-not-to-degrade-at-16KB.patch #Patch9000: turn-REP_STOSB_THRESHOLD-from-2k-to-1M.patch Patch9001: delete-no-hard-link-to-avoid-all_language-package-to.patch @@ -1307,6 +1308,9 @@ fi %endif %changelog +* Fri Oct 29 2021 Qingqing Li - 2.34-19 +- aarch64: update a64fx memset not to degrade at 16KB + * Thu Oct 28 2021 Qingqing Li - 2.34-18 - use testl instead of andl to check __x86_string_control to avoid updating __x86_string_control