From cbfc1c2bdad34890d27a90a9338918d13a95843d Mon Sep 17 00:00:00 2001 From: Yang Yanchao Date: Wed, 23 Feb 2022 15:34:35 +0800 Subject: [PATCH] strcmp: delete align for loop_aligned In Kunpeng-920, the performance of strcmp deteriorates only when the 16 to 23 characters are different. Or the string is only 16-23 characters. That shows 2 misses per iteration which means this is a branch predictor issue indeed. In the preceding scenario, strcmp performance is 300% worse than expected. Fortunately, this problem can be solved by modifying the alignment of the functions. Signed-off-by: Yang Yanchao (cherry picked from commit 03b9b275269e525a8b8c66414417b1193ea4c208) --- glibc.spec | 6 +++- strcmp-delete-align-for-loop_aligned.patch | 32 ++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 strcmp-delete-align-for-loop_aligned.patch diff --git a/glibc.spec b/glibc.spec index a20e3f7..9775f96 100644 --- a/glibc.spec +++ b/glibc.spec @@ -66,7 +66,7 @@ ############################################################################## Name: glibc Version: 2.34 -Release: 58 +Release: 59 Summary: The GNU libc libraries License: %{all_license} URL: http://www.gnu.org/software/glibc/ @@ -205,6 +205,7 @@ Patch9011: delete-check-installed-headers-c-and-check-installed.patch Patch9012: fix-CVE-2019-1010023.patch Patch9013: fix-tst-glibcsyscalls-due-to-kernel-reserved-some-sy.patch Patch9014: use-region-to-instead-of-country-for-extract-timezon.patch +Patch9015: strcmp-delete-align-for-loop_aligned.patch Obsoletes: nscd < 2.35 Provides: ldconfig rtld(GNU_HASH) bundled(gnulib) @@ -1219,6 +1220,9 @@ fi %endif %changelog +* Wed Feb 23 2022 Yang Yanchao - 2.34-59 +- strcmp: delete align for loop_aligned + * Wed Feb 23 2022 Yang Yanchao - 2.34-58 - The release of glibc.src.rpm in OpenEuler is not based on the architecture. Developers only have glibc.src.rpm in the ARM, so add all testsuite_whitelist in glibc.src.rpm. 
diff --git a/strcmp-delete-align-for-loop_aligned.patch b/strcmp-delete-align-for-loop_aligned.patch new file mode 100644 index 0000000..cf5b15a --- /dev/null +++ b/strcmp-delete-align-for-loop_aligned.patch @@ -0,0 +1,32 @@ +From 9bbffed83b93f633b272368fc536a4f24e9942e6 Mon Sep 17 00:00:00 2001 +From: Yang Yanchao +Date: Mon, 21 Feb 2022 14:25:25 +0800 +Subject: [PATCH] strcmp: delete align for loop_aligned + +In Kunpeng-920, the performance of strcmp deteriorates only +when the 16 to 23 characters are different. Or the string is +only 16-23 characters. That shows 2 misses per iteration which +means this is a branch predictor issue indeed. +In the preceding scenario, strcmp performance is 300% worse than expected. + +Fortunately, this problem can be solved by modifying the alignment of the functions. +--- + sysdeps/aarch64/strcmp.S | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/sysdeps/aarch64/strcmp.S b/sysdeps/aarch64/strcmp.S +index f225d718..7a048b66 100644 +--- a/sysdeps/aarch64/strcmp.S ++++ b/sysdeps/aarch64/strcmp.S +@@ -71,8 +71,6 @@ ENTRY(strcmp) + b.ne L(misaligned8) + cbnz tmp, L(mutual_align) + +- .p2align 4 +- + L(loop_aligned): + ldr data2, [src1, off2] + ldr data1, [src1], 8 +-- +2.33.0 + -- Gitee