From 36a8eebca3449b756ff3ca442bc53b8a7ed66cbe Mon Sep 17 00:00:00 2001 From: 18855466553 Date: Sun, 28 Apr 2024 14:47:21 +0800 Subject: [PATCH] I9KBT5:8256488: Use ldpq/stpq instead of ld4/st4 for small copies in StubGenerator::copy_memory --- ...-stpq-instead-of-ld4-st4-for-small-c.patch | 60 +++++++++++++++++++ openjdk-1.8.0.spec | 6 ++ 2 files changed, 66 insertions(+) create mode 100644 8256488-Use-ldpq-stpq-instead-of-ld4-st4-for-small-c.patch diff --git a/8256488-Use-ldpq-stpq-instead-of-ld4-st4-for-small-c.patch b/8256488-Use-ldpq-stpq-instead-of-ld4-st4-for-small-c.patch new file mode 100644 index 0000000..a928998 --- /dev/null +++ b/8256488-Use-ldpq-stpq-instead-of-ld4-st4-for-small-c.patch @@ -0,0 +1,60 @@ +Subject: 8256488: Use ldpq/stpq instead of ld4/st4 for small copies in StubGenerator::copy_memory + +-- + .../cpu/aarch64/vm/stubGenerator_aarch64.cpp | 30 ++++++++++++++++--- + 1 file changed, 26 insertions(+), 4 deletions(-) + +diff --git a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp +index f61028d5007..cf66df296e4 100644 +--- a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp ++++ b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp +@@ -1149,10 +1149,10 @@ class StubGenerator: public StubCodeGenerator { + Register count, Register tmp, int step) { + copy_direction direction = step < 0 ? copy_backwards : copy_forwards; + bool is_backwards = step < 0; +- int granularity = uabs(step); ++ unsigned granularity = uabs(step); + const Register t0 = r3, t1 = r4; + +- // <= 96 bytes do inline. Direction doesn't matter because we always ++ // <= 80 (or 96 for SIMD) bytes do inline. Direction doesn't matter because we always + // load all the data before writing anything + Label copy4, copy8, copy16, copy32, copy80, copy128, copy_big, finish; + const Register t2 = r5, t3 = r6, t4 = r7, t5 = r8; +@@ -1207,9 +1207,31 @@ class StubGenerator: public StubCodeGenerator { + // (96 bytes if SIMD because we do 32 byes per instruction) + __ bind(copy80); + if (UseSIMDForMemoryOps) { +- __ ld4(v0, v1, v2, v3, __ T16B, Address(s, 0)); ++ __ ldpq(v0, v1, Address(s, 0)); ++ __ ldpq(v2, v3, Address(s, 32)); ++ // Unaligned pointers can be an issue for copying. ++ // The issue has more chances to happen when granularity of data is ++ // less than 4(sizeof(jint)). Pointers for arrays of jint are at least ++ // 4 byte aligned. Pointers for arrays of jlong are 8 byte aligned. ++ // The most performance drop has been seen for the range 65-80 bytes. ++ // For such cases using the pair of ldp/stp instead of the third pair of ++ // ldpq/stpq fixes the performance issue. ++ if (granularity < sizeof (jint)) { ++ Label copy96; ++ __ cmp(count, u1(80/granularity)); ++ __ br(Assembler::HI, copy96); ++ __ ldp(t0, t1, Address(send, -16)); ++ ++ __ stpq(v0, v1, Address(d, 0)); ++ __ stpq(v2, v3, Address(d, 32)); ++ __ stp(t0, t1, Address(dend, -16)); ++ __ b(finish); ++ ++ __ bind(copy96); ++ } + __ ldpq(v4, v5, Address(send, -32)); +- __ st4(v0, v1, v2, v3, __ T16B, Address(d, 0)); ++ __ stpq(v0, v1, Address(d, 0)); ++ __ stpq(v2, v3, Address(d, 32)); + __ stpq(v4, v5, Address(dend, -32)); + } else { + __ ldp(t0, t1, Address(s, 0)); +-- +2.19.1 + diff --git a/openjdk-1.8.0.spec b/openjdk-1.8.0.spec index f3aed42..6a970e7 100644 --- a/openjdk-1.8.0.spec +++ b/openjdk-1.8.0.spec @@ -1318,6 +1318,8 @@ Patch426: fix-GCC-12-build-jdk8-fastdebug-error.patch Patch427: 8223485-C2-PhaseIdealLoop-create_new_if_for_predicat.patch Patch428: 8223486-split-if-update_uses-accesses-stale-idom-dat.patch +#412 +Patch429: 8256488-Use-ldpq-stpq-instead-of-ld4-st4-for-small-c.patch ############################################# # # Upstreamable patches @@ -1959,6 +1961,7 @@ pushd %{top_level_dir_name} %patch426 -p1 %patch427 -p1 %patch428 -p1 +%patch429 -p1 %endif %ifarch loongarch64 @@ -2617,6 +2620,9 @@ cjc.mainProgram(arg) %endif %changelog +* Sun Apr 28 2024 Autistic_boyya -1:1.8.0.412-b08.1 +- add 8256488-Use-ldpq-stpq-instead-of-ld4-st4-for-small-c.patch + * Thu Apr 18 2024 Autistic_boyya -1:1.8.0.412-b08.0 - del 8322725-tz-Update-Timezone-Data-to-2023d.patch - del 8325150-tz-Update-Timezone-Data-to-2024a.patch -- Gitee